npm - postal-mime - Versions diffs - 2.0.0 → 2.0.1 - Mend

postal-mime 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/CHANGELOG.md +7 -0
package/README.md +0 -2
package/package.json +1 -1
package/src/address-parser.js +313 -0
package/src/base64-decoder.js +50 -0
package/src/decode-strings.js +268 -0
package/src/html-entities.js +2236 -0
package/src/mime-node.js +271 -0
package/src/package.json +3 -0
package/src/pass-through-decoder.js +17 -0
package/src/postal-mime.js +395 -0
package/src/qp-decoder.js +96 -0
package/src/text-format.js +334 -0
package/.github/workflows/release.yaml +0 -37

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 # Changelog
+## [2.0.1](https://github.com/postalsys/postal-mime/compare/v2.0.0...v2.0.1) (2023-11-05)
+### Bug Fixes
+* **npm:** DO not ignore src folder when publishing to npm ([ef8a2df](https://github.com/postalsys/postal-mime/commit/ef8a2df8d65be3dcfc52784c5c73c79f820c1c82))
 ## [2.0.0](https://github.com/postalsys/postal-mime/compare/v1.1.0...v2.0.0) (2023-11-03)

package/README.md CHANGED Viewed

@@ -4,8 +4,6 @@ Email parser for browser environments.
 PostalMime can be run in the main web thread or from Web Workers.
-PostalMime can be bundled using WebPack. In fact the distribution file is also built with WebPack.
 ## Source
 Source code is available from [Github](https://github.com/postalsys/postal-mime).

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "postal-mime",
-    "version": "2.0.0",
+    "version": "2.0.1",
     "description": "Email parser for browser environments",
     "main": "./src/postal-mime.js",
     "exports": {

package/src/address-parser.js ADDED Viewed

@@ -0,0 +1,313 @@
+import { decodeWords } from './decode-strings.js';
+/**
+ * Converts tokens for a single address (or a single group) into address objects.
+ *
+ * Non-operator token values are sorted into four buckets depending on the most
+ * recent operator token: `<` selects the address bucket, `(` the comment bucket,
+ * `:` the group bucket (and marks the entry as a group); any other operator
+ * switches back to the plain text bucket.
+ *
+ * @param {Array} tokens Tokens for one entry; each token is read via its `type` and `value` properties
+ * @return {Array} Array with a single `{ name, group }` object when a `:` operator was seen,
+ *     otherwise with a single `{ address, name }` object
+ */
+function _handleAddress(tokens) {
+    let token;
+    let isGroup = false;
+    // Name of the bucket in `data` that text tokens are currently appended to
+    let state = 'text';
+    let address;
+    let addresses = [];
+    let data = {
+        address: [],
+        comment: [],
+        group: [],
+        text: []
+    };
+    let i;
+    let len;
+    // Filter out <addresses>, (comments) and regular text
+    for (i = 0, len = tokens.length; i < len; i++) {
+        token = tokens[i];
+        if (token.type === 'operator') {
+            switch (token.value) {
+                case '<':
+                    state = 'address';
+                    break;
+                case '(':
+                    state = 'comment';
+                    break;
+                case ':':
+                    state = 'group';
+                    isGroup = true;
+                    break;
+                default:
+                    // closing operators (and any other operator) return to plain text
+                    state = 'text';
+            }
+        } else if (token.value) {
+            if (state === 'address') {
+                // handle use case where unquoted name includes a "<"
+                // Apple Mail truncates everything between an unexpected < and an address
+                // and so will we
+                token.value = token.value.replace(/^[^<]*<\s*/, '');
+            }
+            data[state].push(token.value);
+        }
+    }
+    // If there is no text but a comment, replace the two
+    if (!data.text.length && data.comment.length) {
+        data.text = data.comment;
+        data.comment = [];
+    }
+    if (isGroup) {
+        // http://tools.ietf.org/html/rfc2822#appendix-A.1.3
+        data.text = data.text.join(' ');
+        // NOTE(review): `address` has not been assigned yet on this path, so the
+        // `address && address.name` fallback always evaluates to undefined
+        addresses.push({
+            name: decodeWords(data.text || (address && address.name)),
+            // group members are parsed recursively from the comma-joined group bucket
+            group: data.group.length ? addressParser(data.group.join(',')) : []
+        });
+    } else {
+        // If no address was found, try to detect one from regular text
+        if (!data.address.length && data.text.length) {
+            // First pass: look (from the end) for a text element that is exactly one user@domain value
+            for (i = data.text.length - 1; i >= 0; i--) {
+                if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
+                    data.address = data.text.splice(i, 1);
+                    break;
+                }
+            }
+            // Replacement callback: the first match becomes the address and is cut out
+            // of the text (replaced by a space); once an address is set, matches are kept as is
+            let _regexHandler = function (address) {
+                if (!data.address.length) {
+                    data.address = [address.trim()];
+                    return ' ';
+                } else {
+                    return address;
+                }
+            };
+            // still no address
+            if (!data.address.length) {
+                // Second pass: extract an address embedded inside a longer text element
+                for (i = data.text.length - 1; i >= 0; i--) {
+                    // fixed the regex to parse email address correctly when email address has more than one @
+                    data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^\s]+\b\s*/, _regexHandler).trim();
+                    if (data.address.length) {
+                        break;
+                    }
+                }
+            }
+        }
+        // If there is still no text but a comment exists, replace the two
+        if (!data.text.length && data.comment.length) {
+            data.text = data.comment;
+            data.comment = [];
+        }
+        // Keep only the first address occurrence, push others to regular text
+        if (data.address.length > 1) {
+            data.text = data.text.concat(data.address.splice(1));
+        }
+        // Join values with spaces
+        data.text = data.text.join(' ');
+        data.address = data.address.join(' ');
+        // NOTE(review): this code runs only when `isGroup` is false, so the condition
+        // below can never be true and the else branch is always taken
+        if (!data.address && isGroup) {
+            return [];
+        } else {
+            address = {
+                address: data.address || data.text || '',
+                name: decodeWords(data.text || data.address || '')
+            };
+            // When both fields ended up with the same value, keep it only in the
+            // field it fits: as the address if it contains "@", otherwise as the name
+            if (address.address === address.name) {
+                if ((address.address || '').match(/@/)) {
+                    address.name = '';
+                } else {
+                    address.address = '';
+                }
+            }
+            addresses.push(address);
+        }
+    }
+    return addresses;
+}
+/**
+ * Creates a Tokenizer object for tokenizing address field strings.
+ *
+ * The input is consumed one character at a time and split into `operator`
+ * tokens (single characters listed in `this.operators` or the closing
+ * character an open operator is waiting for) and `text` tokens (everything else).
+ *
+ * @constructor
+ * @param {String} str Address field string
+ */
+class Tokenizer {
+    constructor(str) {
+        // falsy input is treated as an empty string
+        this.str = (str || '').toString();
+        this.operatorCurrent = '';
+        // closing character the currently open operator is waiting for ('' when none)
+        this.operatorExpecting = '';
+        // text node currently being filled, or null when the next character starts a new one
+        this.node = null;
+        // true when the previous character was a backslash inside a quoted section
+        this.escaped = false;
+        // every node created so far, in input order
+        this.list = [];
+        /**
+         * Operator tokens and which tokens are expected to end the sequence
+         * (an empty string means no closing token is expected)
+         */
+        this.operators = {
+            '"': '"',
+            '(': ')',
+            '<': '>',
+            ',': '',
+            ':': ';',
+            // Semicolons are not a legal delimiter per the RFC2822 grammar other
+            // than for terminating a group, but they are also not valid for any
+            // other use in this context.  Given that some mail clients have
+            // historically allowed the semicolon as a delimiter equivalent to the
+            // comma in their UI, it makes sense to treat them the same as a comma
+            // when used outside of a group.
+            ';': ''
+        };
+    }
+    /**
+     * Tokenizes the original input string
+     *
+     * Every node value is trimmed, and nodes that end up empty are dropped
+     * from the result.
+     *
+     * @return {Array} An array of operator|text tokens
+     */
+    tokenize() {
+        let chr,
+            list = [];
+        for (let i = 0, len = this.str.length; i < len; i++) {
+            chr = this.str.charAt(i);
+            this.checkChar(chr);
+        }
+        this.list.forEach(node => {
+            node.value = (node.value || '').toString().trim();
+            if (node.value) {
+                list.push(node);
+            }
+        });
+        return list;
+    }
+    /**
+     * Checks if a character is an operator or text and acts accordingly
+     *
+     * Operator characters are pushed to `this.list` as their own node and end the
+     * current text node. Any other character is appended to the current text node,
+     * which is created on demand.
+     *
+     * @param {String} chr Character from the address field
+     */
+    checkChar(chr) {
+        if (this.escaped) {
+            // ignore next condition blocks
+            // (an escaped character is always treated as text, even if it is an operator)
+        } else if (chr === this.operatorExpecting) {
+            // closing character of the open operator
+            this.node = {
+                type: 'operator',
+                value: chr
+            };
+            this.list.push(this.node);
+            this.node = null;
+            this.operatorExpecting = '';
+            this.escaped = false;
+            return;
+        } else if (!this.operatorExpecting && chr in this.operators) {
+            // a new operator is recognized only while no closing character is pending,
+            // so operator characters inside e.g. a quoted string stay text
+            this.node = {
+                type: 'operator',
+                value: chr
+            };
+            this.list.push(this.node);
+            this.node = null;
+            this.operatorExpecting = this.operators[chr];
+            this.escaped = false;
+            return;
+        } else if (['"', "'"].includes(this.operatorExpecting) && chr === '\\') {
+            // backslash inside a quoted section: drop it and treat the next character as text
+            this.escaped = true;
+            return;
+        }
+        if (!this.node) {
+            this.node = {
+                type: 'text',
+                value: ''
+            };
+            this.list.push(this.node);
+        }
+        if (chr === '\n') {
+            // Convert newlines to spaces. Carriage return is ignored as \r and \n usually
+            // go together anyway and there already is a WS for \n. Lone \r means something is fishy.
+            chr = ' ';
+        }
+        if (chr.charCodeAt(0) >= 0x21 || [' ', '\t'].includes(chr)) {
+            // keep the character; characters below 0x21 other than space and tab are skipped
+            this.node.value += chr;
+        }
+        this.escaped = false;
+    }
+}
+/**
+ * Parses structured e-mail addresses from an address field
+ *
+ * Example:
+ *
+ *    'Name <address@domain>'
+ *
+ * will be converted to
+ *
+ *     [{name: 'Name', address: 'address@domain'}]
+ *
+ * @param {String} str Address field
+ * @return {Array} An array of address objects
+ */
+function addressParser(str, options) {
+    options = options || {};
+    let tokenizer = new Tokenizer(str);
+    let tokens = tokenizer.tokenize();
+    let addresses = [];
+    let address = [];
+    let parsedAddresses = [];
+    tokens.forEach(token => {
+        if (token.type === 'operator' && (token.value === ',' || token.value === ';')) {
+            if (address.length) {
+                addresses.push(address);
+            }
+            address = [];
+        } else {
+            address.push(token);
+        }
+    });
+    if (address.length) {
+        addresses.push(address);
+    }
+    addresses.forEach(address => {
+        address = _handleAddress(address);
+        if (address.length) {
+            parsedAddresses = parsedAddresses.concat(address);
+        }
+    });
+    if (options.flatten) {
+        let addresses = [];
+        let walkAddressList = list => {
+            list.forEach(address => {
+                if (address.group) {
+                    return walkAddressList(address.group);
+                } else {
+                    addresses.push(address);
+                }
+            });
+        };
+        walkAddressList(parsedAddresses);
+        return addresses;
+    }
+    return parsedAddresses;
+}
+// expose to the world
+export default addressParser;

package/src/base64-decoder.js ADDED Viewed

@@ -0,0 +1,50 @@
+import { decodeBase64, blobToArrayBuffer } from './decode-strings.js';
+export default class Base64Decoder {
+    constructor(opts) {
+        opts = opts || {};
+        this.decoder = opts.decoder || new TextDecoder();
+        this.maxChunkSize = 100 * 1024;
+        this.chunks = [];
+        this.remainder = '';
+    }
+    update(buffer) {
+        let str = this.decoder.decode(buffer);
+        if (/[^a-zA-Z0-9+\/]/.test(str)) {
+            str = str.replace(/[^a-zA-Z0-9+\/]+/g, '');
+        }
+        this.remainder += str;
+        if (this.remainder.length >= this.maxChunkSize) {
+            let allowedBytes = Math.floor(this.remainder.length / 4) * 4;
+            let base64Str;
+            if (allowedBytes === this.remainder.length) {
+                base64Str = this.remainder;
+                this.remainder = '';
+            } else {
+                base64Str = this.remainder.substr(0, allowedBytes);
+                this.remainder = this.remainder.substr(allowedBytes);
+            }
+            if (base64Str.length) {
+                this.chunks.push(decodeBase64(base64Str));
+            }
+        }
+    }
+    finalize() {
+        if (this.remainder && !/^=+$/.test(this.remainder)) {
+            this.chunks.push(decodeBase64(this.remainder));
+        }
+        return blobToArrayBuffer(new Blob(this.chunks, { type: 'application/octet-stream' }));
+    }
+}

package/src/decode-strings.js ADDED Viewed

@@ -0,0 +1,268 @@
+export const textEncoder = new TextEncoder();
+const decoders = new Map();
+const base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+// Use a lookup table to find the index.
+const base64Lookup = new Uint8Array(256);
+for (var i = 0; i < base64Chars.length; i++) {
+    base64Lookup[base64Chars.charCodeAt(i)] = i;
+}
+export function decodeBase64(base64) {
+    let bufferLength = Math.ceil(base64.length / 4) * 3;
+    const len = base64.length;
+    let p = 0;
+    if (base64.length % 4 === 3) {
+        bufferLength--;
+    } else if (base64.length % 4 === 2) {
+        bufferLength -= 2;
+    } else if (base64[base64.length - 1] === '=') {
+        bufferLength--;
+        if (base64[base64.length - 2] === '=') {
+            bufferLength--;
+        }
+    }
+    const arrayBuffer = new ArrayBuffer(bufferLength);
+    const bytes = new Uint8Array(arrayBuffer);
+    for (let i = 0; i < len; i += 4) {
+        let encoded1 = base64Lookup[base64.charCodeAt(i)];
+        let encoded2 = base64Lookup[base64.charCodeAt(i + 1)];
+        let encoded3 = base64Lookup[base64.charCodeAt(i + 2)];
+        let encoded4 = base64Lookup[base64.charCodeAt(i + 3)];
+        bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
+        bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
+        bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
+    }
+    return arrayBuffer;
+}
+export function getDecoder(charset) {
+    charset = charset || 'utf8';
+    if (decoders.has(charset)) {
+        return decoders.get(charset);
+    }
+    let decoder;
+    try {
+        decoder = new TextDecoder(charset);
+    } catch (err) {
+        if (charset === 'utf8') {
+            // is this even possible?
+            throw err;
+        }
+        // use default
+        return getDecoder();
+    }
+    decoders.set(charset, decoder);
+    return decoder;
+}
+/**
+ * Converts a Blob into an ArrayBuffer
+ * @param {Blob} blob Blob to convert
+ * @returns {ArrayBuffer} Converted value
+ */
+export async function blobToArrayBuffer(blob) {
+    if ('arrayBuffer' in blob) {
+        return await blob.arrayBuffer();
+    }
+    const fr = new FileReader();
+    return new Promise((resolve, reject) => {
+        fr.onload = function (e) {
+            resolve(e.target.result);
+        };
+        fr.onerror = function (e) {
+            reject(fr.error);
+        };
+        fr.readAsArrayBuffer(blob);
+    });
+}
+export function getHex(c) {
+    if ((c >= 0x30 /* 0 */ && c <= 0x39) /* 9 */ || (c >= 0x61 /* a */ && c <= 0x66) /* f */ || (c >= 0x41 /* A */ && c <= 0x46) /* F */) {
+        return String.fromCharCode(c);
+    }
+    return false;
+}
+/**
+ * Decode a complete mime word encoded string
+ *
+ * @param {String} str Mime word encoded string
+ * @return {String} Decoded unicode string
+ */
+export function decodeWord(charset, encoding, str) {
+    // RFC2231 added language tag to the encoding
+    // see: https://tools.ietf.org/html/rfc2231#section-5
+    // this implementation silently ignores this tag
+    let splitPos = charset.indexOf('*');
+    if (splitPos >= 0) {
+        charset = charset.substr(0, splitPos);
+    }
+    encoding = encoding.toUpperCase();
+    let byteStr;
+    if (encoding === 'Q') {
+        str = str
+            // remove spaces between = and hex char, this might indicate invalidly applied line splitting
+            .replace(/=\s+([0-9a-fA-F])/g, '=$1')
+            // convert all underscores to spaces
+            .replace(/[_\s]/g, ' ');
+        let buf = textEncoder.encode(str);
+        let encodedBytes = [];
+        for (let i = 0, len = buf.length; i < len; i++) {
+            let c = buf[i];
+            if (i <= len - 2 && c === 0x3d /* = */) {
+                let c1 = getHex(buf[i + 1]);
+                let c2 = getHex(buf[i + 2]);
+                if (c1 && c2) {
+                    let c = parseInt(c1 + c2, 16);
+                    encodedBytes.push(c);
+                    i += 2;
+                    continue;
+                }
+            }
+            encodedBytes.push(c);
+        }
+        byteStr = new ArrayBuffer(encodedBytes.length);
+        let dataView = new DataView(byteStr);
+        for (let i = 0, len = encodedBytes.length; i < len; i++) {
+            dataView.setUint8(i, encodedBytes[i]);
+        }
+    } else if (encoding === 'B') {
+        byteStr = decodeBase64(str.replace(/[^a-zA-Z0-9\+\/=]+/g, ''));
+    } else {
+        // keep as is, convert ArrayBuffer to unicode string, assume utf8
+        byteStr = textEncoder.encode(str);
+    }
+    return getDecoder(charset).decode(byteStr);
+}
+/**
+ * Decodes all mime encoded words (`=?charset?encoding?text?=`) found in a string.
+ *
+ * Adjacent encoded words that use the same encoding (B or Q) and the same
+ * charset are merged into a single word before decoding, and whitespace between
+ * neighbouring encoded words is removed.
+ *
+ * @param {String} str String that may contain mime encoded words; falsy values are treated as ''
+ * @return {String} String with the encoded words replaced by their decoded text
+ */
+export function decodeWords(str) {
+    return (
+        (str || '')
+            .toString()
+            // find base64 words that can be joined
+            .replace(/(=\?([^?]+)\?[Bb]\?[^?]*\?=)\s*(?==\?([^?]+)\?[Bb]\?[^?]*\?=)/g, (match, left, chLeft, chRight) => {
+                // only mark b64 chunks to be joined if charsets match
+                if (chLeft === chRight) {
+                    // set a joiner marker
+                    return left + '__\x00JOIN\x00__';
+                }
+                return match;
+            })
+            // find QP words that can be joined
+            .replace(/(=\?([^?]+)\?[Qq]\?[^?]*\?=)\s*(?==\?([^?]+)\?[Qq]\?[^?]*\?=)/g, (match, left, chLeft, chRight) => {
+                // only mark QP chunks to be joined if charsets match
+                if (chLeft === chRight) {
+                    // set a joiner marker
+                    return left + '__\x00JOIN\x00__';
+                }
+                return match;
+            })
+            // join the marked words: drop the marker together with the "?=" before it
+            // and the "=?charset?encoding?" prefix after it
+            .replace(/(\?=)?__\x00JOIN\x00__(=\?([^?]+)\?[QqBb]\?)?/g, '')
+            // remove spaces between mime encoded words
+            .replace(/(=\?[^?]+\?[QqBb]\?[^?]*\?=)\s+(?==\?[^?]+\?[QqBb]\?[^?]*\?=)/g, '$1')
+            // decode words
+            .replace(/=\?([\w_\-*]+)\?([QqBb])\?([^?]*)\?=/g, (m, charset, encoding, text) => decodeWord(charset, encoding, text))
+    );
+}
+export function decodeURIComponentWithCharset(encodedStr, charset) {
+    charset = charset || 'utf-8';
+    let encodedBytes = [];
+    for (let i = 0; i < encodedStr.length; i++) {
+        let c = encodedStr.charAt(i);
+        if (c === '%' && /^[a-f0-9]{2}/i.test(encodedStr.substr(i + 1, 2))) {
+            // encoded sequence
+            let byte = encodedStr.substr(i + 1, 2);
+            i += 2;
+            encodedBytes.push(parseInt(byte, 16));
+        } else if (c.charCodeAt(0) > 126) {
+            c = textEncoder.encode(c);
+            for (let j = 0; j < c.length; j++) {
+                encodedBytes.push(c[j]);
+            }
+        } else {
+            // "normal" char
+            encodedBytes.push(c.charCodeAt(0));
+        }
+    }
+    const byteStr = new ArrayBuffer(encodedBytes.length);
+    const dataView = new DataView(byteStr);
+    for (let i = 0, len = encodedBytes.length; i < len; i++) {
+        dataView.setUint8(i, encodedBytes[i]);
+    }
+    return getDecoder(charset).decode(byteStr);
+}
+export function decodeParameterValueContinuations(header) {
+    // handle parameter value continuations
+    // https://tools.ietf.org/html/rfc2231#section-3
+    // preprocess values
+    let paramKeys = new Map();
+    Object.keys(header.params).forEach(key => {
+        let match = key.match(/\*((\d+)\*?)?$/);
+        if (!match) {
+            // nothing to do here, does not seem like a continuation param
+            return;
+        }
+        let actualKey = key.substr(0, match.index).toLowerCase();
+        let nr = Number(match[2]) || 0;
+        let paramVal;
+        if (!paramKeys.has(actualKey)) {
+            paramVal = {
+                charset: false,
+                values: []
+            };
+            paramKeys.set(actualKey, paramVal);
+        } else {
+            paramVal = paramKeys.get(actualKey);
+        }
+        let value = header.params[key];
+        if (nr === 0 && match[0].charAt(match[0].length - 1) === '*' && (match = value.match(/^([^']*)'[^']*'(.*)$/))) {
+            paramVal.charset = match[1] || 'utf-8';
+            value = match[2];
+        }
+        paramVal.values.push({ nr, value });
+        // remove the old reference
+        delete header.params[key];
+    });
+    paramKeys.forEach((paramVal, key) => {
+        header.params[key] = decodeURIComponentWithCharset(
+            paramVal.values
+                .sort((a, b) => a.nr - b.nr)
+                .map(a => a.value)
+                .join(''),
+            paramVal.charset
+        );
+    });
+}