postal-mime 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [2.0.1](https://github.com/postalsys/postal-mime/compare/v2.0.0...v2.0.1) (2023-11-05)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * **npm:** Do not ignore src folder when publishing to npm ([ef8a2df](https://github.com/postalsys/postal-mime/commit/ef8a2df8d65be3dcfc52784c5c73c79f820c1c82))
9
+
3
10
  ## [2.0.0](https://github.com/postalsys/postal-mime/compare/v1.1.0...v2.0.0) (2023-11-03)
4
11
 
5
12
 
package/README.md CHANGED
@@ -4,8 +4,6 @@ Email parser for browser environments.
4
4
 
5
5
  PostalMime can be run in the main web thread or from Web Workers.
6
6
 
7
- PostalMime can be bundled using WebPack. In fact the distribution file is also built with WebPack.
8
-
9
7
  ## Source
10
8
 
11
9
  Source code is available from [GitHub](https://github.com/postalsys/postal-mime).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "postal-mime",
3
- "version": "2.0.0",
3
+ "version": "2.0.1",
4
4
  "description": "Email parser for browser environments",
5
5
  "main": "./src/postal-mime.js",
6
6
  "exports": {
@@ -0,0 +1,313 @@
1
+ import { decodeWords } from './decode-strings.js';
2
+
3
/**
 * Converts the tokens of a single address-list entry into address objects.
 *
 * @param {Array} tokens Tokens ({ type, value }) belonging to one entry
 * @return {Array} A list holding one `{ address, name }` object, or, when the
 *   entry uses group syntax ("name: members;"), one `{ name, group }` object
 */
function _handleAddress(tokens) {
    let isGroup = false;
    let state = 'text';
    const addresses = [];
    const data = {
        address: [],
        comment: [],
        group: [],
        text: []
    };

    // Filter out <addresses>, (comments) and regular text
    for (const token of tokens) {
        if (token.type === 'operator') {
            switch (token.value) {
                case '<':
                    state = 'address';
                    break;
                case '(':
                    state = 'comment';
                    break;
                case ':':
                    state = 'group';
                    isGroup = true;
                    break;
                default:
                    state = 'text';
            }
        } else if (token.value) {
            if (state === 'address') {
                // handle use case where unquoted name includes a "<"
                // Apple Mail truncates everything between an unexpected < and an address
                // and so will we
                token.value = token.value.replace(/^[^<]*<\s*/, '');
            }
            data[state].push(token.value);
        }
    }

    // If there is no text but a comment, replace the two
    if (!data.text.length && data.comment.length) {
        data.text = data.comment;
        data.comment = [];
    }

    if (isGroup) {
        // http://tools.ietf.org/html/rfc2822#appendix-A.1.3
        data.text = data.text.join(' ');
        addresses.push({
            name: decodeWords(data.text),
            group: data.group.length ? addressParser(data.group.join(',')) : []
        });
        return addresses;
    }

    // If no address was found, try to detect one from regular text
    if (!data.address.length && data.text.length) {
        // Prefer a text chunk that is, as a whole, a plain user@domain value
        for (let i = data.text.length - 1; i >= 0; i--) {
            if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
                data.address = data.text.splice(i, 1);
                break;
            }
        }

        // still no address
        if (!data.address.length) {
            // Takes the first address-like substring as the address and
            // replaces it with a space in the surrounding text
            const _regexHandler = function (address) {
                if (!data.address.length) {
                    data.address = [address.trim()];
                    return ' ';
                } else {
                    return address;
                }
            };

            for (let i = data.text.length - 1; i >= 0; i--) {
                // the pattern tolerates email addresses that contain more than one @
                data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^\s]+\b\s*/, _regexHandler).trim();
                if (data.address.length) {
                    break;
                }
            }
        }
    }

    // If there is still no text but a comment exists, replace the two
    if (!data.text.length && data.comment.length) {
        data.text = data.comment;
        data.comment = [];
    }

    // Keep only the first address occurrence, push others to regular text
    if (data.address.length > 1) {
        data.text = data.text.concat(data.address.splice(1));
    }

    // Join values with spaces
    data.text = data.text.join(' ');
    data.address = data.address.join(' ');

    const address = {
        address: data.address || data.text || '',
        name: decodeWords(data.text || data.address || '')
    };

    // A single value was used for both fields: decide which one it really is
    if (address.address === address.name) {
        if ((address.address || '').match(/@/)) {
            address.name = '';
        } else {
            address.address = '';
        }
    }

    addresses.push(address);

    return addresses;
}
134
+
135
/**
 * Splits an address field string into operator and text tokens
 *
 * @constructor
 * @param {String} str Address field string
 */
class Tokenizer {
    constructor(str) {
        this.str = (str || '').toString();
        this.operatorCurrent = '';
        this.operatorExpecting = '';
        this.node = null;
        this.escaped = false;

        this.list = [];
        /**
         * Maps every opening operator to the character that closes its sequence
         * (an empty string means the operator does not open a sequence)
         */
        this.operators = {
            '"': '"',
            '(': ')',
            '<': '>',
            ',': '',
            ':': ';',
            // Semicolons are not a legal delimiter per the RFC2822 grammar other
            // than for terminating a group, but they are also not valid for any
            // other use in this context. Given that some mail clients have
            // historically allowed the semicolon as a delimiter equivalent to the
            // comma in their UI, it makes sense to treat them the same as a comma
            // when used outside of a group.
            ';': ''
        };
    }

    /**
     * Runs every character of the input through checkChar and collects the result
     *
     * @return {Array} Tokens ({ type: 'operator'|'text', value }) with trimmed,
     *   non-empty values
     */
    tokenize() {
        for (let pos = 0; pos < this.str.length; pos++) {
            this.checkChar(this.str[pos]);
        }

        const tokens = [];
        for (const node of this.list) {
            node.value = (node.value || '').toString().trim();
            if (node.value) {
                tokens.push(node);
            }
        }

        return tokens;
    }

    /**
     * Consumes one character: emits an operator token, toggles the escape
     * flag, or appends the character to the current text token
     *
     * @param {String} chr Character from the address field
     */
    checkChar(chr) {
        if (!this.escaped) {
            const closesSequence = chr === this.operatorExpecting;
            const opensSequence = !closesSequence && !this.operatorExpecting && chr in this.operators;

            if (closesSequence || opensSequence) {
                this.list.push({
                    type: 'operator',
                    value: chr
                });
                // the next text character starts a fresh text token
                this.node = null;
                this.operatorExpecting = closesSequence ? '' : this.operators[chr];
                this.escaped = false;
                return;
            }

            if (chr === '\\' && ['"', "'"].includes(this.operatorExpecting)) {
                // backslash inside a quoted sequence escapes the next character
                this.escaped = true;
                return;
            }
        }

        if (!this.node) {
            this.node = {
                type: 'text',
                value: ''
            };
            this.list.push(this.node);
        }

        // Newlines become spaces. Carriage return is dropped below as \r and \n usually
        // go together anyway and there already is a WS for \n.
        const normalized = chr === '\n' ? ' ' : chr;

        // skip control bytes other than space and tab
        const isKept = normalized.charCodeAt(0) >= 0x21 || [' ', '\t'].includes(normalized);
        if (isKept) {
            this.node.value += normalized;
        }

        this.escaped = false;
    }
}
247
+
248
/**
 * Parses structured e-mail addresses from an address field
 *
 * Example:
 *
 *    'Name <address@domain>'
 *
 * will be converted to
 *
 *     [{name: 'Name', address: 'address@domain'}]
 *
 * @param {String} str Address field
 * @param {Object} [options] Optional settings; when `options.flatten` is truthy,
 *   group entries are replaced by their members in the returned list
 * @return {Array} An array of address objects
 */
function addressParser(str, options) {
    const settings = options || {};
    const tokens = new Tokenizer(str).tokenize();

    // Split the token stream into per-address chunks on "," and ";" operators
    const tokenGroups = [];
    let current = [];
    for (const token of tokens) {
        const isSeparator = token.type === 'operator' && (token.value === ',' || token.value === ';');
        if (!isSeparator) {
            current.push(token);
            continue;
        }
        if (current.length) {
            tokenGroups.push(current);
        }
        current = [];
    }
    if (current.length) {
        tokenGroups.push(current);
    }

    let parsedAddresses = [];
    for (const group of tokenGroups) {
        const handled = _handleAddress(group);
        if (handled.length) {
            parsedAddresses = parsedAddresses.concat(handled);
        }
    }

    if (!settings.flatten) {
        return parsedAddresses;
    }

    // Recursively replace every group entry with its members
    const flat = [];
    const collect = list => {
        for (const entry of list) {
            if (entry.group) {
                collect(entry.group);
            } else {
                flat.push(entry);
            }
        }
    };
    collect(parsedAddresses);
    return flat;
}
311
+
312
+ // expose to the world
313
+ export default addressParser;
@@ -0,0 +1,50 @@
1
+ import { decodeBase64, blobToArrayBuffer } from './decode-strings.js';
2
+
3
/**
 * Incremental base64 decoder: feed it encoded chunks with update() and
 * collect the decoded bytes with finalize()
 */
export default class Base64Decoder {
    /**
     * @param {Object} [opts] Optional settings
     * @param {Object} [opts.decoder] Object whose decode() turns an input chunk into a string;
     *   a new TextDecoder is used when omitted
     */
    constructor(opts) {
        const options = opts || {};

        this.decoder = options.decoder || new TextDecoder();

        // decode buffered text once at least this many characters are pending
        this.maxChunkSize = 100 * 1024;

        this.chunks = [];

        this.remainder = '';
    }

    /**
     * Buffers one encoded chunk and decodes the buffered text once it is large enough
     *
     * @param {*} buffer Chunk passed to the configured decoder's decode()
     */
    update(buffer) {
        // everything outside of the base64 alphabet (line breaks, padding, ...) is dropped
        const cleaned = this.decoder.decode(buffer).replace(/[^a-zA-Z0-9+\/]+/g, '');
        this.remainder += cleaned;

        if (this.remainder.length < this.maxChunkSize) {
            return;
        }

        // only whole 4-character groups are decoded now, the rest waits for more input
        const usableLength = Math.floor(this.remainder.length / 4) * 4;
        const base64Str = this.remainder.slice(0, usableLength);
        this.remainder = this.remainder.slice(usableLength);

        if (base64Str.length) {
            this.chunks.push(decodeBase64(base64Str));
        }
    }

    /**
     * Decodes whatever is still buffered and joins all decoded chunks
     *
     * @return {*} Result of blobToArrayBuffer() for a Blob built from the decoded chunks
     */
    finalize() {
        const hasPendingData = this.remainder && !/^=+$/.test(this.remainder);
        if (hasPendingData) {
            this.chunks.push(decodeBase64(this.remainder));
        }

        const blob = new Blob(this.chunks, { type: 'application/octet-stream' });
        return blobToArrayBuffer(blob);
    }
}
@@ -0,0 +1,268 @@
1
export const textEncoder = new TextEncoder();

// TextDecoder instances that were already created, keyed by charset label
const decoders = new Map();

const base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

// Use a lookup table to find the index.
const base64Lookup = new Uint8Array(256);
for (let i = 0; i < base64Chars.length; i++) {
    base64Lookup[base64Chars.charCodeAt(i)] = i;
}

/**
 * Decodes a base64 string into bytes.
 *
 * The input is expected to contain only base64 alphabet characters, optionally
 * followed by "=" padding; padding may also be missing. Characters outside of
 * the alphabet are treated as zero bits. A dangling final character that cannot
 * form a full byte is ignored.
 *
 * @param {String} base64 Base64 encoded string
 * @return {ArrayBuffer} Decoded bytes
 */
export function decodeBase64(base64) {
    // Trailing padding carries no data, so size the output from the data characters only
    let dataLength = base64.length;
    while (dataLength > 0 && base64[dataLength - 1] === '=') {
        dataLength--;
    }

    // Every character contributes 6 bits; only complete bytes are emitted
    const arrayBuffer = new ArrayBuffer(Math.floor((dataLength * 3) / 4));
    const bytes = new Uint8Array(arrayBuffer);

    // 6-bit value at the given position, zero past the end of the data
    const sextetAt = index => (index < dataLength ? base64Lookup[base64.charCodeAt(index)] || 0 : 0);

    let p = 0;
    for (let i = 0; i < dataLength; i += 4) {
        const encoded1 = sextetAt(i);
        const encoded2 = sextetAt(i + 1);
        const encoded3 = sextetAt(i + 2);
        const encoded4 = sextetAt(i + 3);

        // For a partial final group the surplus writes land past the end of
        // the typed array, where they are silently discarded
        bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
        bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
        bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
    }

    return arrayBuffer;
}
46
+
47
/**
 * Returns a cached TextDecoder for the charset, creating it on first use.
 * A charset that TextDecoder rejects falls back to the default ('utf8') decoder.
 *
 * @param {String} [charset] Charset label, defaults to 'utf8'
 * @return {TextDecoder} Decoder instance
 */
export function getDecoder(charset) {
    const label = charset || 'utf8';

    if (decoders.has(label)) {
        return decoders.get(label);
    }

    let decoder;
    try {
        decoder = new TextDecoder(label);
    } catch (err) {
        if (label !== 'utf8') {
            // unknown charset, use default
            return getDecoder();
        }
        // even the default could not be created, nothing left to fall back to
        throw err;
    }

    decoders.set(label, decoder);
    return decoder;
}
67
+
68
/**
 * Reads the contents of a Blob into an ArrayBuffer
 * @param {Blob} blob Blob to convert
 * @returns {Promise<ArrayBuffer>} Resolves with the Blob contents
 */
export async function blobToArrayBuffer(blob) {
    if (!('arrayBuffer' in blob)) {
        // no arrayBuffer member on this object, read it through FileReader instead
        const reader = new FileReader();

        return new Promise((resolve, reject) => {
            reader.onload = function (event) {
                resolve(event.target.result);
            };

            reader.onerror = function () {
                reject(reader.error);
            };

            reader.readAsArrayBuffer(blob);
        });
    }

    return await blob.arrayBuffer();
}
92
+
93
/**
 * Returns the character for a byte value if it is a hexadecimal digit
 *
 * @param {Number} c Character code
 * @return {String|Boolean} The matching character for 0-9, a-f or A-F, otherwise false
 */
export function getHex(c) {
    const isDigit = c >= 0x30 /* 0 */ && c <= 0x39; /* 9 */
    const isLowerHex = c >= 0x61 /* a */ && c <= 0x66; /* f */
    const isUpperHex = c >= 0x41 /* A */ && c <= 0x46; /* F */

    return isDigit || isLowerHex || isUpperHex ? String.fromCharCode(c) : false;
}
99
+
100
/**
 * Decodes the payload of a single mime encoded word
 *
 * @param {String} charset Charset label, optionally followed by a "*language" tag
 * @param {String} encoding Encoding marker; 'Q' and 'B' (any case) are decoded,
 *   anything else leaves the text as is
 * @param {String} str Encoded text of the word
 * @return {String} Decoded unicode string
 */
export function decodeWord(charset, encoding, str) {
    // RFC2231 added language tag to the encoding
    // see: https://tools.ietf.org/html/rfc2231#section-5
    // this implementation silently ignores this tag
    const tagPos = charset.indexOf('*');
    const charsetName = tagPos >= 0 ? charset.slice(0, tagPos) : charset;

    let byteStr;

    switch (encoding.toUpperCase()) {
        case 'Q': {
            const normalized = str
                // remove spaces between = and hex char, this might indicate invalidly applied line splitting
                .replace(/=\s+([0-9a-fA-F])/g, '=$1')
                // convert all underscores to spaces
                .replace(/[_\s]/g, ' ');

            const raw = textEncoder.encode(normalized);
            const decoded = [];
            let pos = 0;
            while (pos < raw.length) {
                if (raw[pos] === 0x3d /* = */ && pos <= raw.length - 2) {
                    const high = getHex(raw[pos + 1]);
                    const low = getHex(raw[pos + 2]);
                    if (high && low) {
                        // "=XX" escape, emit the byte it stands for
                        decoded.push(parseInt(high + low, 16));
                        pos += 3;
                        continue;
                    }
                }
                decoded.push(raw[pos]);
                pos++;
            }

            byteStr = Uint8Array.from(decoded).buffer;
            break;
        }

        case 'B':
            byteStr = decodeBase64(str.replace(/[^a-zA-Z0-9\+\/=]+/g, ''));
            break;

        default:
            // keep as is, the text is passed on as its utf8 bytes
            byteStr = textEncoder.encode(str);
    }

    return getDecoder(charsetName).decode(byteStr);
}
156
+
157
/**
 * Decodes all mime encoded words found in a string
 *
 * @param {*} str Value to process; falsy values are treated as an empty string
 * @return {String} String with encoded words replaced by their decoded text
 */
export function decodeWords(str) {
    const joinMarker = '__\x00JOIN\x00__';

    // only mark neighbouring chunks to be joined if their charsets match
    const markJoinable = (match, left, chLeft, chRight) => (chLeft === chRight ? left + joinMarker : match);

    let result = (str || '').toString();

    // find base64 words that can be joined
    result = result.replace(/(=\?([^?]+)\?[Bb]\?[^?]*\?=)\s*(?==\?([^?]+)\?[Bb]\?[^?]*\?=)/g, markJoinable);

    // find QP words that can be joined
    result = result.replace(/(=\?([^?]+)\?[Qq]\?[^?]*\?=)\s*(?==\?([^?]+)\?[Qq]\?[^?]*\?=)/g, markJoinable);

    // join the marked words by dropping the marker and the word boundaries around it
    result = result.replace(/(\?=)?__\x00JOIN\x00__(=\?([^?]+)\?[QqBb]\?)?/g, '');

    // remove spaces between mime encoded words
    result = result.replace(/(=\?[^?]+\?[QqBb]\?[^?]*\?=)\s+(?==\?[^?]+\?[QqBb]\?[^?]*\?=)/g, '$1');

    // decode words
    return result.replace(/=\?([\w_\-*]+)\?([QqBb])\?([^?]*)\?=/g, (m, charset, encoding, text) => decodeWord(charset, encoding, text));
}
187
+
188
/**
 * Decodes a percent-encoded string whose escaped bytes are in the given charset
 *
 * @param {String} encodedStr String that may contain %XX escape sequences
 * @param {String} [charset] Charset of the resulting byte sequence, defaults to 'utf-8'
 * @return {String} Decoded unicode string
 */
export function decodeURIComponentWithCharset(encodedStr, charset) {
    const encodedBytes = [];

    for (let i = 0; i < encodedStr.length; i++) {
        const c = encodedStr.charAt(i);
        const hexPair = encodedStr.slice(i + 1, i + 3);

        if (c === '%' && /^[a-f0-9]{2}/i.test(hexPair)) {
            // encoded sequence
            encodedBytes.push(parseInt(hexPair, 16));
            i += 2;
        } else if (c.charCodeAt(0) > 126) {
            // Raw non-ASCII text: encode the whole code point. Encoding the two
            // halves of a surrogate pair separately would turn each of them into
            // a replacement character.
            const symbol = String.fromCodePoint(encodedStr.codePointAt(i));
            i += symbol.length - 1;
            for (const byte of textEncoder.encode(symbol)) {
                encodedBytes.push(byte);
            }
        } else {
            // "normal" char
            encodedBytes.push(c.charCodeAt(0));
        }
    }

    return getDecoder(charset || 'utf-8').decode(Uint8Array.from(encodedBytes));
}
218
+
219
/**
 * Merges parameter value continuations ("key*0", "key*1*", "key*", ...) found in
 * header.params into single decoded values, modifying header.params in place
 * https://tools.ietf.org/html/rfc2231#section-3
 *
 * @param {Object} header Object with a `params` key-value map
 */
export function decodeParameterValueContinuations(header) {
    // collected parts for every continued parameter, keyed by lowercased base name
    const continuations = new Map();

    for (const key of Object.keys(header.params)) {
        const suffix = key.match(/\*((\d+)\*?)?$/);
        if (!suffix) {
            // nothing to do here, does not seem like a continuation param
            continue;
        }

        const baseKey = key.slice(0, suffix.index).toLowerCase();
        const nr = Number(suffix[2]) || 0;

        let entry = continuations.get(baseKey);
        if (!entry) {
            entry = {
                charset: false,
                values: []
            };
            continuations.set(baseKey, entry);
        }

        let value = header.params[key];
        if (nr === 0 && suffix[0].endsWith('*')) {
            // first encoded section may start with charset'language' information
            const encoded = value.match(/^([^']*)'[^']*'(.*)$/);
            if (encoded) {
                entry.charset = encoded[1] || 'utf-8';
                value = encoded[2];
            }
        }

        entry.values.push({ nr, value });

        // remove the old reference
        delete header.params[key];
    }

    for (const [key, entry] of continuations) {
        const joined = entry.values
            .sort((a, b) => a.nr - b.nr)
            .map(part => part.value)
            .join('');

        header.params[key] = decodeURIComponentWithCharset(joined, entry.charset);
    }
}