@ssttevee/streamsearch 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +57 -2
  2. package/lib/index.cjs +29 -265
  3. package/lib/index.cjs.map +1 -1
  4. package/lib/index.d.ts +13 -9
  5. package/lib/index.mjs +21 -256
  6. package/lib/index.mjs.map +1 -1
  7. package/lib/iterate-chunks-concatted.cjs +15 -0
  8. package/lib/iterate-chunks-concatted.cjs.map +1 -0
  9. package/lib/iterate-chunks-concatted.d.ts +2 -0
  10. package/lib/iterate-chunks-concatted.mjs +13 -0
  11. package/lib/iterate-chunks-concatted.mjs.map +1 -0
  12. package/lib/iterate-chunks.cjs +22 -0
  13. package/lib/iterate-chunks.cjs.map +1 -0
  14. package/lib/iterate-chunks.d.ts +2 -0
  15. package/lib/iterate-chunks.mjs +20 -0
  16. package/lib/iterate-chunks.mjs.map +1 -0
  17. package/lib/iterate-strings.cjs +16 -0
  18. package/lib/iterate-strings.cjs.map +1 -0
  19. package/lib/iterate-strings.d.ts +2 -0
  20. package/lib/iterate-strings.mjs +14 -0
  21. package/lib/iterate-strings.mjs.map +1 -0
  22. package/lib/iterator.cjs +42 -0
  23. package/lib/iterator.cjs.map +1 -0
  24. package/lib/iterator.d.ts +7 -0
  25. package/lib/iterator.mjs +40 -0
  26. package/lib/iterator.mjs.map +1 -0
  27. package/lib/queueable.cjs +55 -0
  28. package/lib/queueable.cjs.map +1 -0
  29. package/lib/queueable.d.ts +11 -11
  30. package/lib/queueable.mjs +53 -0
  31. package/lib/queueable.mjs.map +1 -0
  32. package/lib/readable.cjs +36 -0
  33. package/lib/readable.cjs.map +1 -0
  34. package/lib/readable.d.ts +7 -7
  35. package/lib/readable.mjs +34 -0
  36. package/lib/readable.mjs.map +1 -0
  37. package/lib/search-BXQkEs3p.js +97 -0
  38. package/lib/search-BXQkEs3p.js.map +1 -0
  39. package/lib/search-D_ihawrM.js +108 -0
  40. package/lib/search-D_ihawrM.js.map +1 -0
  41. package/lib/{index.js → search.cjs} +179 -263
  42. package/lib/search.cjs.map +1 -0
  43. package/lib/search.d.ts +13 -14
  44. package/lib/search.mjs +185 -0
  45. package/lib/search.mjs.map +1 -0
  46. package/lib/split.cjs +26 -0
  47. package/lib/split.cjs.map +1 -0
  48. package/lib/split.d.ts +1 -0
  49. package/lib/split.mjs +24 -0
  50. package/lib/split.mjs.map +1 -0
  51. package/package.json +85 -37
  52. package/lib/index.js.map +0 -1
@@ -1,272 +1,188 @@
1
1
  'use strict';
2
2
 
3
- Object.defineProperty(exports, '__esModule', { value: true });
3
+ var concat = require('uint8arrays/concat');
4
+ var fromString = require('uint8arrays/from-string');
4
5
 
5
- var u8Utils = require('@ssttevee/u8-utils');
6
-
7
- /*
8
- Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
9
- by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
10
- */
11
- function coerce(a) {
12
- if (a instanceof Uint8Array) {
13
- return (index) => a[index];
14
- }
15
- return a;
16
- }
17
- function jsmemcmp(buf1, pos1, buf2, pos2, len) {
18
- const fn1 = coerce(buf1);
19
- const fn2 = coerce(buf2);
20
- for (var i = 0; i < len; ++i) {
21
- if (fn1(pos1 + i) !== fn2(pos2 + i)) {
22
- return false;
23
- }
24
- }
25
- return true;
26
- }
27
- function createOccurenceTable(s) {
28
- // Populate occurrence table with analysis of the needle,
29
- // ignoring last letter.
30
- const table = new Array(256).fill(s.length);
31
- if (s.length > 1) {
32
- for (let i = 0; i < s.length - 1; i++) {
33
- table[s[i]] = s.length - 1 - i;
34
- }
35
- }
36
- return table;
37
- }
38
- const MATCH = Symbol('Match');
39
- class StreamSearch {
40
- constructor(needle) {
41
- this._lookbehind = new Uint8Array();
42
- if (typeof needle === 'string') {
43
- this._needle = needle = u8Utils.stringToArray(needle);
44
- }
45
- else {
46
- this._needle = needle;
47
- }
48
- this._lastChar = needle[needle.length - 1];
49
- this._occ = createOccurenceTable(needle);
50
- }
51
- feed(chunk) {
52
- let pos = 0;
53
- let tokens;
54
- const allTokens = [];
55
- while (pos !== chunk.length) {
56
- [pos, ...tokens] = this._feed(chunk, pos);
57
- allTokens.push(...tokens);
58
- }
59
- return allTokens;
60
- }
61
- end() {
62
- const tail = this._lookbehind;
63
- this._lookbehind = new Uint8Array();
64
- return tail;
65
- }
66
- _feed(data, buf_pos) {
67
- const tokens = [];
68
- // Positive: points to a position in `data`
69
- // pos == 3 points to data[3]
70
- // Negative: points to a position in the lookbehind buffer
71
- // pos == -2 points to lookbehind[lookbehind_size - 2]
72
- let pos = -this._lookbehind.length;
73
- if (pos < 0) {
74
- // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
75
- // search with character lookup code that considers both the
76
- // lookbehind buffer and the current round's haystack data.
77
- //
78
- // Loop until (condition 1)
79
- // there is a match.
80
- // or until
81
- // we've moved past the position that requires the
82
- // lookbehind buffer. In this case we switch to the
83
- // optimized loop.
84
- // or until (condition 3)
85
- // the character to look at lies outside the haystack.
86
- while (pos < 0 && pos <= data.length - this._needle.length) {
87
- const ch = this._charAt(data, pos + this._needle.length - 1);
88
- if (ch === this._lastChar && this._memcmp(data, pos, this._needle.length - 1)) {
89
- if (pos > -this._lookbehind.length) {
90
- tokens.push(this._lookbehind.slice(0, this._lookbehind.length + pos));
91
- }
92
- tokens.push(MATCH);
93
- this._lookbehind = new Uint8Array();
94
- return [pos + this._needle.length, ...tokens];
95
- }
96
- else {
97
- pos += this._occ[ch];
98
- }
99
- }
100
- // No match.
101
- if (pos < 0) {
102
- // There's too little data for Boyer-Moore-Horspool to run,
103
- // so we'll use a different algorithm to skip as much as
104
- // we can.
105
- // Forward pos until
106
- // the trailing part of lookbehind + data
107
- // looks like the beginning of the needle
108
- // or until
109
- // pos == 0
110
- while (pos < 0 && !this._memcmp(data, pos, data.length - pos)) {
111
- pos++;
112
- }
113
- }
114
- if (pos >= 0) {
115
- // Discard lookbehind buffer.
116
- tokens.push(this._lookbehind);
117
- this._lookbehind = new Uint8Array();
118
- }
119
- else {
120
- // Cut off part of the lookbehind buffer that has
121
- // been processed and append the entire haystack
122
- // into it.
123
- const bytesToCutOff = this._lookbehind.length + pos;
124
- if (bytesToCutOff > 0) {
125
- // The cut off data is guaranteed not to contain the needle.
126
- tokens.push(this._lookbehind.slice(0, bytesToCutOff));
127
- this._lookbehind = this._lookbehind.slice(bytesToCutOff);
128
- }
129
- this._lookbehind = Uint8Array.from(new Array(this._lookbehind.length + data.length), (_, i) => this._charAt(data, i - this._lookbehind.length));
130
- return [data.length, ...tokens];
131
- }
132
- }
133
- pos += buf_pos;
134
- // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
135
- // search with optimized character lookup code that only considers
136
- // the current round's haystack data.
137
- while (pos <= data.length - this._needle.length) {
138
- const ch = data[pos + this._needle.length - 1];
139
- if (ch === this._lastChar
140
- && data[pos] === this._needle[0]
141
- && jsmemcmp(this._needle, 0, data, pos, this._needle.length - 1)) {
142
- if (pos > buf_pos) {
143
- tokens.push(data.slice(buf_pos, pos));
144
- }
145
- tokens.push(MATCH);
146
- return [pos + this._needle.length, ...tokens];
147
- }
148
- else {
149
- pos += this._occ[ch];
150
- }
151
- }
152
- // There was no match. If there's trailing haystack data that we cannot
153
- // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
154
- // data is less than the needle size) then match using a modified
155
- // algorithm that starts matching from the beginning instead of the end.
156
- // Whatever trailing data is left after running this algorithm is added to
157
- // the lookbehind buffer.
158
- if (pos < data.length) {
159
- while (pos < data.length && (data[pos] !== this._needle[0]
160
- || !jsmemcmp(data, pos, this._needle, 0, data.length - pos))) {
161
- ++pos;
162
- }
163
- if (pos < data.length) {
164
- this._lookbehind = data.slice(pos);
165
- }
166
- }
167
- // Everything until pos is guaranteed not to contain needle data.
168
- if (pos > 0) {
169
- tokens.push(data.slice(buf_pos, pos < data.length ? pos : data.length));
170
- }
171
- return [data.length, ...tokens];
172
- }
173
- _charAt(data, pos) {
174
- if (pos < 0) {
175
- return this._lookbehind[this._lookbehind.length + pos];
176
- }
177
- return data[pos];
178
- }
179
- ;
180
- _memcmp(data, pos, len) {
181
- return jsmemcmp(this._charAt.bind(this, data), pos, this._needle, 0, len);
182
- }
183
- ;
6
+ /*
7
+ Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
8
+ by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
9
+ */
10
+ function jsmemcmp(buf1, pos1, buf2, pos2, len) {
11
+ for (let i = 0; i < len; i++) {
12
+ if (buf1[pos1 + i] !== buf2[pos2 + i]) {
13
+ return false;
14
+ }
15
+ }
16
+ return true;
184
17
  }
185
-
186
- class ReadableStreamSearch {
187
- constructor(needle, _readableStream) {
188
- this._readableStream = _readableStream;
189
- this._search = new StreamSearch(needle);
190
- }
191
- async *[Symbol.asyncIterator]() {
192
- const reader = this._readableStream.getReader();
193
- try {
194
- while (true) {
195
- const result = await reader.read();
196
- if (result.done) {
197
- break;
198
- }
199
- yield* this._search.feed(result.value);
200
- }
201
- const tail = this._search.end();
202
- if (tail.length) {
203
- yield tail;
204
- }
205
- }
206
- finally {
207
- reader.releaseLock();
208
- }
209
- }
18
+ function createOccurenceTable(s) {
19
+ // Populate occurrence table with analysis of the needle,
20
+ // ignoring last letter.
21
+ const table = new Array(256).fill(s.length);
22
+ if (s.length > 1) {
23
+ for (let i = 0; i < s.length - 1; i++) {
24
+ table[s[i]] = s.length - 1 - i;
25
+ }
26
+ }
27
+ return table;
210
28
  }
211
-
212
- function splitChunks(chunks, needle) {
213
- const search = new StreamSearch(needle);
214
- const outchunks = [[]];
215
- for (const chunk of chunks) {
216
- for (const token of search.feed(chunk)) {
217
- if (token === MATCH) {
218
- outchunks.push([]);
219
- }
220
- else {
221
- outchunks[outchunks.length - 1].push(token);
222
- }
223
- }
224
- }
225
- const end = search.end();
226
- outchunks[outchunks.length - 1].push(end);
227
- return outchunks.map((chunks) => u8Utils.mergeArrays(...chunks));
228
- }
229
- function split(buf, needle) {
230
- return splitChunks([buf], needle);
231
- }
232
- async function* chunksIterator(iter) {
233
- let chunks = [];
234
- for await (const value of iter) {
235
- if (value === MATCH) {
236
- yield chunks;
237
- chunks = [];
238
- }
239
- else {
240
- chunks.push(value);
241
- }
242
- }
243
- yield chunks;
244
- }
245
- async function* stringIterator(iter) {
246
- for await (const chunk of chunksIterator(iter)) {
247
- yield chunk.map(u8Utils.arrayToString).join('');
248
- }
249
- }
250
- async function allStrings(iter) {
251
- const segments = [];
252
- for await (const value of stringIterator(iter)) {
253
- segments.push(value);
254
- }
255
- return segments;
256
- }
257
- async function* arrayIterator(iter) {
258
- for await (const chunk of chunksIterator(iter)) {
259
- yield u8Utils.mergeArrays(...chunk);
260
- }
29
+ const MATCH = Symbol("Match");
30
+ class StreamSearch {
31
+ _needle;
32
+ _lastChar;
33
+ _occ;
34
+ _lookbehind = new Uint8Array();
35
+ constructor(needle) {
36
+ if (typeof needle === "string") {
37
+ this._needle = needle = fromString.fromString(needle);
38
+ }
39
+ else {
40
+ this._needle = needle;
41
+ }
42
+ this._lastChar = needle[needle.length - 1];
43
+ this._occ = createOccurenceTable(needle);
44
+ }
45
+ feed(chunk) {
46
+ let pos = 0;
47
+ let tokens;
48
+ const allTokens = [];
49
+ while (pos !== chunk.length) {
50
+ [pos, ...tokens] = this._feed(chunk, pos);
51
+ allTokens.push(...tokens);
52
+ }
53
+ return allTokens;
54
+ }
55
+ end() {
56
+ const tail = this._lookbehind;
57
+ this._lookbehind = new Uint8Array();
58
+ return tail;
59
+ }
60
+ _feed(data, buf_pos) {
61
+ const tokens = [];
62
+ // Positive: points to a position in `data`
63
+ // pos == 3 points to data[3]
64
+ // Negative: points to a position in the lookbehind buffer
65
+ // pos == -2 points to lookbehind[lookbehind_size - 2]
66
+ let pos = -this._lookbehind.length;
67
+ if (pos < 0) {
68
+ // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
69
+ // search with character lookup code that considers both the
70
+ // lookbehind buffer and the current round's haystack data.
71
+ //
72
+ // Loop until (condition 1)
73
+ // there is a match.
74
+ // or until
75
+ // we've moved past the position that requires the
76
+ // lookbehind buffer. In this case we switch to the
77
+ // optimized loop.
78
+ // or until (condition 3)
79
+ // the character to look at lies outside the haystack.
80
+ while (pos < 0 && pos <= data.length - this._needle.length) {
81
+ const cpos = pos + this._needle.length - 1;
82
+ const ch = cpos < 0
83
+ ? this._lookbehind[this._lookbehind.length + cpos]
84
+ : data[cpos];
85
+ if (ch === this._lastChar &&
86
+ this._memcmp(data, pos, this._needle.length - 1)) {
87
+ if (pos > -this._lookbehind.length) {
88
+ tokens.push(this._lookbehind.subarray(0, this._lookbehind.length + pos));
89
+ }
90
+ tokens.push(MATCH);
91
+ this._lookbehind = new Uint8Array();
92
+ return [pos + this._needle.length, ...tokens];
93
+ }
94
+ else {
95
+ pos += this._occ[ch];
96
+ }
97
+ }
98
+ // No match.
99
+ if (pos < 0) {
100
+ // There's too little data for Boyer-Moore-Horspool to run,
101
+ // so we'll use a different algorithm to skip as much as
102
+ // we can.
103
+ // Forward pos until
104
+ // the trailing part of lookbehind + data
105
+ // looks like the beginning of the needle
106
+ // or until
107
+ // pos == 0
108
+ while (pos < 0 && !this._memcmp(data, pos, data.length - pos)) {
109
+ pos++;
110
+ }
111
+ }
112
+ if (pos >= 0) {
113
+ // Discard lookbehind buffer.
114
+ tokens.push(this._lookbehind);
115
+ this._lookbehind = new Uint8Array();
116
+ }
117
+ else {
118
+ // Cut off part of the lookbehind buffer that has
119
+ // been processed and append the entire haystack
120
+ // into it.
121
+ const bytesToCutOff = this._lookbehind.length + pos;
122
+ if (bytesToCutOff > 0) {
123
+ // The cut off data is guaranteed not to contain the needle.
124
+ tokens.push(this._lookbehind.subarray(0, bytesToCutOff));
125
+ this._lookbehind = this._lookbehind.subarray(bytesToCutOff);
126
+ }
127
+ this._lookbehind = concat.concat([this._lookbehind, data]);
128
+ return [data.length, ...tokens];
129
+ }
130
+ }
131
+ pos += buf_pos;
132
+ // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
133
+ // search with optimized character lookup code that only considers
134
+ // the current round's haystack data.
135
+ while (pos <= data.length - this._needle.length) {
136
+ const ch = data[pos + this._needle.length - 1];
137
+ if (ch === this._lastChar &&
138
+ data[pos] === this._needle[0] &&
139
+ jsmemcmp(this._needle, 0, data, pos, this._needle.length - 1)) {
140
+ if (pos > buf_pos) {
141
+ tokens.push(data.subarray(buf_pos, pos));
142
+ }
143
+ tokens.push(MATCH);
144
+ return [pos + this._needle.length, ...tokens];
145
+ }
146
+ else {
147
+ pos += this._occ[ch];
148
+ }
149
+ }
150
+ // There was no match. If there's trailing haystack data that we cannot
151
+ // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
152
+ // data is less than the needle size) then match using a modified
153
+ // algorithm that starts matching from the beginning instead of the end.
154
+ // Whatever trailing data is left after running this algorithm is added to
155
+ // the lookbehind buffer.
156
+ if (pos < data.length) {
157
+ while (pos < data.length &&
158
+ (data[pos] !== this._needle[0] ||
159
+ !jsmemcmp(data, pos, this._needle, 0, data.length - pos))) {
160
+ ++pos;
161
+ }
162
+ if (pos < data.length) {
163
+ this._lookbehind = data.subarray(pos);
164
+ }
165
+ }
166
+ // Everything until pos is guaranteed not to contain needle data.
167
+ if (pos > 0) {
168
+ tokens.push(data.subarray(buf_pos, pos < data.length ? pos : data.length));
169
+ }
170
+ return [data.length, ...tokens];
171
+ }
172
+ _memcmp(data, pos, len) {
173
+ if (pos < 0) {
174
+ if (!jsmemcmp(this._lookbehind, this._lookbehind.length + pos, this._needle, 0, Math.min(-pos, len))) {
175
+ return false;
176
+ }
177
+ if (len < -pos) {
178
+ return true;
179
+ }
180
+ len += pos;
181
+ }
182
+ return jsmemcmp(data, Math.max(0, pos), this._needle, -Math.min(0, pos), len);
183
+ }
261
184
  }
262
185
 
263
186
  exports.MATCH = MATCH;
264
- exports.ReadableStreamSearch = ReadableStreamSearch;
265
187
  exports.StreamSearch = StreamSearch;
266
- exports.allStrings = allStrings;
267
- exports.arrayIterator = arrayIterator;
268
- exports.chunksIterator = chunksIterator;
269
- exports.split = split;
270
- exports.splitChunks = splitChunks;
271
- exports.stringIterator = stringIterator;
272
- //# sourceMappingURL=index.js.map
188
+ //# sourceMappingURL=search.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.cjs","sources":["../src/search.ts"],"sourcesContent":[null],"names":["fromString","concat"],"mappings":";;;;;AAAA;;;AAGE;AAKF,SAAS,QAAQ,CAChB,IAAgB,EAChB,IAAY,EACZ,IAAgB,EAChB,IAAY,EACZ,GAAW,EAAA;AAEX,IAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE;AAC7B,QAAA,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE;AACtC,YAAA,OAAO,KAAK;QACb;IACD;AAEA,IAAA,OAAO,IAAI;AACZ;AAEA,SAAS,oBAAoB,CAAC,CAAa,EAAA;;;AAG1C,IAAA,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;AAC3C,IAAA,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;AACjB,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;AACtC,YAAA,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC;QAC/B;IACD;AAEA,IAAA,OAAO,KAAK;AACb;MAEa,KAAK,GAAG,MAAM,CAAC,OAAO;MAItB,YAAY,CAAA;AAChB,IAAA,OAAO;AACP,IAAA,SAAS;AACT,IAAA,IAAI;AAEJ,IAAA,WAAW,GAAe,IAAI,UAAU,EAAE;AAElD,IAAA,WAAA,CAAmB,MAA2B,EAAA;AAC7C,QAAA,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE;YAC/B,IAAI,CAAC,OAAO,GAAG,MAAM,GAAGA,qBAAU,CAAC,MAAM,CAAC;QAC3C;aAAO;AACN,YAAA,IAAI,CAAC,OAAO,GAAG,MAAM;QACtB;QAEA,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;AAC1C,QAAA,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC,MAAM,CAAC;IACzC;AAEO,IAAA,IAAI,CAAC,KAAiB,EAAA;QAC5B,IAAI,GAAG,GAAG,CAAC;AACX,QAAA,IAAI,MAAe;QACnB,MAAM,SAAS,GAAY,EAAE;AAC7B,QAAA,OAAO,GAAG,KAAK,KAAK,CAAC,MAAM,EAAE;AAC5B,YAAA,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC;AACzC,YAAA,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC1B;AAEA,QAAA,OAAO,SAAS;IACjB;IAEO,GAAG,GAAA;AACT,QAAA,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW;AAC7B,QAAA,IAAI,CAAC,WAAW,GAAG,IAAI,UAAU,EAAE;AACnC,QAAA,OAAO,IAAI;IACZ;IAEQ,KAAK,CAAC,IAAgB,EAAE,OAAe,EAAA;QAC9C,MAAM,MAAM,GAAY,EAAE;;;;;QAM1B,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM;AAElC,QAAA,IAAI,GAAG,GAAG,CAAC,EAAE;;;;;;;;;;;;;AAaZ,YAAA,OAAO,GAAG,GAAG,CAAC,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBAC3D,MAAM,IAAI,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;AAC1C,gBAAA,MAAM,EAAE,GACP,IAAI,GAAG;AACN,sBAAE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,IAAI;AACjD,sBAAE,IAAI,CAAC,IAAI,CAAC;AAEd,gBAAA,IACC,EAAE,KAAK,IAAI,CAAC,SAAS;AACrB,oBAAA,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAC/C;oBACD,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;wBACnC,MAAM,CAAC,IAAI,CACV,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,CAAC,CAC3D;oBACF;AAEA,oBAAA,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC;AAElB,oBAAA,IAAI,CAAC,WAAW,GAAG,IAAI,UAAU,EAAE;AAEnC,oBAAA,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,MAAM,CAAC;gBAC9C;qBAAO;AACN,oBAAA,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrB;YACD;;AAIA,YAAA,IAAI,GAAG,GAAG,CAAC,EAAE;;;;;;;;;gBASZ,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE;AAC9D,oBAAA,GAAG,EAAE;gBACN;YACD;AAEA,YAAA,IAAI,GAAG,IAAI,CAAC,EAAE;;AAEb,gBAAA,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;AAC7B,gBAAA,IAAI,CAAC,WAAW,GAAG,IAAI,UAAU,EAAE;YACpC;iBAAO;;;;gBAIN,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG;AAEnD,gBAAA,IAAI,aAAa,GAAG,CAAC,EAAE;;AAEtB,oBAAA,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;oBACxD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAC5D;AAEA,gBAAA,IAAI,CAAC,WAAW,GAAGC,aAAM,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;gBAEnD,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,MAAM,CAAC;YAChC;QACD;QAEA,GAAG,IAAI,OAAO;;;;AAKd,QAAA,OAAO,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;AAChD,YAAA,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;AAE9C,YAAA,IACC,EAAE,KAAK,IAAI,CAAC,SAAS;gBACrB,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7B,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAC5D;AACD,gBAAA,IAAI,GAAG,GAAG,OAAO,EAAE;AAClB,oBAAA,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;gBACzC;AAEA,gBAAA,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC;AAElB,gBAAA,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,MAAM,CAAC;YAC9C;iBAAO;AACN,gBAAA,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB;QACD;;;;;;;AAQA,QAAA,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE;AACtB,YAAA,OACC,GAAG,GAAG,IAAI,CAAC,MAAM;iBAChB,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBAC7B,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,EACzD;AACD,gBAAA,EAAE,GAAG;YACN;AAEA,YAAA,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE;gBACtB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;YACtC;QACD;;AAGA,QAAA,IAAI,GAAG,GAAG,CAAC,EAAE;YACZ,MAAM,CAAC,IAAI,CACV,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAC7D;QACF;QAEA,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,MAAM,CAAC;IAChC;AAEQ,IAAA,OAAO,CAAC,IAAgB,EAAE,GAAW,EAAE,GAAW,EAAA;AACzD,QAAA,IAAI,GAAG,GAAG,CAAC,EAAE;AACZ,YAAA,IACC,CAAC,QAAQ,CACR,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,EAC7B,IAAI,CAAC,OAAO,EACZ,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CACnB,EACA;AACD,gBAAA,OAAO,KAAK;YACb;AAEA,YAAA,IAAI,GAAG,GAAG,CAAC,GAAG,EAAE;AACf,gBAAA,OAAO,IAAI;YACZ;YAEA,GAAG,IAAI,GAAG;QACX;AAEA,QAAA,OAAO,QAAQ,CACd,IAAI,EACJ,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,EAChB,IAAI,CAAC,OAAO,EACZ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,EACjB,GAAG,CACH;IACF;AACA;;;;;"}
package/lib/search.d.ts CHANGED
@@ -1,14 +1,13 @@
1
- export declare const MATCH: unique symbol;
2
- export declare type Token = Uint8Array | typeof MATCH;
3
- export declare class StreamSearch {
4
- private _needle;
5
- private _lastChar;
6
- private _occ;
7
- private _lookbehind;
8
- constructor(needle: Uint8Array | string);
9
- feed(chunk: Uint8Array): Token[];
10
- end(): Uint8Array;
11
- private _feed;
12
- private _charAt;
13
- private _memcmp;
14
- }
1
+ export declare const MATCH: unique symbol;
2
+ export type Token = Uint8Array | typeof MATCH;
3
+ export declare class StreamSearch {
4
+ private _needle;
5
+ private _lastChar;
6
+ private _occ;
7
+ private _lookbehind;
8
+ constructor(needle: Uint8Array | string);
9
+ feed(chunk: Uint8Array): Token[];
10
+ end(): Uint8Array;
11
+ private _feed;
12
+ private _memcmp;
13
+ }
package/lib/search.mjs ADDED
@@ -0,0 +1,185 @@
1
+ import { concat } from 'uint8arrays/concat';
2
+ import { fromString } from 'uint8arrays/from-string';
3
+
4
+ /*
5
+ Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
6
+ by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
7
+ */
8
+ function jsmemcmp(buf1, pos1, buf2, pos2, len) {
9
+ for (let i = 0; i < len; i++) {
10
+ if (buf1[pos1 + i] !== buf2[pos2 + i]) {
11
+ return false;
12
+ }
13
+ }
14
+ return true;
15
+ }
16
+ function createOccurenceTable(s) {
17
+ // Populate occurrence table with analysis of the needle,
18
+ // ignoring last letter.
19
+ const table = new Array(256).fill(s.length);
20
+ if (s.length > 1) {
21
+ for (let i = 0; i < s.length - 1; i++) {
22
+ table[s[i]] = s.length - 1 - i;
23
+ }
24
+ }
25
+ return table;
26
+ }
27
+ const MATCH = Symbol("Match");
28
+ class StreamSearch {
29
+ _needle;
30
+ _lastChar;
31
+ _occ;
32
+ _lookbehind = new Uint8Array();
33
+ constructor(needle) {
34
+ if (typeof needle === "string") {
35
+ this._needle = needle = fromString(needle);
36
+ }
37
+ else {
38
+ this._needle = needle;
39
+ }
40
+ this._lastChar = needle[needle.length - 1];
41
+ this._occ = createOccurenceTable(needle);
42
+ }
43
+ feed(chunk) {
44
+ let pos = 0;
45
+ let tokens;
46
+ const allTokens = [];
47
+ while (pos !== chunk.length) {
48
+ [pos, ...tokens] = this._feed(chunk, pos);
49
+ allTokens.push(...tokens);
50
+ }
51
+ return allTokens;
52
+ }
53
+ end() {
54
+ const tail = this._lookbehind;
55
+ this._lookbehind = new Uint8Array();
56
+ return tail;
57
+ }
58
+ _feed(data, buf_pos) {
59
+ const tokens = [];
60
+ // Positive: points to a position in `data`
61
+ // pos == 3 points to data[3]
62
+ // Negative: points to a position in the lookbehind buffer
63
+ // pos == -2 points to lookbehind[lookbehind_size - 2]
64
+ let pos = -this._lookbehind.length;
65
+ if (pos < 0) {
66
+ // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
67
+ // search with character lookup code that considers both the
68
+ // lookbehind buffer and the current round's haystack data.
69
+ //
70
+ // Loop until (condition 1)
71
+ // there is a match.
72
+ // or until
73
+ // we've moved past the position that requires the
74
+ // lookbehind buffer. In this case we switch to the
75
+ // optimized loop.
76
+ // or until (condition 3)
77
+ // the character to look at lies outside the haystack.
78
+ while (pos < 0 && pos <= data.length - this._needle.length) {
79
+ const cpos = pos + this._needle.length - 1;
80
+ const ch = cpos < 0
81
+ ? this._lookbehind[this._lookbehind.length + cpos]
82
+ : data[cpos];
83
+ if (ch === this._lastChar &&
84
+ this._memcmp(data, pos, this._needle.length - 1)) {
85
+ if (pos > -this._lookbehind.length) {
86
+ tokens.push(this._lookbehind.subarray(0, this._lookbehind.length + pos));
87
+ }
88
+ tokens.push(MATCH);
89
+ this._lookbehind = new Uint8Array();
90
+ return [pos + this._needle.length, ...tokens];
91
+ }
92
+ else {
93
+ pos += this._occ[ch];
94
+ }
95
+ }
96
+ // No match.
97
+ if (pos < 0) {
98
+ // There's too little data for Boyer-Moore-Horspool to run,
99
+ // so we'll use a different algorithm to skip as much as
100
+ // we can.
101
+ // Forward pos until
102
+ // the trailing part of lookbehind + data
103
+ // looks like the beginning of the needle
104
+ // or until
105
+ // pos == 0
106
+ while (pos < 0 && !this._memcmp(data, pos, data.length - pos)) {
107
+ pos++;
108
+ }
109
+ }
110
+ if (pos >= 0) {
111
+ // Discard lookbehind buffer.
112
+ tokens.push(this._lookbehind);
113
+ this._lookbehind = new Uint8Array();
114
+ }
115
+ else {
116
+ // Cut off part of the lookbehind buffer that has
117
+ // been processed and append the entire haystack
118
+ // into it.
119
+ const bytesToCutOff = this._lookbehind.length + pos;
120
+ if (bytesToCutOff > 0) {
121
+ // The cut off data is guaranteed not to contain the needle.
122
+ tokens.push(this._lookbehind.subarray(0, bytesToCutOff));
123
+ this._lookbehind = this._lookbehind.subarray(bytesToCutOff);
124
+ }
125
+ this._lookbehind = concat([this._lookbehind, data]);
126
+ return [data.length, ...tokens];
127
+ }
128
+ }
129
+ pos += buf_pos;
130
+ // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
131
+ // search with optimized character lookup code that only considers
132
+ // the current round's haystack data.
133
+ while (pos <= data.length - this._needle.length) {
134
+ const ch = data[pos + this._needle.length - 1];
135
+ if (ch === this._lastChar &&
136
+ data[pos] === this._needle[0] &&
137
+ jsmemcmp(this._needle, 0, data, pos, this._needle.length - 1)) {
138
+ if (pos > buf_pos) {
139
+ tokens.push(data.subarray(buf_pos, pos));
140
+ }
141
+ tokens.push(MATCH);
142
+ return [pos + this._needle.length, ...tokens];
143
+ }
144
+ else {
145
+ pos += this._occ[ch];
146
+ }
147
+ }
148
+ // There was no match. If there's trailing haystack data that we cannot
149
+ // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
150
+ // data is less than the needle size) then match using a modified
151
+ // algorithm that starts matching from the beginning instead of the end.
152
+ // Whatever trailing data is left after running this algorithm is added to
153
+ // the lookbehind buffer.
154
+ if (pos < data.length) {
155
+ while (pos < data.length &&
156
+ (data[pos] !== this._needle[0] ||
157
+ !jsmemcmp(data, pos, this._needle, 0, data.length - pos))) {
158
+ ++pos;
159
+ }
160
+ if (pos < data.length) {
161
+ this._lookbehind = data.subarray(pos);
162
+ }
163
+ }
164
+ // Everything until pos is guaranteed not to contain needle data.
165
+ if (pos > 0) {
166
+ tokens.push(data.subarray(buf_pos, pos < data.length ? pos : data.length));
167
+ }
168
+ return [data.length, ...tokens];
169
+ }
170
+ _memcmp(data, pos, len) {
171
+ if (pos < 0) {
172
+ if (!jsmemcmp(this._lookbehind, this._lookbehind.length + pos, this._needle, 0, Math.min(-pos, len))) {
173
+ return false;
174
+ }
175
+ if (len < -pos) {
176
+ return true;
177
+ }
178
+ len += pos;
179
+ }
180
+ return jsmemcmp(data, Math.max(0, pos), this._needle, -Math.min(0, pos), len);
181
+ }
182
+ }
183
+
184
+ export { MATCH, StreamSearch };
185
+ //# sourceMappingURL=search.mjs.map