@gmod/trix 1.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # v2.0.2
2
+
3
+ - Publish src directory for better source maps
4
+
5
+ # v2.0.1
6
+
7
+ - Redeploy to npm with preversion script added
8
+
9
+ # v2.0.0
10
+
11
+ - Fix issue with infinite loop
12
+ - Add abortsignal support
13
+ - Only query first word when string with multiple words is entered
14
+
1
15
  # v1.0.0
2
16
 
3
17
  - Change result format from just the "result" string returned to be "term,result"
package/README.md CHANGED
@@ -1,4 +1,7 @@
1
+ [![Build Status](https://img.shields.io/github/workflow/status/GMOD/trix-js/Push/main?logo=github&style=for-the-badge)](https://github.com/GMOD/trix-js/actions?query=branch%3Amain+workflow%3APush+)
2
+
1
3
  # trix-js
4
+
2
5
  Read UCSC Trix indexes in pure JavaScript
3
6
 
4
7
  ## Usage
@@ -9,37 +12,43 @@ import { RemoteFile } from 'generic-filehandle'
9
12
 
10
13
  // any filehandle object that supports the Nodejs FileHandle API will work.
11
14
  // We use generic-filehandle here to demonstrate searching files on remote servers.
12
- const ixxFile = new RemoteFile('https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx');
13
- const ixFile = new RemoteFile('https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix');
15
+ const ixxFile = new RemoteFile(
16
+ 'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx',
17
+ )
18
+ const ixFile = new RemoteFile(
19
+ 'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix',
20
+ )
14
21
 
15
- const trix = new Trix(ixxFile, ixFile);
22
+ const trix = new Trix(ixxFile, ixFile)
16
23
 
17
24
  async function doStuff() {
18
- const results = await trix.search('oca');
19
- console.log(results);
25
+ const results = await trix.search('oca')
26
+ console.log(results)
20
27
  }
21
- doStuff();
22
-
28
+ doStuff()
23
29
  ```
24
30
 
25
31
  ## Documentation
32
+
26
33
  ### Trix constructor
34
+
27
35
  The Trix class constructor accepts arguments:
36
+
28
37
  - `ixxFile` - a filehandle object for the trix .ixx file
29
38
  - `ixFile` - a filehandle object for the trix .ix file
30
39
  - `maxResults = 20` - an optional number specifying the maximum number of results to return on `trix.search()`
31
40
 
32
-
33
41
  ### Trix search
42
+
34
43
  **Search the index files for a term and find its keys.**<br>
35
44
  **In the case of searching with multiple words, `trix.search()` finds the intersection of the result sets.**<br>
36
45
  The Trix search function accepts argument:
46
+
37
47
  - `searchString` - a string of space-separated words for what to search the index file and find keys for<br>
38
-
48
+
39
49
  The Trix search function returns: <br>
40
- - `Promise<string[]>` - a promised array of strings where each string is an itemId result
41
-
42
50
 
51
+ - `Promise<[string, string][]>` - a promise resolving to an array of `[term, result]` pairs, where `term` is the word from the left column of the trix index and `result` is the matching entry from the right column
43
52
 
44
53
  ## Examples
45
54
 
@@ -47,49 +56,37 @@ The Trix search function returns: <br>
47
56
  import { LocalFile } from 'generic-filehandle'
48
57
  import Trix from '@gmod/trix'
49
58
 
50
- const ixxFile = new LocalFile('out.ixx');
51
- const ixFile = new LocalFile('out.ix');
59
+ const ixxFile = new LocalFile('out.ixx')
60
+ const ixFile = new LocalFile('out.ix')
52
61
 
53
62
  // limit maxResults to 5
54
- const trix = new Trix(ixxFile, ixFile, 5);
63
+ const trix = new Trix(ixxFile, ixFile, 5)
55
64
 
56
65
  async function doStuff() {
57
- const results1 = await trix.search('herc');
58
- console.log(results1);
66
+ const results1 = await trix.search('herc')
67
+ console.log(results1)
59
68
 
60
69
  // increase maxResults to 30
61
- trix.maxResults = 30;
70
+ trix.maxResults = 30
62
71
 
63
- const results2 = await trix.search('linc');
64
- console.log(results2);
72
+ const results2 = await trix.search('linc')
73
+ console.log(results2)
65
74
  }
66
75
 
67
- doStuff();
76
+ doStuff()
68
77
  ```
69
- <br><br>
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
-
80
-
81
-
82
-
83
78
 
79
+ <br><br>
84
80
 
85
81
  ## Development
86
82
 
87
-
88
83
  ### Test trix-js
84
+
89
85
  First, clone this repo and install npm packages. <br>
90
86
  Then, run `npm test`. <br>
91
87
 
92
88
  ### Test the UCSC TrixSearch - Requires Linux
89
+
93
90
  First, clone this repo.
94
91
  To run test searches on a track hub using the UCSC `TrixSearch`, navigate to `tests/testdata/test#` and run `bash test#script.sh` where # is the test number.
95
92
  To change search terms, edit `searchterms.txt`.
@@ -97,7 +94,9 @@ To change search terms, edit `searchterms.txt`.
97
94
  **Wondering what to search for?**<br>
98
95
  Open up `tests/testdata/test#/input.txt`.
99
96
 
100
-
101
97
  **How to test my own .gff.gz data?**<br>
102
98
  Navigate to `/test/rawGenomes` and create a directory with your .gff.gz file in it. From within that directory, run `bash ../../programs/gff3ToInput.sh <.gff3.gz FILE> <OUTPUT NAME>`.
103
99
 
100
+ ## Reference
101
+
102
+ See https://genome.ucsc.edu/goldenPath/help/trix.html for basic concepts of trix and https://github.com/GMOD/ixixx-js for a JavaScript implementation of the ixIxx command.
package/dist/index.d.ts CHANGED
@@ -1,59 +1,12 @@
1
1
  import type { GenericFilehandle } from 'generic-filehandle';
2
2
  export default class Trix {
3
- private index;
4
3
  private ixFile;
4
+ private ixxFile;
5
5
  maxResults: number;
6
- /**
7
- * @param ixxFile [anyFile] the second-level trix index file produced by ixIxx.
8
- * @param ixFile [anyFile] the first-level trix index file produced by ixIxx.
9
- * @param maxResults [number] the maximum number of results to return. Default is set to 20.
10
- */
11
6
  constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
12
- /**
13
- * Search trix for the given searchWord(s). Return up to {this.maxResults} results.
14
- * This method matches each index prefix against each searchWord. It does not do fuzzy matching.
15
- *
16
- * @param searchString [string] term(s) separated by spaces to search for id(s).
17
- * @returns results [Array<string>] where each string is a corresponding itemId.
18
- */
19
- search(searchString: string): Promise<string[]>;
20
- /**
21
- * Seek ahead to the correct position in the .ix file,
22
- * then load that chunk of .ix into a buffer.
23
- *
24
- * @param searchWord [string]
25
- * @returns a Buffer holding the sections we want to search.
26
- */
7
+ search(searchString: string, opts?: {
8
+ signal?: AbortSignal;
9
+ }): Promise<[string, string][]>;
10
+ private getIndex;
27
11
  private _getBuffer;
28
- /**
29
- * Given the end position of the last buffer,
30
- * load the next chunk of .ix data into a buffer and return it.
31
- *
32
- * @param seekPosStart [number] where to start loading data into the new buffer.
33
- * @returns a Buffer holding the chunk we want to search.
34
- */
35
- private _getNextChunk;
36
- /**
37
- * Create and return a buffer given the start and end position
38
- * of what to load from the .ix file.
39
- *
40
- * @param seekPosStart [number] byte the buffer should start reading from file.
41
- * @param seekPosEnd [number] byte the buffer should stop reading from file.
42
- * @returns a Buffer holding the chunk of data.
43
- */
44
- private _createBuffer;
45
- /**
46
- * Takes in a hit string and returns an array of result terms.
47
- *
48
- * @param line [string] The line of .ix that is a hit.
49
- * @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
50
- */
51
- private _parseHitString;
52
- /**
53
- * Parses ixx file and constructs a map of {word: ixFileLocation}
54
- *
55
- * @param ixxFile [anyFile] second level index that is produced by ixIxx.
56
- * @returns a ParsedIxx map.
57
- */
58
- private _parseIxx;
59
12
  }
package/dist/index.js CHANGED
@@ -1,4 +1,15 @@
1
1
  "use strict";
2
+ var __assign = (this && this.__assign) || function () {
3
+ __assign = Object.assign || function(t) {
4
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
5
+ s = arguments[i];
6
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
7
+ t[p] = s[p];
8
+ }
9
+ return t;
10
+ };
11
+ return __assign.apply(this, arguments);
12
+ };
2
13
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
14
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
15
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -35,17 +46,6 @@ var __generator = (this && this.__generator) || function (thisArg, body) {
35
46
  if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
36
47
  }
37
48
  };
38
- var __values = (this && this.__values) || function(o) {
39
- var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
40
- if (m) return m.call(o);
41
- if (o && typeof o.length === "number") return {
42
- next: function () {
43
- if (o && i >= o.length) o = void 0;
44
- return { value: o && o[i++], done: !o };
45
- }
46
- };
47
- throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
48
- };
49
49
  var __read = (this && this.__read) || function (o, n) {
50
50
  var m = typeof Symbol === "function" && o[Symbol.iterator];
51
51
  if (!m) return o;
@@ -63,364 +63,160 @@ var __read = (this && this.__read) || function (o, n) {
63
63
  return ar;
64
64
  };
65
65
  Object.defineProperty(exports, "__esModule", { value: true });
66
- var trixPrefixSize = 5;
67
- // Define this object with .ixx and .ix files.
68
- // Then use the trixSearch() function to search for a word.
66
+ var TRIX_PREFIX_SIZE = 5;
67
+ var CHUNK_SIZE = 65536;
68
+ // https://stackoverflow.com/a/9229821/2129219
69
+ function uniqBy(a, key) {
70
+ var seen = new Set();
71
+ return a.filter(function (item) {
72
+ var k = key(item);
73
+ return seen.has(k) ? false : seen.add(k);
74
+ });
75
+ }
69
76
  var Trix = /** @class */ (function () {
70
- /**
71
- * @param ixxFile [anyFile] the second-level trix index file produced by ixIxx.
72
- * @param ixFile [anyFile] the first-level trix index file produced by ixIxx.
73
- * @param maxResults [number] the maximum number of results to return. Default is set to 20.
74
- */
75
77
  function Trix(ixxFile, ixFile, maxResults) {
76
78
  if (maxResults === void 0) { maxResults = 20; }
77
- this.index = this._parseIxx(ixxFile);
78
79
  this.ixFile = ixFile;
80
+ this.ixxFile = ixxFile;
79
81
  this.maxResults = maxResults;
80
82
  }
81
- /**
82
- * Search trix for the given searchWord(s). Return up to {this.maxResults} results.
83
- * This method matches each index prefix against each searchWord. It does not do fuzzy matching.
84
- *
85
- * @param searchString [string] term(s) separated by spaces to search for id(s).
86
- * @returns results [Array<string>] where each string is a corresponding itemId.
87
- */
88
- Trix.prototype.search = function (searchString) {
83
+ Trix.prototype.search = function (searchString, opts) {
89
84
  return __awaiter(this, void 0, void 0, function () {
90
- var resultArr, firstWord, initialSet, searchWords, w, searchWord, bufData, buf, bufPos, resultSet, linePtr, numValues, startsWith, done, i, tempBufData, cur, line, arr, arr_1, arr_1_1, hit;
91
- var e_1, _a;
92
- return __generator(this, function (_b) {
93
- switch (_b.label) {
85
+ var resultArr, searchWords, searchWord, res, seekPosEnd, buffer, done, _loop_1, this_1, state_1;
86
+ return __generator(this, function (_a) {
87
+ switch (_a.label) {
94
88
  case 0:
95
89
  resultArr = [];
96
- firstWord = true;
97
- initialSet = new Set();
98
90
  searchWords = searchString.split(' ');
99
- w = 0;
100
- _b.label = 1;
91
+ searchWord = searchWords[0].toLowerCase();
92
+ return [4 /*yield*/, this._getBuffer(searchWord, opts)];
101
93
  case 1:
102
- if (!(w < searchWords.length)) return [3 /*break*/, 10];
103
- searchWord = searchWords[w];
104
- searchWord = searchWord.toLowerCase();
105
- return [4 /*yield*/, this._getBuffer(searchWord)];
106
- case 2:
107
- bufData = _b.sent();
108
- buf = bufData.buf;
109
- bufPos = bufData.bufEndPos;
110
- resultSet = new Set();
111
- linePtr = 0;
112
- numValues = 0;
113
- _b.label = 3;
114
- case 3:
115
- if (!(linePtr < buf.byteLength)) return [3 /*break*/, 8];
116
- startsWith = true;
117
- done = false;
118
- i = linePtr;
119
- _b.label = 4;
120
- case 4:
121
- if (!(buf[i] != 10)) return [3 /*break*/, 7];
122
- if (!(i >= buf.byteLength)) return [3 /*break*/, 6];
123
- return [4 /*yield*/, this._getNextChunk(bufPos)];
124
- case 5:
125
- tempBufData = _b.sent();
126
- if (tempBufData) {
127
- buf = tempBufData.buf;
128
- bufPos = tempBufData.bufEndPos;
129
- i = 0;
130
- linePtr = 0;
131
- }
132
- else {
133
- // If tempBufData is null, we reached the end of the file, so we are done.
134
- done = true;
135
- return [3 /*break*/, 7];
136
- }
137
- _b.label = 6;
138
- case 6:
139
- if (startsWith) {
140
- cur = String.fromCharCode(buf[i]);
141
- if (i < linePtr + searchWord.length &&
142
- searchWord[i - linePtr] > cur) {
143
- // searchWord[i] > cur, so keep looping.
144
- startsWith = false;
145
- }
146
- else if (i < linePtr + searchWord.length &&
147
- searchWord[i - linePtr] < cur) {
148
- // searchWord[i] < cur, so we lexicographically will not find any more results.
149
- startsWith = false;
150
- done = true;
151
- return [3 /*break*/, 7];
152
- }
153
- else {
154
- // This condition indicates we found a match.
155
- if (buf[i] === 44) {
156
- // We found a ',' so increment numValues by one.
157
- numValues++;
158
- // If we're searching for one word and we have enough results, break out at the next space.
159
- if (numValues >= this.maxResults && searchWords.length === 1) {
160
- while (buf[i] != 32)
161
- i++;
162
- return [3 /*break*/, 7];
163
- }
164
- }
165
- }
94
+ res = _a.sent();
95
+ if (!res) {
96
+ return [2 /*return*/, []];
166
97
  }
167
- i++;
168
- return [3 /*break*/, 4];
169
- case 7:
170
- if (done)
171
- return [3 /*break*/, 8];
172
- // If the line starts with the searchWord, we have a hit!
173
- if (startsWith) {
174
- line = buf.slice(linePtr, i).toString();
175
- arr = this._parseHitString(line);
176
- if (searchWords.length === 1) {
177
- // Only a single search word so add results to array.
178
- resultArr = resultArr.concat(arr);
179
- // Once we have enough results, stop searching.
180
- if (resultArr.length >= this.maxResults)
181
- return [3 /*break*/, 8];
182
- }
183
- else {
184
- try {
185
- // Handle multiple words using sets.
186
- for (arr_1 = (e_1 = void 0, __values(arr)), arr_1_1 = arr_1.next(); !arr_1_1.done; arr_1_1 = arr_1.next()) {
187
- hit = arr_1_1.value;
188
- hit = hit.toLowerCase();
189
- if (firstWord) {
190
- resultSet.add(hit);
191
- }
192
- else {
193
- if (initialSet.has(hit)) {
194
- resultSet.add(hit);
195
- // If it is on the last iteration of words, break after we reach maxResults
196
- if (w === searchWords.length - 1 &&
197
- resultSet.size >= this.maxResults)
198
- break;
98
+ seekPosEnd = res.seekPosEnd, buffer = res.buffer;
99
+ done = false;
100
+ _loop_1 = function () {
101
+ var foundSomething, str, lines, hits, res2;
102
+ return __generator(this, function (_b) {
103
+ switch (_b.label) {
104
+ case 0:
105
+ foundSomething = false;
106
+ str = buffer.toString();
107
+ lines = str
108
+ .slice(0, str.lastIndexOf('\n'))
109
+ .split('\n')
110
+ .filter(function (f) { return !!f; });
111
+ hits = lines
112
+ // eslint-disable-next-line @typescript-eslint/no-loop-func
113
+ .filter(function (line) {
114
+ var word = line.split(' ')[0];
115
+ var match = word.startsWith(searchWord);
116
+ if (!foundSomething && match) {
117
+ foundSomething = true;
118
+ }
119
+ // we are done scanning if we are lexicographically greater than the
120
+ // search string
121
+ if (word > searchWord) {
122
+ done = true;
199
123
  }
124
+ return match;
125
+ })
126
+ .map(function (line) {
127
+ var _a = __read(line.split(' ')), term = _a[0], parts = _a.slice(1);
128
+ return parts.map(function (elt) { return [term, elt.split(',')[0]]; });
129
+ })
130
+ .flat();
131
+ if (!(resultArr.length + hits.length < this_1.maxResults && !done)) return [3 /*break*/, 2];
132
+ return [4 /*yield*/, this_1.ixFile.read(Buffer.alloc(CHUNK_SIZE), 0, CHUNK_SIZE, seekPosEnd, opts)
133
+ // early break if empty response
134
+ ];
135
+ case 1:
136
+ res2 = _b.sent();
137
+ // early break if empty response
138
+ if (!res2.bytesRead) {
139
+ resultArr = resultArr.concat(hits);
140
+ return [2 /*return*/, "break"];
200
141
  }
201
- }
202
- }
203
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
204
- finally {
205
- try {
206
- if (arr_1_1 && !arr_1_1.done && (_a = arr_1.return)) _a.call(arr_1);
207
- }
208
- finally { if (e_1) throw e_1.error; }
142
+ buffer = Buffer.concat([buffer, res2.buffer]);
143
+ seekPosEnd += CHUNK_SIZE;
144
+ return [3 /*break*/, 3];
145
+ case 2:
146
+ if (resultArr.length + hits.length >= this_1.maxResults || done) {
147
+ resultArr = resultArr.concat(hits);
148
+ return [2 /*return*/, "break"];
149
+ }
150
+ _b.label = 3;
151
+ case 3: return [2 /*return*/];
209
152
  }
210
- }
211
- }
212
- linePtr = i + 1;
213
- return [3 /*break*/, 3];
214
- case 8:
215
- initialSet = resultSet;
216
- firstWord = false;
217
- // If there aren't any results, stop looping, because an intersection with an empty set is an empty set.
218
- if (resultArr.length === 0 && initialSet.size === 0)
219
- return [2 /*return*/, []];
220
- _b.label = 9;
221
- case 9:
222
- w++;
223
- return [3 /*break*/, 1];
224
- case 10:
225
- // 4. Return the hitList [list of string]
226
- if (searchWords.length === 1) {
227
- return [2 /*return*/, resultArr];
228
- }
229
- // Else we need to return our set converted to an array
230
- resultArr = Array.from(initialSet);
231
- if (resultArr.length > this.maxResults)
232
- return [2 /*return*/, resultArr.slice(0, this.maxResults)];
233
- return [2 /*return*/, resultArr];
153
+ });
154
+ };
155
+ this_1 = this;
156
+ _a.label = 2;
157
+ case 2:
158
+ if (!!done) return [3 /*break*/, 4];
159
+ return [5 /*yield**/, _loop_1()];
160
+ case 3:
161
+ state_1 = _a.sent();
162
+ if (state_1 === "break")
163
+ return [3 /*break*/, 4];
164
+ return [3 /*break*/, 2];
165
+ case 4:
166
+ // deduplicate results based on the detail column (resultArr[1])
167
+ return [2 /*return*/, uniqBy(resultArr, function (elt) { return elt[1]; }).slice(0, this.maxResults)];
234
168
  }
235
169
  });
236
170
  });
237
171
  };
238
- // Private Methods:
239
- /**
240
- * Seek ahead to the correct position in the .ix file,
241
- * then load that chunk of .ix into a buffer.
242
- *
243
- * @param searchWord [string]
244
- * @returns a Buffer holding the sections we want to search.
245
- */
246
- Trix.prototype._getBuffer = function (searchWord) {
172
+ Trix.prototype.getIndex = function (opts) {
247
173
  return __awaiter(this, void 0, void 0, function () {
248
- var seekPosStart, seekPosEnd, indexes, indexes_1, indexes_1_1, _a, key, value, trimmedKey;
249
- var e_2, _b;
250
- return __generator(this, function (_c) {
251
- switch (_c.label) {
252
- case 0:
253
- seekPosStart = 0;
254
- seekPosEnd = -1;
255
- return [4 /*yield*/, this.index];
174
+ var file;
175
+ return __generator(this, function (_a) {
176
+ switch (_a.label) {
177
+ case 0: return [4 /*yield*/, this.ixxFile.readFile(__assign({ encoding: 'utf8' }, opts))];
256
178
  case 1:
257
- indexes = _c.sent();
258
- try {
259
- for (indexes_1 = __values(indexes), indexes_1_1 = indexes_1.next(); !indexes_1_1.done; indexes_1_1 = indexes_1.next()) {
260
- _a = __read(indexes_1_1.value, 2), key = _a[0], value = _a[1];
261
- trimmedKey = key.slice(0, searchWord.length);
262
- if (seekPosEnd === -1) {
263
- if (trimmedKey >= searchWord) {
264
- // We reached the end pos in the file.
265
- seekPosEnd = value - 1;
266
- break;
267
- }
268
- else {
269
- seekPosStart = value;
270
- }
271
- }
272
- }
273
- }
274
- catch (e_2_1) { e_2 = { error: e_2_1 }; }
275
- finally {
276
- try {
277
- if (indexes_1_1 && !indexes_1_1.done && (_b = indexes_1.return)) _b.call(indexes_1);
278
- }
279
- finally { if (e_2) throw e_2.error; }
280
- }
281
- // Return the buffer and its end position in the file.
282
- return [2 /*return*/, this._createBuffer(seekPosStart, seekPosEnd)];
179
+ file = _a.sent();
180
+ return [2 /*return*/, file
181
+ .split('\n')
182
+ .filter(function (f) { return !!f; })
183
+ .map(function (line) {
184
+ var prefix = line.slice(0, TRIX_PREFIX_SIZE);
185
+ var posStr = line.slice(TRIX_PREFIX_SIZE);
186
+ var pos = Number.parseInt(posStr, 16);
187
+ return [prefix, pos];
188
+ })];
283
189
  }
284
190
  });
285
191
  });
286
192
  };
287
- /**
288
- * Given the end position of the last buffer,
289
- * load the next chunk of .ix data into a buffer and return it.
290
- *
291
- * @param seekPosStart [number] where to start loading data into the new buffer.
292
- * @returns a Buffer holding the chunk we want to search.
293
- */
294
- Trix.prototype._getNextChunk = function (seekPosStart) {
193
+ Trix.prototype._getBuffer = function (searchWord, opts) {
295
194
  return __awaiter(this, void 0, void 0, function () {
296
- var seekPosEnd, indexes, indexes_2, indexes_2_1, _a, key, value;
297
- var e_3, _b;
298
- return __generator(this, function (_c) {
299
- switch (_c.label) {
195
+ var seekPosStart, seekPosEnd, indexes, len, res;
196
+ return __generator(this, function (_a) {
197
+ switch (_a.label) {
300
198
  case 0:
301
- if (seekPosStart == -1)
302
- return [2 /*return*/, null];
199
+ seekPosStart = 0;
303
200
  seekPosEnd = -1;
304
- return [4 /*yield*/, this.index];
201
+ return [4 /*yield*/, this.getIndex(opts)];
305
202
  case 1:
306
- indexes = _c.sent();
307
- try {
308
- for (indexes_2 = __values(indexes), indexes_2_1 = indexes_2.next(); !indexes_2_1.done; indexes_2_1 = indexes_2.next()) {
309
- _a = __read(indexes_2_1.value, 2), key = _a[0], value = _a[1];
310
- if (value <= seekPosStart + 1)
311
- continue;
312
- seekPosEnd = value;
313
- break;
314
- }
315
- }
316
- catch (e_3_1) { e_3 = { error: e_3_1 }; }
317
- finally {
318
- try {
319
- if (indexes_2_1 && !indexes_2_1.done && (_b = indexes_2.return)) _b.call(indexes_2);
203
+ indexes = _a.sent();
204
+ indexes.forEach(function (_a) {
205
+ var _b = __read(_a, 2), key = _b[0], value = _b[1];
206
+ var trimmedKey = key.slice(0, searchWord.length);
207
+ if (trimmedKey < searchWord) {
208
+ seekPosStart = value;
209
+ seekPosEnd = value + 65536;
320
210
  }
321
- finally { if (e_3) throw e_3.error; }
211
+ });
212
+ len = seekPosEnd - seekPosStart;
213
+ if (len < 0) {
214
+ return [2 /*return*/, undefined];
322
215
  }
323
- seekPosStart--;
324
- // Return the buffer and its end position in the file.
325
- return [2 /*return*/, this._createBuffer(seekPosStart, seekPosEnd)];
326
- }
327
- });
328
- });
329
- };
330
- /**
331
- * Create and return a buffer given the start and end position
332
- * of what to load from the .ix file.
333
- *
334
- * @param seekPosStart [number] byte the buffer should start reading from file.
335
- * @param seekPosEnd [number] byte the buffer should stop reading from file.
336
- * @returns a Buffer holding the chunk of data.
337
- */
338
- Trix.prototype._createBuffer = function (seekPosStart, seekPosEnd) {
339
- return __awaiter(this, void 0, void 0, function () {
340
- var bufLength, stat, buf;
341
- return __generator(this, function (_a) {
342
- switch (_a.label) {
343
- case 0:
344
- if (!(seekPosEnd < 0)) return [3 /*break*/, 2];
345
- return [4 /*yield*/, this.ixFile.stat()];
346
- case 1:
347
- stat = _a.sent();
348
- bufLength = stat.size - seekPosStart;
349
- return [3 /*break*/, 3];
216
+ return [4 /*yield*/, this.ixFile.read(Buffer.alloc(len), 0, len, seekPosStart, opts)];
350
217
  case 2:
351
- bufLength = seekPosEnd - seekPosStart;
352
- _a.label = 3;
353
- case 3:
354
- buf = Buffer.alloc(bufLength);
355
- return [4 /*yield*/, this.ixFile.read(buf, 0, bufLength, seekPosStart)];
356
- case 4:
357
- _a.sent();
358
- // Return the buffer and its end position in the file.
359
- return [2 /*return*/, { buf: buf, bufEndPos: seekPosEnd }];
360
- }
361
- });
362
- });
363
- };
364
- /**
365
- * Takes in a hit string and returns an array of result terms.
366
- *
367
- * @param line [string] The line of .ix that is a hit.
368
- * @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
369
- */
370
- Trix.prototype._parseHitString = function (line) {
371
- var e_4, _a;
372
- var arr = [];
373
- var _b = __read(line.split(' ')), term = _b[0], parts = _b.slice(1); // skip term
374
- try {
375
- // Each result is of format: "{itemId},{wordPos}"
376
- // Parse the entire line of these and return
377
- for (var parts_1 = __values(parts), parts_1_1 = parts_1.next(); !parts_1_1.done; parts_1_1 = parts_1.next()) {
378
- var part = parts_1_1.value;
379
- var pair = part.split(',');
380
- if (pair.length === 2) {
381
- var itemId = pair[0];
382
- var wordPos = Number.parseInt(pair[1]);
383
- if (typeof wordPos !== 'number' || isNaN(wordPos))
384
- throw new Error("Error in ix index format at term " + itemId + " for word " + parts[0]);
385
- arr.push(term + "," + itemId);
386
- }
387
- else if (pair.length > 1) {
388
- throw new Error("Error in ix index format at word " + parts[0]);
389
- }
390
- }
391
- }
392
- catch (e_4_1) { e_4 = { error: e_4_1 }; }
393
- finally {
394
- try {
395
- if (parts_1_1 && !parts_1_1.done && (_a = parts_1.return)) _a.call(parts_1);
396
- }
397
- finally { if (e_4) throw e_4.error; }
398
- }
399
- return arr;
400
- };
401
- /**
402
- * Parses ixx file and constructs a map of {word: ixFileLocation}
403
- *
404
- * @param ixxFile [anyFile] second level index that is produced by ixIxx.
405
- * @returns a ParsedIxx map.
406
- */
407
- Trix.prototype._parseIxx = function (ixxFile) {
408
- return __awaiter(this, void 0, void 0, function () {
409
- var file, lines;
410
- return __generator(this, function (_a) {
411
- switch (_a.label) {
412
- case 0: return [4 /*yield*/, ixxFile.readFile('utf8')];
413
- case 1:
414
- file = (_a.sent());
415
- lines = file.split('\n');
416
- return [2 /*return*/, new Map(lines
417
- .filter(function (f) { return !!f; })
418
- .map(function (line) {
419
- var prefix = line.substr(0, trixPrefixSize);
420
- var posStr = line.substr(trixPrefixSize);
421
- var pos = Number.parseInt(posStr, 16);
422
- return [prefix, pos];
423
- }))];
218
+ res = _a.sent();
219
+ return [2 /*return*/, __assign(__assign({}, res), { seekPosEnd: seekPosEnd })];
424
220
  }
425
221
  });
426
222
  });
@@ -428,3 +224,4 @@ var Trix = /** @class */ (function () {
428
224
  return Trix;
429
225
  }());
430
226
  exports.default = Trix;
227
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,IAAM,gBAAgB,GAAG,CAAC,CAAA;AAE1B,IAAM,UAAU,GAAG,KAAK,CAAA;AAExB,8CAA8C;AAC9C,SAAS,MAAM,CAAC,CAAqB,EAAE,GAAsC;IAC3E,IAAM,IAAI,GAAG,IAAI,GAAG,EAAE,CAAA;IACtB,OAAO,CAAC,CAAC,MAAM,CAAC,UAAA,IAAI;QAClB,IAAM,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAA;QACnB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;AACJ,CAAC;AAED;IAOE,cACE,OAA0B,EAC1B,MAAyB,EACzB,UAAe;QAAf,2BAAA,EAAA,eAAe;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAEK,qBAAM,GAAZ,UAAa,YAAoB,EAAE,IAA+B;;;;;;wBAC5D,SAAS,GAAG,EAAwB,CAAA;wBAClC,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;wBAGrC,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;wBACnC,qBAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,EAAA;;wBAA7C,GAAG,GAAG,SAAuC;wBACnD,IAAI,CAAC,GAAG,EAAE;4BACR,sBAAO,EAAE,EAAA;yBACV;wBAEK,UAAU,GAAa,GAAG,WAAhB,EAAE,MAAM,GAAK,GAAG,OAAR,CAAQ;wBAC5B,IAAI,GAAG,KAAK,CAAA;;;;;;wCAEV,cAAc,GAAG,KAAK,CAAA;wCACpB,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;wCAIvB,KAAK,GAAG,GAAG;6CACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;6CAC/B,KAAK,CAAC,IAAI,CAAC;6CACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC,CAAA;wCAEb,IAAI,GAAG,KAAK;4CAChB,2DAA2D;6CAC1D,MAAM,CAAC,UAAA,IAAI;4CACV,IAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;4CAC/B,IAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;4CACzC,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;gDAC5B,cAAc,GAAG,IAAI,CAAA;6CACtB;4CAED,oEAAoE;4CACpE,gBAAgB;4CAChB,IAAI,IAAI,GAAG,UAAU,EAAE;gDACrB,IAAI,GAAG,IAAI,CAAA;6CACZ;4CACD,OAAO,KAAK,CAAA;wCACd,CAAC,CAAC;6CACD,GAAG,CAAC,UAAA,IAAI;4CACD,IAAA,KAAA,OAAmB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA,EAAjC,IAAI,QAAA,EAAK,KAAK,cAAmB,CAAA;4CACxC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAA,GAAG,IAAI,OAAA,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC
,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAA;wCACpD,CAAC,CAAC;6CACD,IAAI,EAAwB,CAAA;6CAI3B,CAAA,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,OAAK,UAAU,IAAI,CAAC,IAAI,CAAA,EAAzD,wBAAyD;wCAE9C,qBAAM,OAAK,MAAM,CAAC,IAAI,CACjC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,EACxB,CAAC,EACD,UAAU,EACV,UAAU,EACV,IAAI,CACL;4CAED,gCAAgC;0CAF/B;;wCANK,IAAI,GAAG,SAMZ;wCAED,gCAAgC;wCAChC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;4CACnB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yCAEnC;wCACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;wCAC7C,UAAU,IAAI,UAAU,CAAA;;;wCAKrB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAK,UAAU,IAAI,IAAI,EAAE;4CAClE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yCAEnC;;;;;;;;;6BA3DI,CAAC,IAAI;;;;;;;;oBA8DZ,gEAAgE;oBAChE,sBAAO,MAAM,CAAC,SAAS,EAAE,UAAA,GAAG,IAAI,OAAA,GAAG,CAAC,CAAC,CAAC,EAAN,CAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,EAAA;;;;KAClE;IAEa,uBAAQ,GAAtB,UAAuB,IAA+B;;;;;4BACvC,qBAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,YACtC,QAAQ,EAAE,MAAM,IACb,IAAI,EACP,EAAA;;wBAHI,IAAI,GAAG,SAGX;wBACF,sBAAO,IAAI;iCACR,KAAK,CAAC,IAAI,CAAC;iCACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC;iCAChB,GAAG,CAAC,UAAA,IAAI;gCACP,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAA;gCAC9C,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAA;gCAC3C,IAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;gCACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAqB,CAAA;4BAC1C,CAAC,CAAC,EAAA;;;;KACL;IAEa,yBAAU,GAAxB,UACE,UAAkB,EAClB,IAA+B;;;;;;wBAE3B,YAAY,GAAG,CAAC,CAAA;wBAChB,UAAU,GAAG,CAAC,CAAC,CAAA;wBACH,qBAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAA;;wBAAnC,OAAO,GAAG,SAAyB;wBACzC,OAAO,CAAC,OAAO,CAAC,UAAC,EAAY;gCAAZ,KAAA,aAAY,EAAX,GAAG,QAAA,EAAE,KAAK,QAAA;4BAC1B,IAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;4BAClD,IAAI,UAAU,GAAG,UAAU,EAAE;gCAC3B,YAAY,GAAG,KAAK,CAAA;gCACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;6BAC3B;wBACH,CAAC,CAAC,CAAA;wBAGI,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;wBACrC,IAAI,GAAG,GAAG,CAAC,EAAE;4BACX,sBAAO,SAAS,EAAA;yBACjB;wBACW,qBAAM,IAAI,CAAC,MAAM,C
AAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,EAAA;;wBANK,GAAG,GAAG,SAMX;wBACD,4CACK,GAAG,KACN,UAAU,YAAA,KACX;;;;KACF;IACH,WAAC;AAAD,CAAC,AAhJD,IAgJC"}
package/esm/index.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ import type { GenericFilehandle } from 'generic-filehandle';
2
+ export default class Trix {
3
+ private ixFile;
4
+ private ixxFile;
5
+ maxResults: number;
6
+ constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
7
+ search(searchString: string, opts?: {
8
+ signal?: AbortSignal;
9
+ }): Promise<[string, string][]>;
10
+ private getIndex;
11
+ private _getBuffer;
12
+ }
package/esm/index.js ADDED
@@ -0,0 +1,121 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const TRIX_PREFIX_SIZE = 5; // leading characters of each .ixx line used as the key; the rest of the line is parsed as a hex offset
4
+ const CHUNK_SIZE = 65536; // number of bytes requested from the .ix file per additional read
5
// https://stackoverflow.com/a/9229821/2129219
/**
 * Returns the elements of `a` whose `key(item)` has not occurred earlier in
 * the array. Input order is preserved and the first occurrence wins.
 *
 * @param a - array of [term, id] pairs
 * @param key - maps an element to the string used for the uniqueness check
 * @returns a new array; `a` is not modified
 */
function uniqBy(a, key) {
    const seen = new Set();
    return a.filter(item => {
        const k = key(item);
        if (seen.has(k)) {
            return false;
        }
        // return an explicit boolean instead of relying on the truthiness of
        // the Set object that `seen.add` returns
        seen.add(k);
        return true;
    });
}
13
/**
 * Prefix search over a Trix index.
 *
 * The `.ixx` file is read as lines made of a TRIX_PREFIX_SIZE-character key
 * followed by a hexadecimal byte offset into the `.ix` file. The `.ix` file is
 * read as newline-separated records of the form `word id,pos id,pos ...`.
 * Scanning stops at the first word that sorts after the search word, so the
 * records are expected to be sorted by word.
 */
class Trix {
    /**
     * @param ixxFile - file handle for the `.ixx` offset index
     * @param ixFile - file handle for the `.ix` data file
     * @param maxResults - maximum number of entries `search` returns
     */
    constructor(ixxFile, ixFile, maxResults = 20) {
        this.ixFile = ixFile;
        this.ixxFile = ixxFile;
        this.maxResults = maxResults;
    }
    /**
     * Collects [term, id] pairs from every record in `text` whose word starts
     * with `searchWord`, and reports whether a non-matching word that sorts
     * after `searchWord` was seen (meaning no further matches can follow).
     */
    static collectHits(text, searchWord) {
        const hits = [];
        let pastMatches = false;
        text.split('\n').forEach(line => {
            if (!line) {
                return;
            }
            const [term, ...parts] = line.split(' ');
            if (term.startsWith(searchWord)) {
                parts.forEach(part => {
                    hits.push([term, part.split(',')[0]]);
                });
            }
            else if (term > searchWord) {
                // only a NON-matching greater word ends the scan; a matching word
                // that is longer than the search word also compares greater
                pastMatches = true;
            }
        });
        return { hits, pastMatches };
    }
    /**
     * Finds index entries whose word starts with the first whitespace-separated
     * word of `searchString` (lower-cased).
     *
     * @param searchString - user query; only its first word is searched
     * @param opts - forwarded to the file handle reads (e.g. an AbortSignal)
     * @returns up to `maxResults` [term, id] pairs, de-duplicated by id
     */
    async search(searchString, opts) {
        // we only search one word at a time; trim so that leading whitespace
        // does not turn the search word into an empty string
        const searchWord = searchString.trim().split(/\s+/)[0].toLowerCase();
        if (!searchWord) {
            return [];
        }
        const res = await this._getBuffer(searchWord, opts);
        if (!res) {
            return [];
        }
        let { seekPosEnd, buffer } = res;
        let results = [];
        let reachedEof = false;
        let done = false;
        while (!done) {
            const str = buffer.toString();
            // Until the end of the file is reached the buffer may stop halfway
            // into a record, so only parse up to the last newline. At EOF the
            // tail is a complete record even without a trailing newline.
            const text = reachedEof
                ? str
                : str.slice(0, Math.max(str.lastIndexOf('\n'), 0));
            const { hits, pastMatches } = Trix.collectHits(text, searchWord);
            // deduplicate results based on the detail column (elt[1]) before
            // deciding whether enough results have been gathered
            results = uniqBy(hits, elt => elt[1]);
            if (pastMatches || reachedEof || results.length >= this.maxResults) {
                done = true;
            }
            else {
                // eslint-disable-next-line no-await-in-loop
                const res2 = await this.ixFile.read(Buffer.alloc(CHUNK_SIZE), 0, CHUNK_SIZE, seekPosEnd, opts);
                if (res2.bytesRead) {
                    // append only the bytes actually read, not the zero padding of
                    // the scratch buffer
                    buffer = Buffer.concat([
                        buffer,
                        res2.buffer.subarray(0, res2.bytesRead),
                    ]);
                    seekPosEnd += res2.bytesRead;
                }
                else {
                    // empty response: loop once more to parse the final record
                    reachedEof = true;
                }
            }
        }
        return results.slice(0, this.maxResults);
    }
    /**
     * Reads the whole `.ixx` file and parses each non-empty line into a
     * [prefix, offset] pair, where the offset is parsed as hexadecimal.
     */
    async getIndex(opts) {
        const file = await this.ixxFile.readFile({
            encoding: 'utf8',
            ...opts,
        });
        return file
            .split('\n')
            .filter(f => !!f)
            .map(line => {
            const prefix = line.slice(0, TRIX_PREFIX_SIZE);
            const posStr = line.slice(TRIX_PREFIX_SIZE);
            const pos = Number.parseInt(posStr, 16);
            return [prefix, pos];
        });
    }
    /**
     * Reads the first chunk of the `.ix` file in which matches for
     * `searchWord` can start.
     *
     * @returns the result of the read with `buffer` trimmed to the bytes
     * actually read, plus `seekPosEnd`, the file position just past them
     */
    async _getBuffer(searchWord, opts) {
        // Start at the last indexed offset whose key sorts strictly before the
        // search word. When no key does, scan from the beginning of the file
        // instead of giving up.
        let seekPosStart = 0;
        const indexes = await this.getIndex(opts);
        indexes.forEach(([key, value]) => {
            const trimmedKey = key.slice(0, searchWord.length);
            if (trimmedKey < searchWord) {
                seekPosStart = value;
            }
        });
        const res = await this.ixFile.read(Buffer.alloc(CHUNK_SIZE), 0, CHUNK_SIZE, seekPosStart, opts);
        return {
            ...res,
            buffer: res.buffer.subarray(0, res.bytesRead),
            seekPosEnd: seekPosStart + res.bytesRead,
        };
    }
}
120
+ exports.default = Trix;
121
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;AAEA,MAAM,gBAAgB,GAAG,CAAC,CAAA;AAE1B,MAAM,UAAU,GAAG,KAAK,CAAA;AAExB,8CAA8C;AAC9C,SAAS,MAAM,CAAC,CAAqB,EAAE,GAAsC;IAC3E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAE,CAAA;IACtB,OAAO,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;QACrB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAA;QACnB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAqB,IAAI;IAOvB,YACE,OAA0B,EAC1B,MAAyB,EACzB,UAAU,GAAG,EAAE;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,YAAoB,EAAE,IAA+B;QAChE,IAAI,SAAS,GAAG,EAAwB,CAAA;QACxC,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAE3C,oCAAoC;QACpC,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;QAC/C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;QACnD,IAAI,CAAC,GAAG,EAAE;YACR,OAAO,EAAE,CAAA;SACV;QAED,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,GAAG,CAAA;QAChC,IAAI,IAAI,GAAG,KAAK,CAAA;QAChB,OAAO,CAAC,IAAI,EAAE;YACZ,IAAI,cAAc,GAAG,KAAK,CAAA;YAC1B,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;YAE7B,kEAAkE;YAClE,yDAAyD;YACzD,MAAM,KAAK,GAAG,GAAG;iBACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;iBAC/B,KAAK,CAAC,IAAI,CAAC;iBACX,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YAEnB,MAAM,IAAI,GAAG,KAAK;gBAChB,2DAA2D;iBAC1D,MAAM,CAAC,IAAI,CAAC,EAAE;gBACb,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;gBAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;gBACzC,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;oBAC5B,cAAc,GAAG,IAAI,CAAA;iBACtB;gBAED,oEAAoE;gBACpE,gBAAgB;gBAChB,IAAI,IAAI,GAAG,UAAU,EAAE;oBACrB,IAAI,GAAG,IAAI,CAAA;iBACZ;gBACD,OAAO,KAAK,CAAA;YACd,CAAC,CAAC;iBACD,GAAG,CAAC,IAAI,CAAC,EAAE;gBACV,MAAM,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;gBACxC,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,
CAAC,CAAC,CAAC,CAAC,CAAA;YACpD,CAAC,CAAC;iBACD,IAAI,EAAwB,CAAA;YAE/B,yEAAyE;YACzE,eAAe;YACf,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,EAAE;gBAC7D,4CAA4C;gBAC5C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CACjC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,EACxB,CAAC,EACD,UAAU,EACV,UAAU,EACV,IAAI,CACL,CAAA;gBAED,gCAAgC;gBAChC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;oBACnB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;oBAClC,MAAK;iBACN;gBACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;gBAC7C,UAAU,IAAI,UAAU,CAAA;aACzB;YAED,uEAAuE;YACvE,yBAAyB;iBACpB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,EAAE;gBAClE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;gBAClC,MAAK;aACN;SACF;QAED,gEAAgE;QAChE,OAAO,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAA;IACnE,CAAC;IAEO,KAAK,CAAC,QAAQ,CAAC,IAA+B;QACpD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC;YACvC,QAAQ,EAAE,MAAM;YAChB,GAAG,IAAI;SACR,CAAC,CAAA;QACF,OAAO,IAAI;aACR,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;aAChB,GAAG,CAAC,IAAI,CAAC,EAAE;YACV,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAA;YAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAA;YAC3C,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;YACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAqB,CAAA;QAC1C,CAAC,CAAC,CAAA;IACN,CAAC;IAEO,KAAK,CAAC,UAAU,CACtB,UAAkB,EAClB,IAA+B;QAE/B,IAAI,YAAY,GAAG,CAAC,CAAA;QACpB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;QACnB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QACzC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YAC/B,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;YAClD,IAAI,UAAU,GAAG,UAAU,EAAE;gBAC3B,YAAY,GAAG,KAAK,CAAA;gBACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;aAC3B;QACH,CAAC,CAAC,CAAA;QAEF,sDAAsD;QACtD,MAAM,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;QACrC,IAAI,GAAG,GAAG,CAAC,EAAE;YACX,OAAO,SAAS,CAAA;SACjB;QACD,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MA
AM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,CAAA;QACD,OAAO;YACL,GAAG,GAAG;YACN,UAAU;SACX,CAAA;IACH,CAAC;CACF;AAhJD,uBAgJC"}
package/package.json CHANGED
@@ -1,33 +1,43 @@
1
1
  {
2
- "version": "1.0.0",
2
+ "version": "2.0.2",
3
3
  "license": "Apache-2.0",
4
4
  "main": "dist/index.js",
5
+ "module": "esm/index.js",
5
6
  "files": [
6
- "dist"
7
+ "dist",
8
+ "esm",
9
+ "src"
7
10
  ],
8
11
  "engines": {
9
12
  "node": ">=10"
10
13
  },
11
14
  "scripts": {
12
- "prebuild": "rimraf dist",
13
- "build": "tsc",
15
+ "lint": "eslint --report-unused-disable-directives --max-warnings 0 --ext .js,.ts src ",
16
+ "prebuild": "rimraf dist esm",
17
+ "build:esm": "tsc --target es2018 --outDir esm",
18
+ "build:es5": "tsc --target es5 --outDir dist",
19
+ "build": "npm run build:esm && npm run build:es5",
20
+ "preversion": "npm run lint && npm test && npm run build",
14
21
  "postversion": "git push --follow-tags",
15
22
  "test": "jest"
16
23
  },
17
- "prettier": {
18
- "printWidth": 80,
19
- "semi": true,
20
- "singleQuote": true,
21
- "trailingComma": "es5"
22
- },
23
24
  "name": "@gmod/trix",
24
25
  "author": "Matt Morgan",
25
26
  "repository": "GMOD/trix-js",
26
27
  "devDependencies": {
27
- "@types/jest": "^26.0.24",
28
- "@types/node": "^14.14.37",
28
+ "@types/jest": "^27.0.3",
29
+ "@types/node": "^16.11.13",
30
+ "@typescript-eslint/eslint-plugin": "^5.7.0",
31
+ "@typescript-eslint/parser": "^5.7.0",
32
+ "eslint": "^7.0.0",
33
+ "eslint-config-airbnb-base": "^15.0.0",
34
+ "eslint-config-airbnb-typescript": "^16.1.0",
35
+ "eslint-config-prettier": "^8.3.0",
36
+ "eslint-plugin-import": "^2.25.3",
37
+ "eslint-plugin-prettier": "^4.0.0",
29
38
  "generic-filehandle": "^2.1.0",
30
39
  "jest": "^27.0.6",
40
+ "prettier": "^2.5.1",
31
41
  "rimraf": "^3.0.2",
32
42
  "ts-jest": "^27.0.4",
33
43
  "typescript": "^4.3.5"
package/src/index.ts ADDED
@@ -0,0 +1,160 @@
1
+ import type { GenericFilehandle } from 'generic-filehandle'
2
+
3
+ const TRIX_PREFIX_SIZE = 5 // leading characters of each .ixx line used as the key; the rest of the line is parsed as a hex offset
4
+
5
+ const CHUNK_SIZE = 65536 // number of bytes requested from the .ix file per additional read
6
+
7
// https://stackoverflow.com/a/9229821/2129219
/**
 * Returns the elements of `a` whose `key(item)` has not occurred earlier in
 * the array. Input order is preserved and the first occurrence wins.
 *
 * @param a - array of [term, id] pairs
 * @param key - maps an element to the string used for the uniqueness check
 * @returns a new array; `a` is not modified
 */
function uniqBy(a: [string, string][], key: (elt: [string, string]) => string) {
  const seen = new Set<string>()
  return a.filter(item => {
    const k = key(item)
    if (seen.has(k)) {
      return false
    }
    // return an explicit boolean instead of relying on the truthiness of the
    // Set object that `seen.add` returns
    seen.add(k)
    return true
  })
}
15
+
16
/**
 * Prefix search over a Trix index.
 *
 * The `.ixx` file is read as lines made of a TRIX_PREFIX_SIZE-character key
 * followed by a hexadecimal byte offset into the `.ix` file. The `.ix` file is
 * read as newline-separated records of the form `word id,pos id,pos ...`.
 * Scanning stops at the first word that sorts after the search word, so the
 * records are expected to be sorted by word.
 */
export default class Trix {
  private ixFile: GenericFilehandle

  private ixxFile: GenericFilehandle

  /** Maximum number of entries returned by {@link Trix.search}. */
  maxResults: number

  /**
   * @param ixxFile - file handle for the `.ixx` offset index
   * @param ixFile - file handle for the `.ix` data file
   * @param maxResults - maximum number of entries `search` returns
   */
  constructor(
    ixxFile: GenericFilehandle,
    ixFile: GenericFilehandle,
    maxResults = 20,
  ) {
    this.ixFile = ixFile
    this.ixxFile = ixxFile
    this.maxResults = maxResults
  }

  /**
   * Collects [term, id] pairs from every record in `text` whose word starts
   * with `searchWord`, and reports whether a non-matching word that sorts
   * after `searchWord` was seen (meaning no further matches can follow).
   */
  private static collectHits(text: string, searchWord: string) {
    const hits: [string, string][] = []
    let pastMatches = false
    text.split('\n').forEach(line => {
      if (!line) {
        return
      }
      const [term, ...parts] = line.split(' ')
      if (term.startsWith(searchWord)) {
        parts.forEach(part => {
          hits.push([term, part.split(',')[0]])
        })
      } else if (term > searchWord) {
        // only a NON-matching greater word ends the scan; a matching word that
        // is longer than the search word also compares greater
        pastMatches = true
      }
    })
    return { hits, pastMatches }
  }

  /**
   * Finds index entries whose word starts with the first whitespace-separated
   * word of `searchString` (lower-cased).
   *
   * @param searchString - user query; only its first word is searched
   * @param opts - forwarded to the file handle reads (e.g. an AbortSignal)
   * @returns up to `maxResults` [term, id] pairs, de-duplicated by id
   */
  async search(
    searchString: string,
    opts?: { signal?: AbortSignal },
  ): Promise<[string, string][]> {
    // we only search one word at a time; trim so that leading whitespace does
    // not turn the search word into an empty string
    const searchWord = searchString.trim().split(/\s+/)[0].toLowerCase()
    if (!searchWord) {
      return []
    }
    const res = await this._getBuffer(searchWord, opts)
    if (!res) {
      return []
    }

    let { seekPosEnd, buffer } = res
    let results: [string, string][] = []
    let reachedEof = false
    let done = false
    while (!done) {
      const str = buffer.toString()

      // Until the end of the file is reached the buffer may stop halfway into
      // a record, so only parse up to the last newline. At EOF the tail is a
      // complete record even without a trailing newline.
      const text = reachedEof
        ? str
        : str.slice(0, Math.max(str.lastIndexOf('\n'), 0))

      const { hits, pastMatches } = Trix.collectHits(text, searchWord)

      // deduplicate results based on the detail column (elt[1]) before
      // deciding whether enough results have been gathered
      results = uniqBy(hits, elt => elt[1])

      if (pastMatches || reachedEof || results.length >= this.maxResults) {
        done = true
      } else {
        // eslint-disable-next-line no-await-in-loop
        const res2 = await this.ixFile.read(
          Buffer.alloc(CHUNK_SIZE),
          0,
          CHUNK_SIZE,
          seekPosEnd,
          opts,
        )
        if (res2.bytesRead) {
          // append only the bytes actually read, not the zero padding of the
          // scratch buffer
          buffer = Buffer.concat([
            buffer,
            res2.buffer.subarray(0, res2.bytesRead),
          ])
          seekPosEnd += res2.bytesRead
        } else {
          // empty response: loop once more to parse the final record
          reachedEof = true
        }
      }
    }

    return results.slice(0, this.maxResults)
  }

  /**
   * Reads the whole `.ixx` file and parses each non-empty line into a
   * [prefix, offset] pair, where the offset is parsed as hexadecimal.
   */
  private async getIndex(opts?: { signal?: AbortSignal }) {
    const file = await this.ixxFile.readFile({
      encoding: 'utf8',
      ...opts,
    })
    return file
      .split('\n')
      .filter(f => !!f)
      .map(line => {
        const prefix = line.slice(0, TRIX_PREFIX_SIZE)
        const posStr = line.slice(TRIX_PREFIX_SIZE)
        const pos = Number.parseInt(posStr, 16)
        return [prefix, pos] as [string, number]
      })
  }

  /**
   * Reads the first chunk of the `.ix` file in which matches for `searchWord`
   * can start.
   *
   * @returns the result of the read with `buffer` trimmed to the bytes
   * actually read, plus `seekPosEnd`, the file position just past them
   */
  private async _getBuffer(
    searchWord: string,
    opts?: { signal?: AbortSignal },
  ) {
    // Start at the last indexed offset whose key sorts strictly before the
    // search word. When no key does, scan from the beginning of the file
    // instead of giving up.
    let seekPosStart = 0
    const indexes = await this.getIndex(opts)
    indexes.forEach(([key, value]) => {
      const trimmedKey = key.slice(0, searchWord.length)
      if (trimmedKey < searchWord) {
        seekPosStart = value
      }
    })

    const res = await this.ixFile.read(
      Buffer.alloc(CHUNK_SIZE),
      0,
      CHUNK_SIZE,
      seekPosStart,
      opts,
    )
    return {
      ...res,
      buffer: res.buffer.subarray(0, res.bytesRead),
      seekPosEnd: seekPosStart + res.bytesRead,
    }
  }
}