@gmod/trix 0.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,19 @@
1
+ - Fix issue with infinite loop
2
+ - Add AbortSignal support
3
+ - Only query first word when string with multiple words is entered
4
+
5
+ # v1.0.0
6
+
7
+ - Change the result format: instead of returning just the "result" string, return "term,result"
8
+
9
+ # v0.2.1
10
+
11
+ - Fix error when identifiers contain commas
12
+
13
+ # v0.2.0
14
+
15
+ - Improve performance of fetches with sequential chunk parsing
16
+
17
+ # v0.1.1
18
+
19
+ - Initial release
package/README.md CHANGED
@@ -1,4 +1,7 @@
1
+ [![Build Status](https://img.shields.io/github/workflow/status/GMOD/trix-js/Push/main?logo=github&style=for-the-badge)](https://github.com/GMOD/trix-js/actions?query=branch%3Amain+workflow%3APush+)
2
+
1
3
  # trix-js
4
+
2
5
  Read UCSC Trix indexes in pure JavaScript
3
6
 
4
7
  ## Usage
@@ -9,37 +12,43 @@ import { RemoteFile } from 'generic-filehandle'
9
12
 
10
13
  // any filehandle object that supports the Nodejs FileHandle API will work.
11
14
  // We use generic-filehandle here to demonstrate searching files on remote servers.
12
- const ixxFile = new RemoteFile('https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx');
13
- const ixFile = new RemoteFile('https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix');
15
+ const ixxFile = new RemoteFile(
16
+ 'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx',
17
+ )
18
+ const ixFile = new RemoteFile(
19
+ 'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix',
20
+ )
14
21
 
15
- const trix = new Trix(ixxFile, ixFile);
22
+ const trix = new Trix(ixxFile, ixFile)
16
23
 
17
24
  async function doStuff() {
18
- const results = await trix.search('oca');
19
- console.log(results);
25
+ const results = await trix.search('oca')
26
+ console.log(results)
20
27
  }
21
- doStuff();
22
-
28
+ doStuff()
23
29
  ```
24
30
 
25
31
  ## Documentation
32
+
26
33
  ### Trix constructor
34
+
27
35
  The Trix class constructor accepts arguments:
36
+
28
37
  - `ixxFile` - a filehandle object for the trix .ixx file
29
38
  - `ixFile` - a filehandle object for the trix .ix file
30
39
  - `maxResults = 20` - an optional number specifying the maximum number of results to return on `trix.search()`
31
40
 
32
-
33
41
  ### Trix search
42
+
34
43
  **Search the index files for a term and find its keys.**<br>
35
44
  **In the case of searching with multiple words, `trix.search()` finds the intersection of the result sets.**<br>
36
45
  The Trix search function accepts argument:
46
+
37
47
  - `searchString` - a string of space-separated words to look up in the index file in order to find their keys<br>
38
-
48
+
39
49
  The Trix search function returns: <br>
40
- - `Promise<string[]>` - a promised array of strings where each string is an itemId result
41
-
42
50
 
51
+ - `Promise<[string, string][]>` - a promised array of `[term, result]` pairs, where `term` is the word from the left column of the trix index and `result` is the matching entry from the right column
43
52
 
44
53
  ## Examples
45
54
 
@@ -47,49 +56,37 @@ The Trix search function returns: <br>
47
56
  import { LocalFile } from 'generic-filehandle'
48
57
  import Trix from '@gmod/trix'
49
58
 
50
- const ixxFile = new LocalFile('out.ixx');
51
- const ixFile = new LocalFile('out.ix');
59
+ const ixxFile = new LocalFile('out.ixx')
60
+ const ixFile = new LocalFile('out.ix')
52
61
 
53
62
  // limit maxResults to 5
54
- const trix = new Trix(ixxFile, ixFile, 5);
63
+ const trix = new Trix(ixxFile, ixFile, 5)
55
64
 
56
65
  async function doStuff() {
57
- const results1 = await trix.search('herc');
58
- console.log(results1);
66
+ const results1 = await trix.search('herc')
67
+ console.log(results1)
59
68
 
60
69
  // increase maxResults to 30
61
- trix.maxResults = 30;
70
+ trix.maxResults = 30
62
71
 
63
- const results2 = await trix.search('linc');
64
- console.log(results2);
72
+ const results2 = await trix.search('linc')
73
+ console.log(results2)
65
74
  }
66
75
 
67
- doStuff();
76
+ doStuff()
68
77
  ```
69
- <br><br>
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
-
80
-
81
-
82
-
83
78
 
79
+ <br><br>
84
80
 
85
81
  ## Development
86
82
 
87
-
88
83
  ### Test trix-js
84
+
89
85
  First, clone this repo and install npm packages. <br>
90
86
  Then, run `npm test`. <br>
91
87
 
92
88
  ### Test the UCSC TrixSearch - Requires Linux
89
+
93
90
  First, clone this repo.
94
91
  To run test searches on a track hub using the UCSC `TrixSearch`, navigate to `tests/testdata/test#` and run `bash test#script.sh`, where `#` is the test number.
95
92
  To change search terms, edit `searchterms.txt`.
@@ -97,7 +94,9 @@ To change search terms, edit `searchterms.txt`.
97
94
  **Wondering what to search for?**<br>
98
95
  Open up `tests/testdata/test#/input.txt`.
99
96
 
100
-
101
97
  **How to test my own .gff.gz data?**<br>
102
98
  Navigate to `/test/rawGenomes` and create a directory with your .gff.gz file in it. From within that directory, run `bash ../../programs/gff3ToInput.sh <.gff3.gz FILE> <OUTPUT NAME>`.
103
99
 
100
+ ## Reference
101
+
102
+ See https://genome.ucsc.edu/goldenPath/help/trix.html for the basic concepts of trix, and https://github.com/GMOD/ixixx-js for a JavaScript implementation of the `ixIxx` command.
package/dist/index.d.ts CHANGED
@@ -1,63 +1,13 @@
1
- /// <reference types="node" />
2
- import type { FileHandle } from 'fs/promises';
3
- import type { LocalFile, RemoteFile, BlobFile } from 'generic-filehandle';
4
- declare type AnyFile = LocalFile | RemoteFile | BlobFile | FileHandle;
5
- export default class Trix {
6
- private index;
7
- private ixFile;
8
- maxResults: number;
9
- /**
10
- * @param ixxFile [anyFile] the second-level trix index file produced by ixIxx.
11
- * @param ixFile [anyFile] the first-level trix index file produced by ixIxx.
12
- * @param maxResults [number] the maximum number of results to return. Default is set to 20.
13
- */
14
- constructor(ixxFile: AnyFile, ixFile: AnyFile, maxResults?: number);
15
- /**
16
- * Search trix for the given searchWord(s). Return up to {this.maxResults} results.
17
- * This method matches each index prefix against each searchWord. It does not do fuzzy matching.
18
- *
19
- * @param searchString [string] term(s) separated by spaces to search for id(s).
20
- * @returns results [Array<string>] where each string is a corresponding itemId.
21
- */
22
- search(searchString: string): Promise<string[]>;
23
- /**
24
- * Seek ahead to the correct position in the .ix file,
25
- * then load that chunk of .ix into a buffer.
26
- *
27
- * @param searchWord [string]
28
- * @returns a Buffer holding the sections we want to search.
29
- */
30
- private _getBuffer;
31
- /**
32
- * Given the end position of the last buffer,
33
- * load the next chunk of .ix data into a buffer and return it.
34
- *
35
- * @param seekPosStart [number] where to start loading data into the new buffer.
36
- * @returns a Buffer holding the chunk we want to search.
37
- */
38
- private _getNextChunk;
39
- /**
40
- * Create and return a buffer given the start and end position
41
- * of what to load from the .ix file.
42
- *
43
- * @param seekPosStart [number] byte the buffer should start reading from file.
44
- * @param seekPosEnd [number] byte the buffer should stop reading from file.
45
- * @returns a Buffer holding the chunk of data.
46
- */
47
- private _createBuffer;
48
- /**
49
- * Takes in a hit string and returns an array of result terms.
50
- *
51
- * @param line [string] The line of .ix that is a hit.
52
- * @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
53
- */
54
- private _parseHitString;
55
- /**
56
- * Parses ixx file and constructs a map of {word: ixFileLocation}
57
- *
58
- * @param ixxFile [anyFile] second level index that is produced by ixIxx.
59
- * @returns a ParsedIxx map.
60
- */
61
- private _parseIxx;
62
- }
63
- export {};
1
+ import type { GenericFilehandle } from 'generic-filehandle';
2
+ export default class Trix {
3
+ private ixFile;
4
+ private ixxFile;
5
+ maxResults: number;
6
+ constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
7
+ search(searchString: string, opts?: {
8
+ signal?: AbortSignal;
9
+ }): Promise<string[][]>;
10
+ private getIndex;
11
+ private _getBuffer;
12
+ private _parseIxx;
13
+ }
package/dist/index.js CHANGED
@@ -1,8 +1,280 @@
1
-
2
- 'use strict'
3
-
4
- if (process.env.NODE_ENV === 'production') {
5
- module.exports = require('./trix.cjs.production.min.js')
6
- } else {
7
- module.exports = require('./trix.cjs.development.js')
8
- }
1
+ "use strict";
2
+ var __assign = (this && this.__assign) || function () {
3
+ __assign = Object.assign || function(t) {
4
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
5
+ s = arguments[i];
6
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
7
+ t[p] = s[p];
8
+ }
9
+ return t;
10
+ };
11
+ return __assign.apply(this, arguments);
12
+ };
13
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
14
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
15
+ return new (P || (P = Promise))(function (resolve, reject) {
16
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
17
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
18
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
19
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
20
+ });
21
+ };
22
+ var __generator = (this && this.__generator) || function (thisArg, body) {
23
+ var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
24
+ return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
25
+ function verb(n) { return function (v) { return step([n, v]); }; }
26
+ function step(op) {
27
+ if (f) throw new TypeError("Generator is already executing.");
28
+ while (_) try {
29
+ if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
30
+ if (y = 0, t) op = [op[0] & 2, t.value];
31
+ switch (op[0]) {
32
+ case 0: case 1: t = op; break;
33
+ case 4: _.label++; return { value: op[1], done: false };
34
+ case 5: _.label++; y = op[1]; op = [0]; continue;
35
+ case 7: op = _.ops.pop(); _.trys.pop(); continue;
36
+ default:
37
+ if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
38
+ if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
39
+ if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
40
+ if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
41
+ if (t[2]) _.ops.pop();
42
+ _.trys.pop(); continue;
43
+ }
44
+ op = body.call(thisArg, _);
45
+ } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
46
+ if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
47
+ }
48
+ };
49
+ var __read = (this && this.__read) || function (o, n) {
50
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
51
+ if (!m) return o;
52
+ var i = m.call(o), r, ar = [], e;
53
+ try {
54
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
55
+ }
56
+ catch (error) { e = { error: error }; }
57
+ finally {
58
+ try {
59
+ if (r && !r.done && (m = i["return"])) m.call(i);
60
+ }
61
+ finally { if (e) throw e.error; }
62
+ }
63
+ return ar;
64
+ };
65
+ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
66
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
67
+ if (ar || !(i in from)) {
68
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
69
+ ar[i] = from[i];
70
+ }
71
+ }
72
+ return to.concat(ar || Array.prototype.slice.call(from));
73
+ };
74
+ var __values = (this && this.__values) || function(o) {
75
+ var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
76
+ if (m) return m.call(o);
77
+ if (o && typeof o.length === "number") return {
78
+ next: function () {
79
+ if (o && i >= o.length) o = void 0;
80
+ return { value: o && o[i++], done: !o };
81
+ }
82
+ };
83
+ throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
84
+ };
85
+ Object.defineProperty(exports, "__esModule", { value: true });
86
+ var trixPrefixSize = 5;
87
+ var CHUNKSIZE = 65536;
88
+ // Define this object with .ixx and .ix files.
89
+ // Then use the search() method to search for a word.
90
+ var Trix = /** @class */ (function () {
91
+ function Trix(ixxFile, ixFile, maxResults) {
92
+ if (maxResults === void 0) { maxResults = 20; }
93
+ this.ixFile = ixFile;
94
+ this.ixxFile = ixxFile;
95
+ this.maxResults = maxResults;
96
+ }
97
+ Trix.prototype.search = function (searchString, opts) {
98
+ return __awaiter(this, void 0, void 0, function () {
99
+ var resultArr, searchWords, _loop_1, this_1, w;
100
+ return __generator(this, function (_a) {
101
+ switch (_a.label) {
102
+ case 0:
103
+ resultArr = [];
104
+ searchWords = searchString.split(' ');
105
+ _loop_1 = function (w) {
106
+ var searchWord, done, res, prevLen, _loop_2, state_1;
107
+ return __generator(this, function (_b) {
108
+ switch (_b.label) {
109
+ case 0:
110
+ searchWord = searchWords[w].toLowerCase();
111
+ done = false;
112
+ return [4 /*yield*/, this_1._getBuffer(searchWord, opts)];
113
+ case 1:
114
+ res = _b.sent();
115
+ prevLen = void 0;
116
+ _loop_2 = function () {
117
+ var seekPosEnd, buffer, foundSomething, str, lines, hits, res_1;
118
+ return __generator(this, function (_c) {
119
+ switch (_c.label) {
120
+ case 0:
121
+ seekPosEnd = res.seekPosEnd, buffer = res.buffer;
122
+ foundSomething = false;
123
+ str = buffer.toString();
124
+ lines = str
125
+ .slice(0, str.lastIndexOf('\n'))
126
+ .split('\n')
127
+ .filter(function (f) { return !!f; });
128
+ hits = lines
129
+ .filter(function (line) {
130
+ var word = line.split(' ')[0];
131
+ var match = word.startsWith(searchString);
132
+ if (!foundSomething && match) {
133
+ foundSomething = true;
134
+ }
135
+ else if (foundSomething && !match) {
136
+ done = true;
137
+ }
138
+ else if (word > searchString) {
139
+ done = true;
140
+ }
141
+ return match;
142
+ })
143
+ .map(function (line) {
144
+ var _a = __read(line.split(' ')), term = _a[0], parts = _a.slice(1);
145
+ return parts.map(function (elt) { return [term, elt.split(',')[0]]; });
146
+ })
147
+ .flat();
148
+ if (!hits.length) {
149
+ done = true;
150
+ }
151
+ if (prevLen === hits.length) {
152
+ done = true;
153
+ }
154
+ if (!(resultArr.length + hits.length < this_1.maxResults && !done)) return [3 /*break*/, 2];
155
+ return [4 /*yield*/, this_1.ixFile.read(Buffer.alloc(CHUNKSIZE), 0, CHUNKSIZE, seekPosEnd, opts)
156
+ //early break if empty response
157
+ ];
158
+ case 1:
159
+ res_1 = _c.sent();
160
+ //early break if empty response
161
+ if (!res_1.bytesRead) {
162
+ resultArr = resultArr.concat(hits);
163
+ return [2 /*return*/, "break"];
164
+ }
165
+ buffer = Buffer.concat([buffer, res_1.buffer]);
166
+ seekPosEnd += CHUNKSIZE;
167
+ prevLen = hits.length;
168
+ return [3 /*break*/, 3];
169
+ case 2:
170
+ if (resultArr.length + hits.length >= this_1.maxResults || done) {
171
+ resultArr = resultArr.concat(hits);
172
+ return [2 /*return*/, "break"];
173
+ }
174
+ _c.label = 3;
175
+ case 3: return [2 /*return*/];
176
+ }
177
+ });
178
+ };
179
+ _b.label = 2;
180
+ case 2:
181
+ if (!(res && !done)) return [3 /*break*/, 4];
182
+ return [5 /*yield**/, _loop_2()];
183
+ case 3:
184
+ state_1 = _b.sent();
185
+ if (state_1 === "break")
186
+ return [3 /*break*/, 4];
187
+ return [3 /*break*/, 2];
188
+ case 4: return [2 /*return*/];
189
+ }
190
+ });
191
+ };
192
+ this_1 = this;
193
+ w = 0;
194
+ _a.label = 1;
195
+ case 1:
196
+ if (!(w < searchWords.length)) return [3 /*break*/, 4];
197
+ return [5 /*yield**/, _loop_1(w)];
198
+ case 2:
199
+ _a.sent();
200
+ _a.label = 3;
201
+ case 3:
202
+ w++;
203
+ return [3 /*break*/, 1];
204
+ case 4: return [2 /*return*/, __spreadArray([], __read(resultArr), false).slice(0, this.maxResults)];
205
+ }
206
+ });
207
+ });
208
+ };
209
+ Trix.prototype.getIndex = function (opts) {
210
+ return this._parseIxx(this.ixxFile, opts);
211
+ };
212
+ Trix.prototype._getBuffer = function (searchWord, opts) {
213
+ return __awaiter(this, void 0, void 0, function () {
214
+ var seekPosStart, seekPosEnd, indexes, indexes_1, indexes_1_1, _a, key, value, trimmedKey, len, res;
215
+ var e_1, _b;
216
+ return __generator(this, function (_c) {
217
+ switch (_c.label) {
218
+ case 0:
219
+ seekPosStart = 0;
220
+ seekPosEnd = -1;
221
+ return [4 /*yield*/, this.getIndex(opts)];
222
+ case 1:
223
+ indexes = _c.sent();
224
+ try {
225
+ for (indexes_1 = __values(indexes), indexes_1_1 = indexes_1.next(); !indexes_1_1.done; indexes_1_1 = indexes_1.next()) {
226
+ _a = __read(indexes_1_1.value, 2), key = _a[0], value = _a[1];
227
+ trimmedKey = key.slice(0, searchWord.length);
228
+ if (trimmedKey >= searchWord) {
229
+ break;
230
+ }
231
+ else {
232
+ seekPosStart = value;
233
+ seekPosEnd = value + 65536;
234
+ }
235
+ }
236
+ }
237
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
238
+ finally {
239
+ try {
240
+ if (indexes_1_1 && !indexes_1_1.done && (_b = indexes_1.return)) _b.call(indexes_1);
241
+ }
242
+ finally { if (e_1) throw e_1.error; }
243
+ }
244
+ len = seekPosEnd - seekPosStart;
245
+ if (len < 0) {
246
+ return [2 /*return*/, undefined];
247
+ }
248
+ return [4 /*yield*/, this.ixFile.read(Buffer.alloc(len), 0, len, seekPosStart, opts)];
249
+ case 2:
250
+ res = _c.sent();
251
+ return [2 /*return*/, __assign(__assign({}, res), { seekPosEnd: seekPosEnd })];
252
+ }
253
+ });
254
+ });
255
+ };
256
+ Trix.prototype._parseIxx = function (ixxFile, opts) {
257
+ return __awaiter(this, void 0, void 0, function () {
258
+ var file;
259
+ return __generator(this, function (_a) {
260
+ switch (_a.label) {
261
+ case 0: return [4 /*yield*/, ixxFile.readFile(__assign({ encoding: 'utf8' }, opts))];
262
+ case 1:
263
+ file = (_a.sent());
264
+ return [2 /*return*/, new Map(file
265
+ .split('\n')
266
+ .filter(function (f) { return !!f; })
267
+ .map(function (line) {
268
+ var prefix = line.slice(0, trixPrefixSize);
269
+ var posStr = line.slice(trixPrefixSize);
270
+ var pos = Number.parseInt(posStr, 16);
271
+ return [prefix, pos];
272
+ }))];
273
+ }
274
+ });
275
+ });
276
+ };
277
+ return Trix;
278
+ }());
279
+ exports.default = Trix;
280
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,IAAM,cAAc,GAAG,CAAC,CAAA;AAExB,IAAM,SAAS,GAAG,KAAK,CAAA;AAEvB,8CAA8C;AAC9C,2DAA2D;AAC3D;IAKE,cACE,OAA0B,EAC1B,MAAyB,EACzB,UAAe;QAAf,2BAAA,EAAA,eAAe;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAEK,qBAAM,GAAZ,UAAa,YAAoB,EAAE,IAA+B;;;;;;wBAC5D,SAAS,GAAG,EAAgB,CAAA;wBAC1B,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;4CAClC,CAAC;;;;;wCACF,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;wCAC3C,IAAI,GAAG,KAAK,CAAA;wCACJ,qBAAM,OAAK,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,EAAA;;wCAA7C,GAAG,GAAG,SAAuC;wCAC/C,OAAO,SAAA,CAAA;;;;;;wDAGH,UAAU,GAAa,GAAG,WAAhB,EAAE,MAAM,GAAK,GAAG,OAAR,CAAQ;wDAC5B,cAAc,GAAG,KAAK,CAAA;wDACpB,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;wDAIvB,KAAK,GAAG,GAAG;6DACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;6DAC/B,KAAK,CAAC,IAAI,CAAC;6DACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC,CAAA;wDAEb,IAAI,GAAG,KAAK;6DACf,MAAM,CAAC,UAAA,IAAI;4DACV,IAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;4DAC/B,IAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,CAAA;4DAC3C,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;gEAC5B,cAAc,GAAG,IAAI,CAAA;6DACtB;iEAAM,IAAI,cAAc,IAAI,CAAC,KAAK,EAAE;gEACnC,IAAI,GAAG,IAAI,CAAA;6DACZ;iEAAM,IAAI,IAAI,GAAG,YAAY,EAAE;gEAC9B,IAAI,GAAG,IAAI,CAAA;6DACZ;4DACD,OAAO,KAAK,CAAA;wDACd,CAAC,CAAC;6DACD,GAAG,CAAC,UAAA,IAAI;4DACD,IAAA,KAAA,OAAmB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA,EAAjC,IAAI,QAAA,EAAK,KAAK,cAAmB,CAAA;4DACxC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAA,GAAG,IAAI,OAAA,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAA;wDACpD,CAAC,CAAC;6DACD,IAAI,EAAwB,CAAA;wDAE/B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;4DAChB,IAAI,GAAG,IAAI,CAAA;yDACZ;wDACD,IAAI,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE;4DAC3B,IAAI,GAAG,IAAI,CAAA;yDACZ;6DAEG,CAAA,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,O
AAK,UAAU,IAAI,CAAC,IAAI,CAAA,EAAzD,wBAAyD;wDAC/C,qBAAM,OAAK,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EACvB,CAAC,EACD,SAAS,EACT,UAAU,EACV,IAAI,CACL;4DAED,+BAA+B;0DAF9B;;wDANK,QAAM,SAMX;wDAED,+BAA+B;wDAC/B,IAAI,CAAC,KAAG,CAAC,SAAS,EAAE;4DAClB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;wDACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,KAAG,CAAC,MAAM,CAAC,CAAC,CAAA;wDAC5C,UAAU,IAAI,SAAS,CAAA;wDACvB,OAAO,GAAG,IAAI,CAAC,MAAM,CAAA;;;wDAChB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAK,UAAU,IAAI,IAAI,EAAE;4DACpE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;;;;;;;;6CA1DI,CAAA,GAAG,IAAI,CAAC,IAAI,CAAA;;;;;;;;;;;;wBANZ,CAAC,GAAG,CAAC;;;6BAAE,CAAA,CAAC,GAAG,WAAW,CAAC,MAAM,CAAA;sDAA7B,CAAC;;;;;wBAA8B,CAAC,EAAE,CAAA;;4BAoE3C,sBAAO,yBAAI,SAAS,UAAE,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,EAAA;;;;KAChD;IAEO,uBAAQ,GAAhB,UAAiB,IAA+B;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;IAC3C,CAAC;IAEa,yBAAU,GAAxB,UACE,UAAkB,EAClB,IAA+B;;;;;;;wBAE3B,YAAY,GAAG,CAAC,CAAA;wBAChB,UAAU,GAAG,CAAC,CAAC,CAAA;wBACH,qBAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAA;;wBAAnC,OAAO,GAAG,SAAyB;;4BACzC,KAA2B,YAAA,SAAA,OAAO,CAAA,qFAAE;gCAAzB,KAAA,4BAAY,EAAX,GAAG,QAAA,EAAE,KAAK,QAAA;gCACd,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;gCAClD,IAAI,UAAU,IAAI,UAAU,EAAE;oCAC5B,MAAK;iCACN;qCAAM;oCACL,YAAY,GAAG,KAAK,CAAA;oCACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;iCAC3B;6BACF;;;;;;;;;wBAGK,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;wBACrC,IAAI,GAAG,GAAG,CAAC,EAAE;4BACX,sBAAO,SAAS,EAAA;yBACjB;wBACW,qBAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,EAAA;;wBANK,GAAG,GAAG,SAMX;wBACD,4CACK,GAAG,KACN,UAAU,YAAA,KACX;;;;KACF;IAEa,wBAAS,GAAvB,UACE,OAA0B,EAC1B,IAA+B;;;;;4BAEjB,qBAAM,OAAO,CAAC,QAAQ,YAClC,QAAQ,EAAE,MAAM,IACb,IAAI,EACP,EAAA;;wBAHI,IAAI,GAAG,CAAC,SAGZ,CAAW;wBACb,sBAAO,IAAI,GAAG,CACZ,IAAI;iCACD,KAAK,CAAC,IAAI,CAAC;iCACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC;iCAChB,GAAG,CAAC,UAAA,IAAI;gCACP
,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAA;gCAC5C,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;gCACzC,IAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;gCACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;4BACtB,CAAC,CAAC,CACL,EAAA;;;;KACF;IACH,WAAC;AAAD,CAAC,AApJD,IAoJC"}
package/esm/index.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ import type { GenericFilehandle } from 'generic-filehandle';
2
+ export default class Trix {
3
+ private ixFile;
4
+ private ixxFile;
5
+ maxResults: number;
6
+ constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
7
+ search(searchString: string, opts?: {
8
+ signal?: AbortSignal;
9
+ }): Promise<[string, string][]>;
10
+ private getIndex;
11
+ private _getBuffer;
12
+ }
package/esm/index.js ADDED
@@ -0,0 +1,120 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const TRIX_PREFIX_SIZE = 5;
4
+ const CHUNK_SIZE = 65536;
5
+ // https://stackoverflow.com/a/9229821/2129219
6
+ function uniqBy(a, key) {
7
+ let seen = new Set();
8
+ return a.filter(item => {
9
+ let k = key(item);
10
+ return seen.has(k) ? false : seen.add(k);
11
+ });
12
+ }
13
+ class Trix {
14
+ constructor(ixxFile, ixFile, maxResults = 20) {
15
+ this.ixFile = ixFile;
16
+ this.ixxFile = ixxFile;
17
+ this.maxResults = maxResults;
18
+ }
19
+ async search(searchString, opts) {
20
+ let resultArr = [];
21
+ const searchWords = searchString.split(' ');
22
+ // we only search one word at a time
23
+ const searchWord = searchWords[0].toLowerCase();
24
+ const res = await this._getBuffer(searchWord, opts);
25
+ if (!res) {
26
+ return [];
27
+ }
28
+ let { seekPosEnd, buffer } = res;
29
+ let done = false;
30
+ while (!done) {
31
+ let foundSomething = false;
32
+ const str = buffer.toString();
33
+ // slice to lastIndexOf('\n') to make sure we get complete records
34
+ // since the buffer fetch could get halfway into a record
35
+ const lines = str
36
+ .slice(0, str.lastIndexOf('\n'))
37
+ .split('\n')
38
+ .filter(f => !!f);
39
+ const hits = lines
40
+ // eslint-disable-next-line @typescript-eslint/no-loop-func
41
+ .filter(line => {
42
+ const word = line.split(' ')[0];
43
+ const match = word.startsWith(searchWord);
44
+ if (!foundSomething && match) {
45
+ foundSomething = true;
46
+ }
47
+ // we are done scanning if we are lexicographically greater than the
48
+ // search string
49
+ if (word > searchWord) {
50
+ done = true;
51
+ }
52
+ return match;
53
+ })
54
+ .map(line => {
55
+ const [term, ...parts] = line.split(' ');
56
+ return parts.map(elt => [term, elt.split(',')[0]]);
57
+ })
58
+ .flat();
59
+ // if we are not done, and we haven't filled up maxResults with hits yet,
60
+ // then refetch
61
+ if (resultArr.length + hits.length < this.maxResults && !done) {
62
+ // eslint-disable-next-line no-await-in-loop
63
+ const res2 = await this.ixFile.read(Buffer.alloc(CHUNK_SIZE), 0, CHUNK_SIZE, seekPosEnd, opts);
64
+ // early break if empty response
65
+ if (!res2.bytesRead) {
66
+ resultArr = resultArr.concat(hits);
67
+ break;
68
+ }
69
+ buffer = Buffer.concat([buffer, res2.buffer]);
70
+ seekPosEnd += CHUNK_SIZE;
71
+ }
72
+ // if we have filled up the hits, or we are detected to be done via the
73
+ // filtering, then return
74
+ else if (resultArr.length + hits.length >= this.maxResults || done) {
75
+ resultArr = resultArr.concat(hits);
76
+ break;
77
+ }
78
+ }
79
+ // deduplicate results based on the detail column (resultArr[1])
80
+ return uniqBy(resultArr, elt => elt[1]).slice(0, this.maxResults);
81
+ }
82
+ async getIndex(opts) {
83
+ const file = await this.ixxFile.readFile({
84
+ encoding: 'utf8',
85
+ ...opts,
86
+ });
87
+ return file
88
+ .split('\n')
89
+ .filter(f => !!f)
90
+ .map(line => {
91
+ const prefix = line.slice(0, TRIX_PREFIX_SIZE);
92
+ const posStr = line.slice(TRIX_PREFIX_SIZE);
93
+ const pos = Number.parseInt(posStr, 16);
94
+ return [prefix, pos];
95
+ });
96
+ }
97
+ async _getBuffer(searchWord, opts) {
98
+ let seekPosStart = 0;
99
+ let seekPosEnd = -1;
100
+ const indexes = await this.getIndex(opts);
101
+ indexes.forEach(([key, value]) => {
102
+ const trimmedKey = key.slice(0, searchWord.length);
103
+ if (trimmedKey < searchWord) {
104
+ seekPosStart = value;
105
+ seekPosEnd = value + 65536;
106
+ }
107
+ });
108
+ // Return the buffer and its end position in the file.
109
+ const len = seekPosEnd - seekPosStart;
110
+ if (len < 0) {
111
+ return undefined;
112
+ }
113
+ const res = await this.ixFile.read(Buffer.alloc(len), 0, len, seekPosStart, opts);
114
+ return {
115
+ ...res,
116
+ seekPosEnd,
117
+ };
118
+ }
119
+ }
120
+ exports.default = Trix;