@gmod/trix 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +35 -36
- package/dist/index.d.ts +5 -51
- package/dist/index.js +160 -310
- package/dist/index.js.map +1 -0
- package/esm/index.d.ts +12 -0
- package/esm/index.js +120 -0
- package/esm/index.js.map +1 -0
- package/package.json +20 -12
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
[Build status](https://github.com/GMOD/trix-js/actions?query=branch%3Amain+workflow%3APush+)
|
|
2
|
+
|
|
1
3
|
# trix-js
|
|
4
|
+
|
|
2
5
|
Read UCSC Trix indexes in pure JavaScript
|
|
3
6
|
|
|
4
7
|
## Usage
|
|
@@ -9,37 +12,43 @@ import { RemoteFile } from 'generic-filehandle'
|
|
|
9
12
|
|
|
10
13
|
// any filehandle object that supports the Nodejs FileHandle API will work.
|
|
11
14
|
// We use generic-filehandle here to demonstrate searching files on remote servers.
|
|
12
|
-
const ixxFile = new RemoteFile(
|
|
13
|
-
|
|
15
|
+
const ixxFile = new RemoteFile(
|
|
16
|
+
'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx',
|
|
17
|
+
)
|
|
18
|
+
const ixFile = new RemoteFile(
|
|
19
|
+
'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix',
|
|
20
|
+
)
|
|
14
21
|
|
|
15
|
-
const trix = new Trix(ixxFile, ixFile)
|
|
22
|
+
const trix = new Trix(ixxFile, ixFile)
|
|
16
23
|
|
|
17
24
|
async function doStuff() {
|
|
18
|
-
const results = await trix.search('oca')
|
|
19
|
-
console.log(results)
|
|
25
|
+
const results = await trix.search('oca')
|
|
26
|
+
console.log(results)
|
|
20
27
|
}
|
|
21
|
-
doStuff()
|
|
22
|
-
|
|
28
|
+
doStuff()
|
|
23
29
|
```
|
|
24
30
|
|
|
25
31
|
## Documentation
|
|
32
|
+
|
|
26
33
|
### Trix constructor
|
|
34
|
+
|
|
27
35
|
The Trix class constructor accepts arguments:
|
|
36
|
+
|
|
28
37
|
- `ixxFile` - a filehandle object for the trix .ixx file
|
|
29
38
|
- `ixFile` - a filehandle object for the trix .ix file
|
|
30
39
|
- `maxResults = 20` - an optional number specifying the maximum number of results to return on `trix.search()`
|
|
31
40
|
|
|
32
|
-
|
|
33
41
|
### Trix search
|
|
42
|
+
|
|
34
43
|
**Search the index files for a term and find its keys.**<br>
|
|
35
44
|
**In the case of searching with multiple words, `trix.search()` finds the intersection of the result sets.**<br>
|
|
36
45
|
The Trix search function accepts argument:
|
|
46
|
+
|
|
37
47
|
- `searchString` - a string of space-separated words to search the index file for; the keys matching those words are returned<br>
|
|
38
|
-
|
|
48
|
+
|
|
39
49
|
The Trix search function returns: <br>
|
|
40
|
-
- `Promise<string[]>` - a promised array of strings where each string is an itemId result
|
|
41
|
-
|
|
42
50
|
|
|
51
|
+
- `Promise<[string, string][]>` - a promised array of `[term, result]` pairs, where `term` is the matching word from the left column of the trix index and `result` is the corresponding itemId from the right column
|
|
43
52
|
|
|
44
53
|
## Examples
|
|
45
54
|
|
|
@@ -47,49 +56,37 @@ The Trix search function returns: <br>
|
|
|
47
56
|
import { LocalFile } from 'generic-filehandle'
|
|
48
57
|
import Trix from '@gmod/trix'
|
|
49
58
|
|
|
50
|
-
const ixxFile = new LocalFile('out.ixx')
|
|
51
|
-
const ixFile = new LocalFile('out.ix')
|
|
59
|
+
const ixxFile = new LocalFile('out.ixx')
|
|
60
|
+
const ixFile = new LocalFile('out.ix')
|
|
52
61
|
|
|
53
62
|
// limit maxResults to 5
|
|
54
|
-
const trix = new Trix(ixxFile, ixFile, 5)
|
|
63
|
+
const trix = new Trix(ixxFile, ixFile, 5)
|
|
55
64
|
|
|
56
65
|
async function doStuff() {
|
|
57
|
-
const results1 = await trix.search('herc')
|
|
58
|
-
console.log(results1)
|
|
66
|
+
const results1 = await trix.search('herc')
|
|
67
|
+
console.log(results1)
|
|
59
68
|
|
|
60
69
|
// increase maxResults to 30
|
|
61
|
-
trix.maxResults = 30
|
|
70
|
+
trix.maxResults = 30
|
|
62
71
|
|
|
63
|
-
const results2 = await trix.search('linc')
|
|
64
|
-
console.log(results2)
|
|
72
|
+
const results2 = await trix.search('linc')
|
|
73
|
+
console.log(results2)
|
|
65
74
|
}
|
|
66
75
|
|
|
67
|
-
doStuff()
|
|
76
|
+
doStuff()
|
|
68
77
|
```
|
|
69
|
-
<br><br>
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
78
|
|
|
79
|
+
<br><br>
|
|
84
80
|
|
|
85
81
|
## Development
|
|
86
82
|
|
|
87
|
-
|
|
88
83
|
### Test trix-js
|
|
84
|
+
|
|
89
85
|
First, clone this repo and install npm packages. <br>
|
|
90
86
|
Then, run `npm test`. <br>
|
|
91
87
|
|
|
92
88
|
### Test the UCSC TrixSearch - Requires Linux
|
|
89
|
+
|
|
93
90
|
First, clone this repo.
|
|
94
91
|
To run test searches on a track hub using the UCSC `TrixSearch`, navigate to `tests/testdata/test#` and run `bash test#script.sh` where # is the test number.
|
|
95
92
|
To change search terms, edit `searchterms.txt`.
|
|
@@ -97,7 +94,9 @@ To change search terms, edit `searchterms.txt`.
|
|
|
97
94
|
**Wondering what to search for?**<br>
|
|
98
95
|
Open up `tests/testdata/test#/input.txt`.
|
|
99
96
|
|
|
100
|
-
|
|
101
97
|
**How to test my own .gff.gz data?**<br>
|
|
102
98
|
Navigate to `/test/rawGenomes` and create a directory with your .gff.gz file in it. From within that directory, run `bash ../../programs/gff3ToInput.sh <.gff3.gz FILE> <OUTPUT NAME>`.
|
|
103
99
|
|
|
100
|
+
## Reference
|
|
101
|
+
|
|
102
|
+
See https://genome.ucsc.edu/goldenPath/help/trix.html for the basic concepts of trix, and https://github.com/GMOD/ixixx-js for a JavaScript implementation of the ixIxx command.
|
package/dist/index.d.ts
CHANGED
|
@@ -1,59 +1,13 @@
|
|
|
1
1
|
import type { GenericFilehandle } from 'generic-filehandle';
|
|
2
2
|
export default class Trix {
|
|
3
|
-
private index;
|
|
4
3
|
private ixFile;
|
|
4
|
+
private ixxFile;
|
|
5
5
|
maxResults: number;
|
|
6
|
-
/**
|
|
7
|
-
* @param ixxFile [anyFile] the second-level trix index file produced by ixIxx.
|
|
8
|
-
* @param ixFile [anyFile] the first-level trix index file produced by ixIxx.
|
|
9
|
-
* @param maxResults [number] the maximum number of results to return. Default is set to 20.
|
|
10
|
-
*/
|
|
11
6
|
constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
* @param searchString [string] term(s) separated by spaces to search for id(s).
|
|
17
|
-
* @returns results [Array<string>] where each string is a corresponding itemId.
|
|
18
|
-
*/
|
|
19
|
-
search(searchString: string): Promise<string[]>;
|
|
20
|
-
/**
|
|
21
|
-
* Seek ahead to the correct position in the .ix file,
|
|
22
|
-
* then load that chunk of .ix into a buffer.
|
|
23
|
-
*
|
|
24
|
-
* @param searchWord [string]
|
|
25
|
-
* @returns a Buffer holding the sections we want to search.
|
|
26
|
-
*/
|
|
7
|
+
search(searchString: string, opts?: {
|
|
8
|
+
signal?: AbortSignal;
|
|
9
|
+
}): Promise<string[][]>;
|
|
10
|
+
private getIndex;
|
|
27
11
|
private _getBuffer;
|
|
28
|
-
/**
|
|
29
|
-
* Given the end position of the last buffer,
|
|
30
|
-
* load the next chunk of .ix data into a buffer and return it.
|
|
31
|
-
*
|
|
32
|
-
* @param seekPosStart [number] where to start loading data into the new buffer.
|
|
33
|
-
* @returns a Buffer holding the chunk we want to search.
|
|
34
|
-
*/
|
|
35
|
-
private _getNextChunk;
|
|
36
|
-
/**
|
|
37
|
-
* Create and return a buffer given the start and end position
|
|
38
|
-
* of what to load from the .ix file.
|
|
39
|
-
*
|
|
40
|
-
* @param seekPosStart [number] byte the buffer should start reading from file.
|
|
41
|
-
* @param seekPosEnd [number] byte the buffer should stop reading from file.
|
|
42
|
-
* @returns a Buffer holding the chunk of data.
|
|
43
|
-
*/
|
|
44
|
-
private _createBuffer;
|
|
45
|
-
/**
|
|
46
|
-
* Takes in a hit string and returns an array of result terms.
|
|
47
|
-
*
|
|
48
|
-
* @param line [string] The line of .ix that is a hit.
|
|
49
|
-
* @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
|
|
50
|
-
*/
|
|
51
|
-
private _parseHitString;
|
|
52
|
-
/**
|
|
53
|
-
* Parses ixx file and constructs a map of {word: ixFileLocation}
|
|
54
|
-
*
|
|
55
|
-
* @param ixxFile [anyFile] second level index that is produced by ixIxx.
|
|
56
|
-
* @returns a ParsedIxx map.
|
|
57
|
-
*/
|
|
58
12
|
private _parseIxx;
|
|
59
13
|
}
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __assign = (this && this.__assign) || function () {
|
|
3
|
+
__assign = Object.assign || function(t) {
|
|
4
|
+
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
|
5
|
+
s = arguments[i];
|
|
6
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
|
|
7
|
+
t[p] = s[p];
|
|
8
|
+
}
|
|
9
|
+
return t;
|
|
10
|
+
};
|
|
11
|
+
return __assign.apply(this, arguments);
|
|
12
|
+
};
|
|
2
13
|
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
14
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
15
|
return new (P || (P = Promise))(function (resolve, reject) {
|
|
@@ -35,17 +46,6 @@ var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
|
35
46
|
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
|
|
36
47
|
}
|
|
37
48
|
};
|
|
38
|
-
var __values = (this && this.__values) || function(o) {
|
|
39
|
-
var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
|
|
40
|
-
if (m) return m.call(o);
|
|
41
|
-
if (o && typeof o.length === "number") return {
|
|
42
|
-
next: function () {
|
|
43
|
-
if (o && i >= o.length) o = void 0;
|
|
44
|
-
return { value: o && o[i++], done: !o };
|
|
45
|
-
}
|
|
46
|
-
};
|
|
47
|
-
throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
|
|
48
|
-
};
|
|
49
49
|
var __read = (this && this.__read) || function (o, n) {
|
|
50
50
|
var m = typeof Symbol === "function" && o[Symbol.iterator];
|
|
51
51
|
if (!m) return o;
|
|
@@ -62,362 +62,211 @@ var __read = (this && this.__read) || function (o, n) {
|
|
|
62
62
|
}
|
|
63
63
|
return ar;
|
|
64
64
|
};
|
|
65
|
+
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
66
|
+
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
|
|
67
|
+
if (ar || !(i in from)) {
|
|
68
|
+
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
|
|
69
|
+
ar[i] = from[i];
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
return to.concat(ar || Array.prototype.slice.call(from));
|
|
73
|
+
};
|
|
74
|
+
var __values = (this && this.__values) || function(o) {
|
|
75
|
+
var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
|
|
76
|
+
if (m) return m.call(o);
|
|
77
|
+
if (o && typeof o.length === "number") return {
|
|
78
|
+
next: function () {
|
|
79
|
+
if (o && i >= o.length) o = void 0;
|
|
80
|
+
return { value: o && o[i++], done: !o };
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
|
|
84
|
+
};
|
|
65
85
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
66
86
|
var trixPrefixSize = 5;
|
|
87
|
+
var CHUNKSIZE = 65536;
|
|
67
88
|
// Define this object with .ixx and .ix files.
|
|
68
89
|
// Then use the search() method to search for a word.
|
|
69
90
|
var Trix = /** @class */ (function () {
|
|
70
|
-
/**
|
|
71
|
-
* @param ixxFile [anyFile] the second-level trix index file produced by ixIxx.
|
|
72
|
-
* @param ixFile [anyFile] the first-level trix index file produced by ixIxx.
|
|
73
|
-
* @param maxResults [number] the maximum number of results to return. Default is set to 20.
|
|
74
|
-
*/
|
|
75
91
|
function Trix(ixxFile, ixFile, maxResults) {
|
|
76
92
|
if (maxResults === void 0) { maxResults = 20; }
|
|
77
|
-
this.index = this._parseIxx(ixxFile);
|
|
78
93
|
this.ixFile = ixFile;
|
|
94
|
+
this.ixxFile = ixxFile;
|
|
79
95
|
this.maxResults = maxResults;
|
|
80
96
|
}
|
|
81
|
-
|
|
82
|
-
* Search trix for the given searchWord(s). Return up to {this.maxResults} results.
|
|
83
|
-
* This method matches each index prefix against each searchWord. It does not do fuzzy matching.
|
|
84
|
-
*
|
|
85
|
-
* @param searchString [string] term(s) separated by spaces to search for id(s).
|
|
86
|
-
* @returns results [Array<string>] where each string is a corresponding itemId.
|
|
87
|
-
*/
|
|
88
|
-
Trix.prototype.search = function (searchString) {
|
|
97
|
+
Trix.prototype.search = function (searchString, opts) {
|
|
89
98
|
return __awaiter(this, void 0, void 0, function () {
|
|
90
|
-
var resultArr,
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
switch (_b.label) {
|
|
99
|
+
var resultArr, searchWords, _loop_1, this_1, w;
|
|
100
|
+
return __generator(this, function (_a) {
|
|
101
|
+
switch (_a.label) {
|
|
94
102
|
case 0:
|
|
95
103
|
resultArr = [];
|
|
96
|
-
firstWord = true;
|
|
97
|
-
initialSet = new Set();
|
|
98
104
|
searchWords = searchString.split(' ');
|
|
105
|
+
_loop_1 = function (w) {
|
|
106
|
+
var searchWord, done, res, prevLen, _loop_2, state_1;
|
|
107
|
+
return __generator(this, function (_b) {
|
|
108
|
+
switch (_b.label) {
|
|
109
|
+
case 0:
|
|
110
|
+
searchWord = searchWords[w].toLowerCase();
|
|
111
|
+
done = false;
|
|
112
|
+
return [4 /*yield*/, this_1._getBuffer(searchWord, opts)];
|
|
113
|
+
case 1:
|
|
114
|
+
res = _b.sent();
|
|
115
|
+
prevLen = void 0;
|
|
116
|
+
_loop_2 = function () {
|
|
117
|
+
var seekPosEnd, buffer, foundSomething, str, lines, hits, res_1;
|
|
118
|
+
return __generator(this, function (_c) {
|
|
119
|
+
switch (_c.label) {
|
|
120
|
+
case 0:
|
|
121
|
+
seekPosEnd = res.seekPosEnd, buffer = res.buffer;
|
|
122
|
+
foundSomething = false;
|
|
123
|
+
str = buffer.toString();
|
|
124
|
+
lines = str
|
|
125
|
+
.slice(0, str.lastIndexOf('\n'))
|
|
126
|
+
.split('\n')
|
|
127
|
+
.filter(function (f) { return !!f; });
|
|
128
|
+
hits = lines
|
|
129
|
+
.filter(function (line) {
|
|
130
|
+
var word = line.split(' ')[0];
|
|
131
|
+
var match = word.startsWith(searchString);
|
|
132
|
+
if (!foundSomething && match) {
|
|
133
|
+
foundSomething = true;
|
|
134
|
+
}
|
|
135
|
+
else if (foundSomething && !match) {
|
|
136
|
+
done = true;
|
|
137
|
+
}
|
|
138
|
+
else if (word > searchString) {
|
|
139
|
+
done = true;
|
|
140
|
+
}
|
|
141
|
+
return match;
|
|
142
|
+
})
|
|
143
|
+
.map(function (line) {
|
|
144
|
+
var _a = __read(line.split(' ')), term = _a[0], parts = _a.slice(1);
|
|
145
|
+
return parts.map(function (elt) { return [term, elt.split(',')[0]]; });
|
|
146
|
+
})
|
|
147
|
+
.flat();
|
|
148
|
+
if (!hits.length) {
|
|
149
|
+
done = true;
|
|
150
|
+
}
|
|
151
|
+
if (prevLen === hits.length) {
|
|
152
|
+
done = true;
|
|
153
|
+
}
|
|
154
|
+
if (!(resultArr.length + hits.length < this_1.maxResults && !done)) return [3 /*break*/, 2];
|
|
155
|
+
return [4 /*yield*/, this_1.ixFile.read(Buffer.alloc(CHUNKSIZE), 0, CHUNKSIZE, seekPosEnd, opts)
|
|
156
|
+
//early break if empty response
|
|
157
|
+
];
|
|
158
|
+
case 1:
|
|
159
|
+
res_1 = _c.sent();
|
|
160
|
+
//early break if empty response
|
|
161
|
+
if (!res_1.bytesRead) {
|
|
162
|
+
resultArr = resultArr.concat(hits);
|
|
163
|
+
return [2 /*return*/, "break"];
|
|
164
|
+
}
|
|
165
|
+
buffer = Buffer.concat([buffer, res_1.buffer]);
|
|
166
|
+
seekPosEnd += CHUNKSIZE;
|
|
167
|
+
prevLen = hits.length;
|
|
168
|
+
return [3 /*break*/, 3];
|
|
169
|
+
case 2:
|
|
170
|
+
if (resultArr.length + hits.length >= this_1.maxResults || done) {
|
|
171
|
+
resultArr = resultArr.concat(hits);
|
|
172
|
+
return [2 /*return*/, "break"];
|
|
173
|
+
}
|
|
174
|
+
_c.label = 3;
|
|
175
|
+
case 3: return [2 /*return*/];
|
|
176
|
+
}
|
|
177
|
+
});
|
|
178
|
+
};
|
|
179
|
+
_b.label = 2;
|
|
180
|
+
case 2:
|
|
181
|
+
if (!(res && !done)) return [3 /*break*/, 4];
|
|
182
|
+
return [5 /*yield**/, _loop_2()];
|
|
183
|
+
case 3:
|
|
184
|
+
state_1 = _b.sent();
|
|
185
|
+
if (state_1 === "break")
|
|
186
|
+
return [3 /*break*/, 4];
|
|
187
|
+
return [3 /*break*/, 2];
|
|
188
|
+
case 4: return [2 /*return*/];
|
|
189
|
+
}
|
|
190
|
+
});
|
|
191
|
+
};
|
|
192
|
+
this_1 = this;
|
|
99
193
|
w = 0;
|
|
100
|
-
|
|
194
|
+
_a.label = 1;
|
|
101
195
|
case 1:
|
|
102
|
-
if (!(w < searchWords.length)) return [3 /*break*/,
|
|
103
|
-
|
|
104
|
-
searchWord = searchWord.toLowerCase();
|
|
105
|
-
return [4 /*yield*/, this._getBuffer(searchWord)];
|
|
196
|
+
if (!(w < searchWords.length)) return [3 /*break*/, 4];
|
|
197
|
+
return [5 /*yield**/, _loop_1(w)];
|
|
106
198
|
case 2:
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
bufPos = bufData.bufEndPos;
|
|
110
|
-
resultSet = new Set();
|
|
111
|
-
linePtr = 0;
|
|
112
|
-
numValues = 0;
|
|
113
|
-
_b.label = 3;
|
|
199
|
+
_a.sent();
|
|
200
|
+
_a.label = 3;
|
|
114
201
|
case 3:
|
|
115
|
-
if (!(linePtr < buf.byteLength)) return [3 /*break*/, 8];
|
|
116
|
-
startsWith = true;
|
|
117
|
-
done = false;
|
|
118
|
-
i = linePtr;
|
|
119
|
-
_b.label = 4;
|
|
120
|
-
case 4:
|
|
121
|
-
if (!(buf[i] != 10)) return [3 /*break*/, 7];
|
|
122
|
-
if (!(i >= buf.byteLength)) return [3 /*break*/, 6];
|
|
123
|
-
return [4 /*yield*/, this._getNextChunk(bufPos)];
|
|
124
|
-
case 5:
|
|
125
|
-
tempBufData = _b.sent();
|
|
126
|
-
if (tempBufData) {
|
|
127
|
-
buf = tempBufData.buf;
|
|
128
|
-
bufPos = tempBufData.bufEndPos;
|
|
129
|
-
i = 0;
|
|
130
|
-
linePtr = 0;
|
|
131
|
-
}
|
|
132
|
-
else {
|
|
133
|
-
// If tempBufData is null, we reached the end of the file, so we are done.
|
|
134
|
-
done = true;
|
|
135
|
-
return [3 /*break*/, 7];
|
|
136
|
-
}
|
|
137
|
-
_b.label = 6;
|
|
138
|
-
case 6:
|
|
139
|
-
if (startsWith) {
|
|
140
|
-
cur = String.fromCharCode(buf[i]);
|
|
141
|
-
if (i < linePtr + searchWord.length &&
|
|
142
|
-
searchWord[i - linePtr] > cur) {
|
|
143
|
-
// searchWord[i] > cur, so keep looping.
|
|
144
|
-
startsWith = false;
|
|
145
|
-
}
|
|
146
|
-
else if (i < linePtr + searchWord.length &&
|
|
147
|
-
searchWord[i - linePtr] < cur) {
|
|
148
|
-
// searchWord[i] < cur, so we lexicographically will not find any more results.
|
|
149
|
-
startsWith = false;
|
|
150
|
-
done = true;
|
|
151
|
-
return [3 /*break*/, 7];
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
154
|
-
// This condition indicates we found a match.
|
|
155
|
-
if (buf[i] === 44) {
|
|
156
|
-
// We found a ',' so increment numValues by one.
|
|
157
|
-
numValues++;
|
|
158
|
-
// If we're searching for one word and we have enough results, break out at the next space.
|
|
159
|
-
if (numValues >= this.maxResults && searchWords.length === 1) {
|
|
160
|
-
while (buf[i] != 32)
|
|
161
|
-
i++;
|
|
162
|
-
return [3 /*break*/, 7];
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
i++;
|
|
168
|
-
return [3 /*break*/, 4];
|
|
169
|
-
case 7:
|
|
170
|
-
if (done)
|
|
171
|
-
return [3 /*break*/, 8];
|
|
172
|
-
// If the line starts with the searchWord, we have a hit!
|
|
173
|
-
if (startsWith) {
|
|
174
|
-
line = buf.slice(linePtr, i).toString();
|
|
175
|
-
arr = this._parseHitString(line);
|
|
176
|
-
if (searchWords.length === 1) {
|
|
177
|
-
// Only a single search word so add results to array.
|
|
178
|
-
resultArr = resultArr.concat(arr);
|
|
179
|
-
// Once we have enough results, stop searching.
|
|
180
|
-
if (resultArr.length >= this.maxResults)
|
|
181
|
-
return [3 /*break*/, 8];
|
|
182
|
-
}
|
|
183
|
-
else {
|
|
184
|
-
try {
|
|
185
|
-
// Handle multiple words using sets.
|
|
186
|
-
for (arr_1 = (e_1 = void 0, __values(arr)), arr_1_1 = arr_1.next(); !arr_1_1.done; arr_1_1 = arr_1.next()) {
|
|
187
|
-
hit = arr_1_1.value;
|
|
188
|
-
hit = hit.toLowerCase();
|
|
189
|
-
if (firstWord) {
|
|
190
|
-
resultSet.add(hit);
|
|
191
|
-
}
|
|
192
|
-
else {
|
|
193
|
-
if (initialSet.has(hit)) {
|
|
194
|
-
resultSet.add(hit);
|
|
195
|
-
// If it is on the last iteration of words, break after we reach maxResults
|
|
196
|
-
if (w === searchWords.length - 1 &&
|
|
197
|
-
resultSet.size >= this.maxResults)
|
|
198
|
-
break;
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
204
|
-
finally {
|
|
205
|
-
try {
|
|
206
|
-
if (arr_1_1 && !arr_1_1.done && (_a = arr_1.return)) _a.call(arr_1);
|
|
207
|
-
}
|
|
208
|
-
finally { if (e_1) throw e_1.error; }
|
|
209
|
-
}
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
linePtr = i + 1;
|
|
213
|
-
return [3 /*break*/, 3];
|
|
214
|
-
case 8:
|
|
215
|
-
initialSet = resultSet;
|
|
216
|
-
firstWord = false;
|
|
217
|
-
// If there aren't any results, stop looping, because an intersection with an empty set is an empty set.
|
|
218
|
-
if (resultArr.length === 0 && initialSet.size === 0)
|
|
219
|
-
return [2 /*return*/, []];
|
|
220
|
-
_b.label = 9;
|
|
221
|
-
case 9:
|
|
222
202
|
w++;
|
|
223
203
|
return [3 /*break*/, 1];
|
|
224
|
-
case
|
|
225
|
-
// 4. Return the hitList [list of string]
|
|
226
|
-
if (searchWords.length === 1) {
|
|
227
|
-
return [2 /*return*/, resultArr];
|
|
228
|
-
}
|
|
229
|
-
// Else we need to return our set converted to an array
|
|
230
|
-
resultArr = Array.from(initialSet);
|
|
231
|
-
if (resultArr.length > this.maxResults)
|
|
232
|
-
return [2 /*return*/, resultArr.slice(0, this.maxResults)];
|
|
233
|
-
return [2 /*return*/, resultArr];
|
|
204
|
+
case 4: return [2 /*return*/, __spreadArray([], __read(resultArr), false).slice(0, this.maxResults)];
|
|
234
205
|
}
|
|
235
206
|
});
|
|
236
207
|
});
|
|
237
208
|
};
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
*
|
|
243
|
-
* @param searchWord [string]
|
|
244
|
-
* @returns a Buffer holding the sections we want to search.
|
|
245
|
-
*/
|
|
246
|
-
Trix.prototype._getBuffer = function (searchWord) {
|
|
209
|
+
Trix.prototype.getIndex = function (opts) {
|
|
210
|
+
return this._parseIxx(this.ixxFile, opts);
|
|
211
|
+
};
|
|
212
|
+
Trix.prototype._getBuffer = function (searchWord, opts) {
|
|
247
213
|
return __awaiter(this, void 0, void 0, function () {
|
|
248
|
-
var seekPosStart, seekPosEnd, indexes, indexes_1, indexes_1_1, _a, key, value, trimmedKey;
|
|
249
|
-
var
|
|
214
|
+
var seekPosStart, seekPosEnd, indexes, indexes_1, indexes_1_1, _a, key, value, trimmedKey, len, res;
|
|
215
|
+
var e_1, _b;
|
|
250
216
|
return __generator(this, function (_c) {
|
|
251
217
|
switch (_c.label) {
|
|
252
218
|
case 0:
|
|
253
219
|
seekPosStart = 0;
|
|
254
220
|
seekPosEnd = -1;
|
|
255
|
-
return [4 /*yield*/, this.
|
|
221
|
+
return [4 /*yield*/, this.getIndex(opts)];
|
|
256
222
|
case 1:
|
|
257
223
|
indexes = _c.sent();
|
|
258
224
|
try {
|
|
259
225
|
for (indexes_1 = __values(indexes), indexes_1_1 = indexes_1.next(); !indexes_1_1.done; indexes_1_1 = indexes_1.next()) {
|
|
260
226
|
_a = __read(indexes_1_1.value, 2), key = _a[0], value = _a[1];
|
|
261
227
|
trimmedKey = key.slice(0, searchWord.length);
|
|
262
|
-
if (
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
else {
|
|
269
|
-
seekPosStart = value;
|
|
270
|
-
}
|
|
228
|
+
if (trimmedKey >= searchWord) {
|
|
229
|
+
break;
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
seekPosStart = value;
|
|
233
|
+
seekPosEnd = value + 65536;
|
|
271
234
|
}
|
|
272
235
|
}
|
|
273
236
|
}
|
|
274
|
-
catch (
|
|
237
|
+
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
275
238
|
finally {
|
|
276
239
|
try {
|
|
277
240
|
if (indexes_1_1 && !indexes_1_1.done && (_b = indexes_1.return)) _b.call(indexes_1);
|
|
278
241
|
}
|
|
279
|
-
finally { if (
|
|
242
|
+
finally { if (e_1) throw e_1.error; }
|
|
280
243
|
}
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
});
|
|
285
|
-
});
|
|
286
|
-
};
|
|
287
|
-
/**
|
|
288
|
-
* Given the end position of the last buffer,
|
|
289
|
-
* load the next chunk of .ix data into a buffer and return it.
|
|
290
|
-
*
|
|
291
|
-
* @param seekPosStart [number] where to start loading data into the new buffer.
|
|
292
|
-
* @returns a Buffer holding the chunk we want to search.
|
|
293
|
-
*/
|
|
294
|
-
Trix.prototype._getNextChunk = function (seekPosStart) {
|
|
295
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
296
|
-
var seekPosEnd, indexes, indexes_2, indexes_2_1, _a, key, value;
|
|
297
|
-
var e_3, _b;
|
|
298
|
-
return __generator(this, function (_c) {
|
|
299
|
-
switch (_c.label) {
|
|
300
|
-
case 0:
|
|
301
|
-
if (seekPosStart == -1)
|
|
302
|
-
return [2 /*return*/, null];
|
|
303
|
-
seekPosEnd = -1;
|
|
304
|
-
return [4 /*yield*/, this.index];
|
|
305
|
-
case 1:
|
|
306
|
-
indexes = _c.sent();
|
|
307
|
-
try {
|
|
308
|
-
for (indexes_2 = __values(indexes), indexes_2_1 = indexes_2.next(); !indexes_2_1.done; indexes_2_1 = indexes_2.next()) {
|
|
309
|
-
_a = __read(indexes_2_1.value, 2), key = _a[0], value = _a[1];
|
|
310
|
-
if (value <= seekPosStart + 1)
|
|
311
|
-
continue;
|
|
312
|
-
seekPosEnd = value;
|
|
313
|
-
break;
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
catch (e_3_1) { e_3 = { error: e_3_1 }; }
|
|
317
|
-
finally {
|
|
318
|
-
try {
|
|
319
|
-
if (indexes_2_1 && !indexes_2_1.done && (_b = indexes_2.return)) _b.call(indexes_2);
|
|
320
|
-
}
|
|
321
|
-
finally { if (e_3) throw e_3.error; }
|
|
244
|
+
len = seekPosEnd - seekPosStart;
|
|
245
|
+
if (len < 0) {
|
|
246
|
+
return [2 /*return*/, undefined];
|
|
322
247
|
}
|
|
323
|
-
seekPosStart
|
|
324
|
-
// Return the buffer and its end position in the file.
|
|
325
|
-
return [2 /*return*/, this._createBuffer(seekPosStart, seekPosEnd)];
|
|
326
|
-
}
|
|
327
|
-
});
|
|
328
|
-
});
|
|
329
|
-
};
|
|
330
|
-
/**
|
|
331
|
-
* Create and return a buffer given the start and end position
|
|
332
|
-
* of what to load from the .ix file.
|
|
333
|
-
*
|
|
334
|
-
* @param seekPosStart [number] byte the buffer should start reading from file.
|
|
335
|
-
* @param seekPosEnd [number] byte the buffer should stop reading from file.
|
|
336
|
-
* @returns a Buffer holding the chunk of data.
|
|
337
|
-
*/
|
|
338
|
-
Trix.prototype._createBuffer = function (seekPosStart, seekPosEnd) {
|
|
339
|
-
return __awaiter(this, void 0, void 0, function () {
|
|
340
|
-
var bufLength, stat, buf;
|
|
341
|
-
return __generator(this, function (_a) {
|
|
342
|
-
switch (_a.label) {
|
|
343
|
-
case 0:
|
|
344
|
-
if (!(seekPosEnd < 0)) return [3 /*break*/, 2];
|
|
345
|
-
return [4 /*yield*/, this.ixFile.stat()];
|
|
346
|
-
case 1:
|
|
347
|
-
stat = _a.sent();
|
|
348
|
-
bufLength = stat.size - seekPosStart;
|
|
349
|
-
return [3 /*break*/, 3];
|
|
248
|
+
return [4 /*yield*/, this.ixFile.read(Buffer.alloc(len), 0, len, seekPosStart, opts)];
|
|
350
249
|
case 2:
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
case 3:
|
|
354
|
-
buf = Buffer.alloc(bufLength);
|
|
355
|
-
return [4 /*yield*/, this.ixFile.read(buf, 0, bufLength, seekPosStart)];
|
|
356
|
-
case 4:
|
|
357
|
-
_a.sent();
|
|
358
|
-
// Return the buffer and its end position in the file.
|
|
359
|
-
return [2 /*return*/, { buf: buf, bufEndPos: seekPosEnd }];
|
|
250
|
+
res = _c.sent();
|
|
251
|
+
return [2 /*return*/, __assign(__assign({}, res), { seekPosEnd: seekPosEnd })];
|
|
360
252
|
}
|
|
361
253
|
});
|
|
362
254
|
});
|
|
363
255
|
};
|
|
364
|
-
|
|
365
|
-
* Takes in a hit string and returns an array of result terms.
|
|
366
|
-
*
|
|
367
|
-
* @param line [string] The line of .ix that is a hit.
|
|
368
|
-
* @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
|
|
369
|
-
*/
|
|
370
|
-
Trix.prototype._parseHitString = function (line) {
|
|
371
|
-
var e_4, _a;
|
|
372
|
-
var arr = [];
|
|
373
|
-
var _b = __read(line.split(' ')), term = _b[0], parts = _b.slice(1); // skip term
|
|
374
|
-
try {
|
|
375
|
-
// Each result is of format: "{itemId},{wordPos}"
|
|
376
|
-
// Parse the entire line of these and return
|
|
377
|
-
for (var parts_1 = __values(parts), parts_1_1 = parts_1.next(); !parts_1_1.done; parts_1_1 = parts_1.next()) {
|
|
378
|
-
var part = parts_1_1.value;
|
|
379
|
-
var pair = part.split(',');
|
|
380
|
-
if (pair.length === 2) {
|
|
381
|
-
var itemId = pair[0];
|
|
382
|
-
var wordPos = Number.parseInt(pair[1]);
|
|
383
|
-
if (typeof wordPos !== 'number' || isNaN(wordPos))
|
|
384
|
-
throw new Error("Error in ix index format at term " + itemId + " for word " + parts[0]);
|
|
385
|
-
arr.push(term + "," + itemId);
|
|
386
|
-
}
|
|
387
|
-
else if (pair.length > 1) {
|
|
388
|
-
throw new Error("Error in ix index format at word " + parts[0]);
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
catch (e_4_1) { e_4 = { error: e_4_1 }; }
|
|
393
|
-
finally {
|
|
394
|
-
try {
|
|
395
|
-
if (parts_1_1 && !parts_1_1.done && (_a = parts_1.return)) _a.call(parts_1);
|
|
396
|
-
}
|
|
397
|
-
finally { if (e_4) throw e_4.error; }
|
|
398
|
-
}
|
|
399
|
-
return arr;
|
|
400
|
-
};
|
|
401
|
-
/**
|
|
402
|
-
* Parses ixx file and constructs a map of {word: ixFileLocation}
|
|
403
|
-
*
|
|
404
|
-
* @param ixxFile [anyFile] second level index that is produced by ixIxx.
|
|
405
|
-
* @returns a ParsedIxx map.
|
|
406
|
-
*/
|
|
407
|
-
Trix.prototype._parseIxx = function (ixxFile) {
|
|
256
|
+
Trix.prototype._parseIxx = function (ixxFile, opts) {
|
|
408
257
|
return __awaiter(this, void 0, void 0, function () {
|
|
409
|
-
var file
|
|
258
|
+
var file;
|
|
410
259
|
return __generator(this, function (_a) {
|
|
411
260
|
switch (_a.label) {
|
|
412
|
-
case 0: return [4 /*yield*/, ixxFile.readFile('utf8')];
|
|
261
|
+
case 0: return [4 /*yield*/, ixxFile.readFile(__assign({ encoding: 'utf8' }, opts))];
|
|
413
262
|
case 1:
|
|
414
263
|
file = (_a.sent());
|
|
415
|
-
|
|
416
|
-
|
|
264
|
+
return [2 /*return*/, new Map(file
|
|
265
|
+
.split('\n')
|
|
417
266
|
.filter(function (f) { return !!f; })
|
|
418
267
|
.map(function (line) {
|
|
419
|
-
var prefix = line.
|
|
420
|
-
var posStr = line.
|
|
268
|
+
var prefix = line.slice(0, trixPrefixSize);
|
|
269
|
+
var posStr = line.slice(trixPrefixSize);
|
|
421
270
|
var pos = Number.parseInt(posStr, 16);
|
|
422
271
|
return [prefix, pos];
|
|
423
272
|
}))];
|
|
@@ -428,3 +277,4 @@ var Trix = /** @class */ (function () {
|
|
|
428
277
|
return Trix;
|
|
429
278
|
}());
|
|
430
279
|
exports.default = Trix;
|
|
280
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,IAAM,cAAc,GAAG,CAAC,CAAA;AAExB,IAAM,SAAS,GAAG,KAAK,CAAA;AAEvB,8CAA8C;AAC9C,2DAA2D;AAC3D;IAKE,cACE,OAA0B,EAC1B,MAAyB,EACzB,UAAe;QAAf,2BAAA,EAAA,eAAe;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAEK,qBAAM,GAAZ,UAAa,YAAoB,EAAE,IAA+B;;;;;;wBAC5D,SAAS,GAAG,EAAgB,CAAA;wBAC1B,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;4CAClC,CAAC;;;;;wCACF,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;wCAC3C,IAAI,GAAG,KAAK,CAAA;wCACJ,qBAAM,OAAK,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,EAAA;;wCAA7C,GAAG,GAAG,SAAuC;wCAC/C,OAAO,SAAA,CAAA;;;;;;wDAGH,UAAU,GAAa,GAAG,WAAhB,EAAE,MAAM,GAAK,GAAG,OAAR,CAAQ;wDAC5B,cAAc,GAAG,KAAK,CAAA;wDACpB,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;wDAIvB,KAAK,GAAG,GAAG;6DACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;6DAC/B,KAAK,CAAC,IAAI,CAAC;6DACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC,CAAA;wDAEb,IAAI,GAAG,KAAK;6DACf,MAAM,CAAC,UAAA,IAAI;4DACV,IAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;4DAC/B,IAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,CAAA;4DAC3C,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;gEAC5B,cAAc,GAAG,IAAI,CAAA;6DACtB;iEAAM,IAAI,cAAc,IAAI,CAAC,KAAK,EAAE;gEACnC,IAAI,GAAG,IAAI,CAAA;6DACZ;iEAAM,IAAI,IAAI,GAAG,YAAY,EAAE;gEAC9B,IAAI,GAAG,IAAI,CAAA;6DACZ;4DACD,OAAO,KAAK,CAAA;wDACd,CAAC,CAAC;6DACD,GAAG,CAAC,UAAA,IAAI;4DACD,IAAA,KAAA,OAAmB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA,EAAjC,IAAI,QAAA,EAAK,KAAK,cAAmB,CAAA;4DACxC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAA,GAAG,IAAI,OAAA,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAA;wDACpD,CAAC,CAAC;6DACD,IAAI,EAAwB,CAAA;wDAE/B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;4DAChB,IAAI,GAAG,IAAI,CAAA;yDACZ;wDACD,IAAI,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE;4DAC3B,IAAI,GAAG,IAAI,CAAA;yDACZ;6DAEG,CAAA,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,OAA
K,UAAU,IAAI,CAAC,IAAI,CAAA,EAAzD,wBAAyD;wDAC/C,qBAAM,OAAK,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EACvB,CAAC,EACD,SAAS,EACT,UAAU,EACV,IAAI,CACL;4DAED,+BAA+B;0DAF9B;;wDANK,QAAM,SAMX;wDAED,+BAA+B;wDAC/B,IAAI,CAAC,KAAG,CAAC,SAAS,EAAE;4DAClB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;wDACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,KAAG,CAAC,MAAM,CAAC,CAAC,CAAA;wDAC5C,UAAU,IAAI,SAAS,CAAA;wDACvB,OAAO,GAAG,IAAI,CAAC,MAAM,CAAA;;;wDAChB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAK,UAAU,IAAI,IAAI,EAAE;4DACpE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;;;;;;;;6CA1DI,CAAA,GAAG,IAAI,CAAC,IAAI,CAAA;;;;;;;;;;;;wBANZ,CAAC,GAAG,CAAC;;;6BAAE,CAAA,CAAC,GAAG,WAAW,CAAC,MAAM,CAAA;sDAA7B,CAAC;;;;;wBAA8B,CAAC,EAAE,CAAA;;4BAoE3C,sBAAO,yBAAI,SAAS,UAAE,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,EAAA;;;;KAChD;IAEO,uBAAQ,GAAhB,UAAiB,IAA+B;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;IAC3C,CAAC;IAEa,yBAAU,GAAxB,UACE,UAAkB,EAClB,IAA+B;;;;;;;wBAE3B,YAAY,GAAG,CAAC,CAAA;wBAChB,UAAU,GAAG,CAAC,CAAC,CAAA;wBACH,qBAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAA;;wBAAnC,OAAO,GAAG,SAAyB;;4BACzC,KAA2B,YAAA,SAAA,OAAO,CAAA,qFAAE;gCAAzB,KAAA,4BAAY,EAAX,GAAG,QAAA,EAAE,KAAK,QAAA;gCACd,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;gCAClD,IAAI,UAAU,IAAI,UAAU,EAAE;oCAC5B,MAAK;iCACN;qCAAM;oCACL,YAAY,GAAG,KAAK,CAAA;oCACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;iCAC3B;6BACF;;;;;;;;;wBAGK,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;wBACrC,IAAI,GAAG,GAAG,CAAC,EAAE;4BACX,sBAAO,SAAS,EAAA;yBACjB;wBACW,qBAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,EAAA;;wBANK,GAAG,GAAG,SAMX;wBACD,4CACK,GAAG,KACN,UAAU,YAAA,KACX;;;;KACF;IAEa,wBAAS,GAAvB,UACE,OAA0B,EAC1B,IAA+B;;;;;4BAEjB,qBAAM,OAAO,CAAC,QAAQ,YAClC,QAAQ,EAAE,MAAM,IACb,IAAI,EACP,EAAA;;wBAHI,IAAI,GAAG,CAAC,SAGZ,CAAW;wBACb,sBAAO,IAAI,GAAG,CACZ,IAAI;iCACD,KAAK,CAAC,IAAI,CAAC;iCACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC;iCAChB,GAAG,CAAC,UAAA,IAAI;gCACP,I
AAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAA;gCAC5C,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;gCACzC,IAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;gCACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;4BACtB,CAAC,CAAC,CACL,EAAA;;;;KACF;IACH,WAAC;AAAD,CAAC,AApJD,IAoJC"}
|
package/esm/index.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { GenericFilehandle } from 'generic-filehandle';
|
|
2
|
+
/**
 * Searches a Trix index through two filehandles: the `.ixx` file, which is
 * read in full and parsed into (prefix, offset) entries, and the `.ix` file,
 * which is read in chunks starting at an offset chosen from those entries.
 */
export default class Trix {
|
|
3
|
+
private ixFile;
|
|
4
|
+
private ixxFile;
|
|
5
|
+
/** Upper bound on the number of results returned by `search`. */
maxResults: number;
|
|
6
|
+
/**
 * @param ixxFile filehandle for the `.ixx` file (`readFile` is called on it)
 * @param ixFile filehandle for the `.ix` file (`read` is called on it)
 * @param maxResults result cap; the implementation defaults it to 20
 */
constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
|
|
7
|
+
/**
 * Prefix-searches the index. Only the text before the first space of
 * `searchString` is used, lower-cased. `opts` is passed through to the
 * filehandle reads. Resolves to `[term, id]` pairs, de-duplicated on the
 * second element and truncated to `maxResults`.
 */
search(searchString: string, opts?: {
|
|
8
|
+
signal?: AbortSignal;
|
|
9
|
+
}): Promise<[string, string][]>;
|
|
10
|
+
private getIndex;
|
|
11
|
+
private _getBuffer;
|
|
12
|
+
}
|
package/esm/index.js
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Number of leading characters of each .ixx line that form the word-prefix
// key; the rest of the line is parsed as a hexadecimal number.
const TRIX_PREFIX_SIZE = 5;
|
|
4
|
+
// Number of bytes requested from the .ix file each time search() fetches an
// additional chunk.
const CHUNK_SIZE = 65536;
|
|
5
|
+
// https://stackoverflow.com/a/9229821/2129219
|
|
6
|
+
/**
 * Returns the elements of `a` whose key was not produced by an earlier
 * element, preserving the original order (first occurrence wins).
 *
 * @param {Array} a - Items to de-duplicate; not modified.
 * @param {Function} key - Maps an item to the value used for comparison.
 * @returns {Array} New array holding the first item seen for each key.
 */
function uniqBy(a, key) {
  const seen = new Set();
  return a.filter(item => {
    const k = key(item);
    if (seen.has(k)) {
      return false;
    }
    seen.add(k);
    return true;
  });
}
|
|
13
|
+
/**
 * Prefix search over a Trix index made of two files: an `.ixx` file whose
 * lines pair a fixed-width word prefix with a hexadecimal byte offset, and an
 * `.ix` file whose lines hold a word followed by space-separated entries.
 * Both files are assumed to be sorted by word.
 */
class Trix {
  /**
   * @param {object} ixxFile - Filehandle for the `.ixx` file; `readFile(opts)`
   *   is called on it.
   * @param {object} ixFile - Filehandle for the `.ix` file;
   *   `read(buffer, offset, length, position, opts)` is called on it.
   * @param {number} [maxResults=20] - Upper bound on the results of `search`.
   */
  constructor(ixxFile, ixFile, maxResults = 20) {
    this.ixFile = ixFile;
    this.ixxFile = ixxFile;
    this.maxResults = maxResults;
  }

  /**
   * Finds index words that start with the first word of `searchString`.
   *
   * @param {string} searchString - Query; only the text before the first
   *   space is used, lower-cased. An empty first word yields no results.
   * @param {object} [opts] - Passed through to the filehandle reads
   *   (for example `{ signal }`).
   * @returns {Promise<Array>} `[word, id]` pairs, where `id` is the text
   *   before the first comma of each entry on a matching line. Pairs are
   *   de-duplicated on `id` and truncated to `maxResults`.
   */
  async search(searchString, opts) {
    // we only search one word at a time
    const searchWord = searchString.split(' ')[0].toLowerCase();
    if (!searchWord) {
      return [];
    }

    let { seekPosEnd, buffer } = await this._getBuffer(searchWord, opts);
    let atEof = false;
    let hits = [];
    for (;;) {
      const str = buffer.toString();

      // A chunk can end halfway into a record, so only text up to the last
      // newline is trusted until the end of the file has been reached.
      let complete = str;
      if (!atEof) {
        const lastNewline = str.lastIndexOf('\n');
        complete = lastNewline === -1 ? '' : str.slice(0, lastNewline);
      }

      hits = [];
      let done = false;
      for (const line of complete.split('\n')) {
        if (!line) {
          continue;
        }
        const [term, ...parts] = line.split(' ');
        if (term.startsWith(searchWord)) {
          for (const part of parts) {
            hits.push([term, part.split(',')[0]]);
          }
        } else if (term.slice(0, searchWord.length) > searchWord) {
          // Compare prefixes only: a matching word such as "ocab" is itself
          // greater than "oca" and must not end the scan.
          done = true;
          break;
        }
      }

      if (done || atEof || hits.length >= this.maxResults) {
        break;
      }

      // eslint-disable-next-line no-await-in-loop
      const more = await this.ixFile.read(
        Buffer.alloc(CHUNK_SIZE),
        0,
        CHUNK_SIZE,
        seekPosEnd,
        opts,
      );
      if (more.bytesRead) {
        // Keep only the bytes actually read so no zero padding is decoded.
        buffer = Buffer.concat([
          buffer,
          more.buffer.subarray(0, more.bytesRead),
        ]);
        seekPosEnd += more.bytesRead;
      } else {
        // Nothing left to read: loop once more so a final record without a
        // trailing newline is still scanned.
        atEof = true;
      }
    }

    // deduplicate results based on the detail column (second element)
    return uniqBy(hits, elt => elt[1]).slice(0, this.maxResults);
  }

  /**
   * Reads the whole `.ixx` file and parses every non-empty line.
   *
   * @param {object} [opts] - Merged into the `readFile` options.
   * @returns {Promise<Array>} `[prefix, position]` pairs: the first
   *   `TRIX_PREFIX_SIZE` characters of the line and the rest parsed as hex.
   */
  async getIndex(opts) {
    const file = await this.ixxFile.readFile({
      encoding: 'utf8',
      ...opts,
    });
    return file
      .split('\n')
      .filter(line => !!line)
      .map(line => {
        const prefix = line.slice(0, TRIX_PREFIX_SIZE);
        const pos = Number.parseInt(line.slice(TRIX_PREFIX_SIZE), 16);
        return [prefix, pos];
      });
  }

  /**
   * Reads the first chunk of the `.ix` file to scan for `searchWord`.
   *
   * @param {string} searchWord - Lower-cased word being searched for.
   * @param {object} [opts] - Passed through to the filehandle reads.
   * @returns {Promise<object>} The read result with `buffer` trimmed to the
   *   bytes actually read, plus `seekPosEnd`, the file position just past
   *   those bytes.
   */
  async _getBuffer(searchWord, opts) {
    // Start from the beginning of the file unless an index entry sorts
    // strictly before the search word; this keeps words at or before the
    // first index key searchable.
    let seekPosStart = 0;
    const indexes = await this.getIndex(opts);
    for (const [key, value] of indexes) {
      if (key.slice(0, searchWord.length) < searchWord) {
        seekPosStart = value;
      }
    }

    const res = await this.ixFile.read(
      Buffer.alloc(CHUNK_SIZE),
      0,
      CHUNK_SIZE,
      seekPosStart,
      opts,
    );
    return {
      ...res,
      buffer: res.buffer.subarray(0, res.bytesRead),
      seekPosEnd: seekPosStart + res.bytesRead,
    };
  }
}
|
|
120
|
+
// CommonJS default export of the Trix class.
// NOTE(review): this file is published under esm/ and is referenced by the
// package.json "module" field, yet it uses CommonJS `exports`; presumably the
// esm build should emit ES module syntax -- confirm the tsc module setting.
exports.default = Trix;
|
package/esm/index.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;AAEA,MAAM,gBAAgB,GAAG,CAAC,CAAA;AAE1B,MAAM,UAAU,GAAG,KAAK,CAAA;AAExB,8CAA8C;AAC9C,SAAS,MAAM,CAAC,CAAqB,EAAE,GAAsC;IAC3E,IAAI,IAAI,GAAG,IAAI,GAAG,EAAE,CAAA;IACpB,OAAO,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;QACrB,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAA;QACjB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAqB,IAAI;IAOvB,YACE,OAA0B,EAC1B,MAAyB,EACzB,UAAU,GAAG,EAAE;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,YAAoB,EAAE,IAA+B;QAChE,IAAI,SAAS,GAAG,EAAwB,CAAA;QACxC,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAE3C,oCAAoC;QACpC,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;QAC/C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;QACnD,IAAI,CAAC,GAAG,EAAE;YACR,OAAO,EAAE,CAAA;SACV;QAED,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,GAAG,CAAA;QAChC,IAAI,IAAI,GAAG,KAAK,CAAA;QAChB,OAAO,CAAC,IAAI,EAAE;YACZ,IAAI,cAAc,GAAG,KAAK,CAAA;YAC1B,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;YAE7B,kEAAkE;YAClE,yDAAyD;YACzD,MAAM,KAAK,GAAG,GAAG;iBACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;iBAC/B,KAAK,CAAC,IAAI,CAAC;iBACX,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YAEnB,MAAM,IAAI,GAAG,KAAK;gBAChB,2DAA2D;iBAC1D,MAAM,CAAC,IAAI,CAAC,EAAE;gBACb,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;gBAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;gBACzC,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;oBAC5B,cAAc,GAAG,IAAI,CAAA;iBACtB;gBAED,oEAAoE;gBACpE,gBAAgB;gBAChB,IAAI,IAAI,GAAG,UAAU,EAAE;oBACrB,IAAI,GAAG,IAAI,CAAA;iBACZ;gBACD,OAAO,KAAK,CAAA;YACd,CAAC,CAAC;iBACD,GAAG,CAAC,IAAI,CAAC,EAAE;gBACV,MAAM,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;gBACxC,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CA
AC,CAAC,CAAC,CAAC,CAAA;YACpD,CAAC,CAAC;iBACD,IAAI,EAAwB,CAAA;YAE/B,yEAAyE;YACzE,eAAe;YACf,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,EAAE;gBAC7D,4CAA4C;gBAC5C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CACjC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,EACxB,CAAC,EACD,UAAU,EACV,UAAU,EACV,IAAI,CACL,CAAA;gBAED,gCAAgC;gBAChC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;oBACnB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;oBAClC,MAAK;iBACN;gBACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAA;gBAC7C,UAAU,IAAI,UAAU,CAAA;aACzB;YAED,uEAAuE;YACvE,yBAAyB;iBACpB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,EAAE;gBAClE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;gBAClC,MAAK;aACN;SACF;QAED,gEAAgE;QAChE,OAAO,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAA;IACnE,CAAC;IAEO,KAAK,CAAC,QAAQ,CAAC,IAA+B;QACpD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC;YACvC,QAAQ,EAAE,MAAM;YAChB,GAAG,IAAI;SACR,CAAC,CAAA;QACF,OAAO,IAAI;aACR,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;aAChB,GAAG,CAAC,IAAI,CAAC,EAAE;YACV,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAA;YAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAA;YAC3C,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;YACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAqB,CAAA;QAC1C,CAAC,CAAC,CAAA;IACN,CAAC;IAEO,KAAK,CAAC,UAAU,CACtB,UAAkB,EAClB,IAA+B;QAE/B,IAAI,YAAY,GAAG,CAAC,CAAA;QACpB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAA;QACnB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QACzC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YAC/B,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;YAClD,IAAI,UAAU,GAAG,UAAU,EAAE;gBAC3B,YAAY,GAAG,KAAK,CAAA;gBACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;aAC3B;QACH,CAAC,CAAC,CAAA;QAEF,sDAAsD;QACtD,MAAM,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;QACrC,IAAI,GAAG,GAAG,CAAC,EAAE;YACX,OAAO,SAAS,CAAA;SACjB;QACD,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM
,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,CAAA;QACD,OAAO;YACL,GAAG,GAAG;YACN,UAAU;SACX,CAAA;IACH,CAAC;CACF;AAhJD,uBAgJC"}
|
package/package.json
CHANGED
|
@@ -1,33 +1,41 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "
|
|
2
|
+
"version": "2.0.0",
|
|
3
3
|
"license": "Apache-2.0",
|
|
4
4
|
"main": "dist/index.js",
|
|
5
|
+
"module": "esm/index.js",
|
|
5
6
|
"files": [
|
|
6
|
-
"dist"
|
|
7
|
+
"dist",
|
|
8
|
+
"esm"
|
|
7
9
|
],
|
|
8
10
|
"engines": {
|
|
9
11
|
"node": ">=10"
|
|
10
12
|
},
|
|
11
13
|
"scripts": {
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
+
"lint": "eslint --report-unused-disable-directives --max-warnings 0 --ext .js,.ts src ",
|
|
15
|
+
"prebuild": "rimraf dist esm",
|
|
16
|
+
"build:esm": "tsc --target es2018 --outDir esm",
|
|
17
|
+
"build:es5": "tsc --target es5 --outDir dist",
|
|
18
|
+
"build": "npm run build:esm && npm run build:es5",
|
|
14
19
|
"postversion": "git push --follow-tags",
|
|
15
20
|
"test": "jest"
|
|
16
21
|
},
|
|
17
|
-
"prettier": {
|
|
18
|
-
"printWidth": 80,
|
|
19
|
-
"semi": true,
|
|
20
|
-
"singleQuote": true,
|
|
21
|
-
"trailingComma": "es5"
|
|
22
|
-
},
|
|
23
22
|
"name": "@gmod/trix",
|
|
24
23
|
"author": "Matt Morgan",
|
|
25
24
|
"repository": "GMOD/trix-js",
|
|
26
25
|
"devDependencies": {
|
|
27
|
-
"@types/jest": "^
|
|
28
|
-
"@types/node": "^
|
|
26
|
+
"@types/jest": "^27.0.3",
|
|
27
|
+
"@types/node": "^16.11.13",
|
|
28
|
+
"@typescript-eslint/eslint-plugin": "^5.7.0",
|
|
29
|
+
"@typescript-eslint/parser": "^5.7.0",
|
|
30
|
+
"eslint": "^7.0.0",
|
|
31
|
+
"eslint-config-airbnb-base": "^15.0.0",
|
|
32
|
+
"eslint-config-airbnb-typescript": "^16.1.0",
|
|
33
|
+
"eslint-config-prettier": "^8.3.0",
|
|
34
|
+
"eslint-plugin-import": "^2.25.3",
|
|
35
|
+
"eslint-plugin-prettier": "^4.0.0",
|
|
29
36
|
"generic-filehandle": "^2.1.0",
|
|
30
37
|
"jest": "^27.0.6",
|
|
38
|
+
"prettier": "^2.5.1",
|
|
31
39
|
"rimraf": "^3.0.2",
|
|
32
40
|
"ts-jest": "^27.0.4",
|
|
33
41
|
"typescript": "^4.3.5"
|