@gmod/trix 0.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +35 -36
- package/dist/index.d.ts +13 -63
- package/dist/index.js +280 -8
- package/dist/index.js.map +1 -0
- package/esm/index.d.ts +12 -0
- package/esm/index.js +120 -0
- package/esm/index.js.map +1 -0
- package/package.json +25 -40
- package/dist/trix.cjs.development.js +0 -1471
- package/dist/trix.cjs.development.js.map +0 -1
- package/dist/trix.cjs.production.min.js +0 -2
- package/dist/trix.cjs.production.min.js.map +0 -1
- package/dist/trix.esm.js +0 -1467
- package/dist/trix.esm.js.map +0 -1
- package/src/index.ts +0 -341
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
- Fix issue with infinite loop
|
|
2
|
+
- Add abortsignal support
|
|
3
|
+
- Only query first word when string with multiple words is entered
|
|
4
|
+
|
|
5
|
+
# v1.0.0
|
|
6
|
+
|
|
7
|
+
- Change result format from just the "result" string returned to be "term,result"
|
|
8
|
+
|
|
9
|
+
# v0.2.1
|
|
10
|
+
|
|
11
|
+
- Fix error when identifiers contain commas
|
|
12
|
+
|
|
13
|
+
# v0.2.0
|
|
14
|
+
|
|
15
|
+
- Improve performance of fetches with sequential chunk parsing
|
|
16
|
+
|
|
17
|
+
# v0.1.1
|
|
18
|
+
|
|
19
|
+
- Initial release
|
package/README.md
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
[Build status](https://github.com/GMOD/trix-js/actions?query=branch%3Amain+workflow%3APush+)
|
|
2
|
+
|
|
1
3
|
# trix-js
|
|
4
|
+
|
|
2
5
|
Read UCSC Trix indexes in pure JavaScript
|
|
3
6
|
|
|
4
7
|
## Usage
|
|
@@ -9,37 +12,43 @@ import { RemoteFile } from 'generic-filehandle'
|
|
|
9
12
|
|
|
10
13
|
// any filehandle object that supports the Nodejs FileHandle API will work.
|
|
11
14
|
// We use generic-filehandle here to demonstrate searching files on remote servers.
|
|
12
|
-
const ixxFile = new RemoteFile(
|
|
13
|
-
|
|
15
|
+
const ixxFile = new RemoteFile(
|
|
16
|
+
'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ixx',
|
|
17
|
+
)
|
|
18
|
+
const ixFile = new RemoteFile(
|
|
19
|
+
'https://hgdownload.soe.ucsc.edu/gbdb/hg38/knownGene.ix',
|
|
20
|
+
)
|
|
14
21
|
|
|
15
|
-
const trix = new Trix(ixxFile, ixFile)
|
|
22
|
+
const trix = new Trix(ixxFile, ixFile)
|
|
16
23
|
|
|
17
24
|
async function doStuff() {
|
|
18
|
-
const results = await trix.search('oca')
|
|
19
|
-
console.log(results)
|
|
25
|
+
const results = await trix.search('oca')
|
|
26
|
+
console.log(results)
|
|
20
27
|
}
|
|
21
|
-
doStuff()
|
|
22
|
-
|
|
28
|
+
doStuff()
|
|
23
29
|
```
|
|
24
30
|
|
|
25
31
|
## Documentation
|
|
32
|
+
|
|
26
33
|
### Trix constructor
|
|
34
|
+
|
|
27
35
|
The Trix class constructor accepts arguments:
|
|
36
|
+
|
|
28
37
|
- `ixxFile` - a filehandle object for the trix .ixx file
|
|
29
38
|
- `ixFile` - a filehandle object for the trix .ix file
|
|
30
39
|
- `maxResults = 20` - an optional number specifying the maximum number of results to return on `trix.search()`
|
|
31
40
|
|
|
32
|
-
|
|
33
41
|
### Trix search
|
|
42
|
+
|
|
34
43
|
**Search the index files for a term and find its keys.**<br>
|
|
35
44
|
**When the search string contains multiple words, only the first word is queried.**<br>
|
|
36
45
|
The Trix search function accepts argument:
|
|
46
|
+
|
|
37
47
|
- `searchString` - the term to search for; it is lowercased and matched as a prefix against the words in the index file<br>
|
|
38
|
-
|
|
48
|
+
|
|
39
49
|
The Trix search function returns: <br>
|
|
40
|
-
- `Promise<string[]>` - a promised array of strings where each string is an itemId result
|
|
41
|
-
|
|
42
50
|
|
|
51
|
+
- `Promise<[string, string][]>` - a promise resolving to an array of `[term, result]` pairs, where `term` is the indexed word (the left column of the .ix file) and `result` is the matching item ID taken from the right column
|
|
43
52
|
|
|
44
53
|
## Examples
|
|
45
54
|
|
|
@@ -47,49 +56,37 @@ The Trix search function returns: <br>
|
|
|
47
56
|
import { LocalFile } from 'generic-filehandle'
|
|
48
57
|
import Trix from '@gmod/trix'
|
|
49
58
|
|
|
50
|
-
const ixxFile = new LocalFile('out.ixx')
|
|
51
|
-
const ixFile = new LocalFile('out.ix')
|
|
59
|
+
const ixxFile = new LocalFile('out.ixx')
|
|
60
|
+
const ixFile = new LocalFile('out.ix')
|
|
52
61
|
|
|
53
62
|
// limit maxResults to 5
|
|
54
|
-
const trix = new Trix(ixxFile, ixFile, 5)
|
|
63
|
+
const trix = new Trix(ixxFile, ixFile, 5)
|
|
55
64
|
|
|
56
65
|
async function doStuff() {
|
|
57
|
-
const results1 = await trix.search('herc')
|
|
58
|
-
console.log(results1)
|
|
66
|
+
const results1 = await trix.search('herc')
|
|
67
|
+
console.log(results1)
|
|
59
68
|
|
|
60
69
|
// increase maxResults to 30
|
|
61
|
-
trix.maxResults = 30
|
|
70
|
+
trix.maxResults = 30
|
|
62
71
|
|
|
63
|
-
const results2 = await trix.search('linc')
|
|
64
|
-
console.log(results2)
|
|
72
|
+
const results2 = await trix.search('linc')
|
|
73
|
+
console.log(results2)
|
|
65
74
|
}
|
|
66
75
|
|
|
67
|
-
doStuff()
|
|
76
|
+
doStuff()
|
|
68
77
|
```
|
|
69
|
-
<br><br>
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
78
|
|
|
79
|
+
<br><br>
|
|
84
80
|
|
|
85
81
|
## Development
|
|
86
82
|
|
|
87
|
-
|
|
88
83
|
### Test trix-js
|
|
84
|
+
|
|
89
85
|
First, clone this repo and install npm packages. <br>
|
|
90
86
|
Then, run `npm test`. <br>
|
|
91
87
|
|
|
92
88
|
### Test the UCSC TrixSearch - Requires Linux
|
|
89
|
+
|
|
93
90
|
First, clone this repo.
|
|
94
91
|
To run test searches on a track hub using the UCSC `TrixSearch`, navigate to `tests/testdata/test#` and run `bash test#script.sh` where # is the test number.
|
|
95
92
|
To change search terms, edit `searchterms.txt`.
|
|
@@ -97,7 +94,9 @@ To change search terms, edit `searchterms.txt`.
|
|
|
97
94
|
**Wondering what to search for?**<br>
|
|
98
95
|
Open up `tests/testdata/test#/input.txt`.
|
|
99
96
|
|
|
100
|
-
|
|
101
97
|
**How to test my own .gff.gz data?**<br>
|
|
102
98
|
Navigate to `/test/rawGenomes` and create a directory with your .gff.gz file in it. From within that directory, run `bash ../../programs/gff3ToInput.sh <.gff3.gz FILE> <OUTPUT NAME>`.
|
|
103
99
|
|
|
100
|
+
## Reference
|
|
101
|
+
|
|
102
|
+
See https://genome.ucsc.edu/goldenPath/help/trix.html for the basic concepts of trix, and https://github.com/GMOD/ixixx-js for a JavaScript implementation of the `ixIxx` command.
|
package/dist/index.d.ts
CHANGED
|
@@ -1,63 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
constructor(ixxFile: AnyFile, ixFile: AnyFile, maxResults?: number);
|
|
15
|
-
/**
|
|
16
|
-
* Search trix for the given searchWord(s). Return up to {this.maxResults} results.
|
|
17
|
-
* This method matches each index prefix against each searchWord. It does not do fuzzy matching.
|
|
18
|
-
*
|
|
19
|
-
* @param searchString [string] term(s) separated by spaces to search for id(s).
|
|
20
|
-
* @returns results [Array<string>] where each string is a corresponding itemId.
|
|
21
|
-
*/
|
|
22
|
-
search(searchString: string): Promise<string[]>;
|
|
23
|
-
/**
|
|
24
|
-
* Seek ahead to the correct position in the .ix file,
|
|
25
|
-
* then load that chunk of .ix into a buffer.
|
|
26
|
-
*
|
|
27
|
-
* @param searchWord [string]
|
|
28
|
-
* @returns a Buffer holding the sections we want to search.
|
|
29
|
-
*/
|
|
30
|
-
private _getBuffer;
|
|
31
|
-
/**
|
|
32
|
-
* Given the end position of the last buffer,
|
|
33
|
-
* load the next chunk of .ix data into a buffer and return it.
|
|
34
|
-
*
|
|
35
|
-
* @param seekPosStart [number] where to start loading data into the new buffer.
|
|
36
|
-
* @returns a Buffer holding the chunk we want to search.
|
|
37
|
-
*/
|
|
38
|
-
private _getNextChunk;
|
|
39
|
-
/**
|
|
40
|
-
* Create and return a buffer given the start and end position
|
|
41
|
-
* of what to load from the .ix file.
|
|
42
|
-
*
|
|
43
|
-
* @param seekPosStart [number] byte the buffer should start reading from file.
|
|
44
|
-
* @param seekPosEnd [number] byte the buffer should stop reading from file.
|
|
45
|
-
* @returns a Buffer holding the chunk of data.
|
|
46
|
-
*/
|
|
47
|
-
private _createBuffer;
|
|
48
|
-
/**
|
|
49
|
-
* Takes in a hit string and returns an array of result terms.
|
|
50
|
-
*
|
|
51
|
-
* @param line [string] The line of .ix that is a hit.
|
|
52
|
-
* @returns results [Array<hit>]. Each hit contains the itemId [string], and wordPos [number].
|
|
53
|
-
*/
|
|
54
|
-
private _parseHitString;
|
|
55
|
-
/**
|
|
56
|
-
* Parses ixx file and constructs a map of {word: ixFileLocation}
|
|
57
|
-
*
|
|
58
|
-
* @param ixxFile [anyFile] second level index that is produced by ixIxx.
|
|
59
|
-
* @returns a ParsedIxx map.
|
|
60
|
-
*/
|
|
61
|
-
private _parseIxx;
|
|
62
|
-
}
|
|
63
|
-
export {};
|
|
1
|
+
import type { GenericFilehandle } from 'generic-filehandle';
|
|
2
|
+
/**
 * Reader for a UCSC trix text index: an `.ix` file of indexed words plus the
 * `.ixx` second-level index that maps word prefixes to offsets in the `.ix`.
 */
export default class Trix {
    private ixFile;
    private ixxFile;
    /** Maximum number of results returned by `search()`; the constructor defaults it to 20. */
    maxResults: number;
    /**
     * @param ixxFile filehandle for the `.ixx` file
     * @param ixFile filehandle for the `.ix` file
     * @param maxResults optional cap on the number of results per search
     */
    constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
    /**
     * Look up a term in the index by prefix.
     *
     * @param searchString the term to search for
     * @param opts optional settings; `opts` is forwarded to the filehandle
     *   reads, so `signal` can be used to abort them
     * @returns `[term, result]` pairs: the indexed word and one item ID listed
     *   for that word
     */
    search(searchString: string, opts?: {
        signal?: AbortSignal;
    }): Promise<[string, string][]>;
    private getIndex;
    private _getBuffer;
    private _parseIxx;
}
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,280 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
}
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted helper for object spread: merges the properties of every
// later argument into the first argument and returns it.
// On first call it replaces itself with Object.assign when that exists,
// otherwise with a fallback that copies own enumerable properties.
var __assign = (this && this.__assign) || function () {
    __assign = Object.assign || function(t) {
        for (var s, i = 1, n = arguments.length; i < n; i++) {
            s = arguments[i];
            for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
                t[p] = s[p];
        }
        return t;
    };
    return __assign.apply(this, arguments);
};
|
|
13
|
+
// Compiler-emitted helper that runs a generator function as an async function.
// Returns a promise (constructor P, defaulting to Promise) that resolves with
// the generator's final value. Each yielded value is wrapped in a promise; its
// fulfilment is fed back through generator.next() and its rejection through
// generator.throw(). Any exception thrown while stepping rejects the promise.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
22
|
+
// Compiler-emitted helper that emulates a generator with a state machine.
// `body` is called repeatedly with the state object `_`; `_.label` selects the
// next `case` to run and `_.sent()` returns (or throws) the value passed in.
// `body` returns an instruction array `op` whose first element is an opcode.
// As handled below: 4 yields op[1] to the caller, 5 delegates to the iterator
// op[1], 3 jumps to label op[1], 7 resumes the operation saved in `_.ops`,
// 6 carries a caught exception, and 2 (or 0/1 at the end) finishes.
// `_.trys` holds the active try-region descriptors used to route 3 and 6.
// The returned object exposes next/throw/return and is its own iterator.
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        // `f` is the re-entrancy guard: it is set while a step is running.
        if (f) throw new TypeError("Generator is already executing.");
        while (_) try {
            // While delegating (`y` set), forward the call to the inner iterator first.
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        // Odd-flagged opcodes surviving to here carry an error to rethrow.
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
|
|
49
|
+
// Compiler-emitted helper for array destructuring: collects values from the
// iterable `o` into a new array, reading at most `n` items (all of them when
// `n` is undefined). If `o` has no Symbol.iterator it is returned unchanged.
// When iteration stops early the iterator's return() is called, and an error
// raised while reading is rethrown after that cleanup.
var __read = (this && this.__read) || function (o, n) {
    var m = typeof Symbol === "function" && o[Symbol.iterator];
    if (!m) return o;
    var i = m.call(o), r, ar = [], e;
    try {
        while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
    }
    catch (error) { e = { error: error }; }
    finally {
        try {
            if (r && !r.done && (m = i["return"])) m.call(i);
        }
        finally { if (e) throw e.error; }
    }
    return ar;
};
|
|
65
|
+
// Compiler-emitted helper for array spread: returns `to` concatenated with the
// elements of `from`. When `pack` is truthy or only two arguments are passed,
// a `from` containing holes is first copied into a dense array so the holes
// become explicit undefined entries.
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
        if (ar || !(i in from)) {
            // Copy lazily: only once the first hole is found.
            if (!ar) ar = Array.prototype.slice.call(from, 0, i);
            ar[i] = from[i];
        }
    }
    return to.concat(ar || Array.prototype.slice.call(from));
};
|
|
74
|
+
// Compiler-emitted helper for for...of: returns an iterator over `o`.
// Uses o[Symbol.iterator] when available; otherwise, for objects with a
// numeric `length`, returns an index-based iterator. Throws a TypeError for
// anything else.
var __values = (this && this.__values) || function(o) {
    var s = typeof Symbol === "function" && Symbol.iterator, m = s && o[s], i = 0;
    if (m) return m.call(o);
    if (o && typeof o.length === "number") return {
        next: function () {
            // Drop the reference once the end is reached so `done` becomes true.
            if (o && i >= o.length) o = void 0;
            return { value: o && o[i++], done: !o };
        }
    };
    throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
};
|
|
85
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
86
|
+
// Number of leading characters of each .ixx line that hold the word prefix;
// the remainder of the line is parsed as a hexadecimal offset.
var trixPrefixSize = 5;
// Number of bytes requested per additional read of the .ix file.
var CHUNKSIZE = 65536;
|
|
88
|
+
// Define this object with .ixx and .ix files.
|
|
89
|
+
// Then use the search() method to search for a word.
|
|
90
|
+
/**
 * Reader for a trix text index: an .ix file of indexed words plus the .ixx
 * second-level index that maps word prefixes to byte offsets in the .ix file.
 *
 * Construct it with filehandles for both files, then call search().
 */
var Trix = /** @class */ (function () {
    /**
     * @param ixxFile filehandle for the .ixx file (must provide readFile)
     * @param ixFile filehandle for the .ix file (must provide read)
     * @param maxResults maximum number of results per search, default 20
     */
    function Trix(ixxFile, ixFile, maxResults) {
        if (maxResults === void 0) { maxResults = 20; }
        this.ixFile = ixFile;
        this.ixxFile = ixxFile;
        this.maxResults = maxResults;
    }
    /**
     * Find index entries whose word starts with the search term.
     *
     * Only the first whitespace-separated word of `searchString` is used, and
     * it is lowercased before matching.
     *
     * @param searchString term to search for
     * @param opts optional options object, forwarded to the filehandle reads
     * @returns promise of [term, result] pairs, de-duplicated by result and
     *   truncated to maxResults
     */
    Trix.prototype.search = function (searchString, opts) {
        var self = this;
        var searchWord;
        // Start from a resolved promise so that synchronous failures surface
        // as rejections, as they did when this method was an async function.
        return Promise.resolve()
            .then(function () {
            searchWord = searchString.trim().split(/\s+/)[0].toLowerCase();
            return self._getBuffer(searchWord, opts);
        })
            .then(function (res) {
            if (!res) {
                return [];
            }
            // Kept outside scan() so that fetched chunks accumulate across passes.
            var buffer = res.buffer;
            var seekPosEnd = res.seekPosEnd;
            // Drop later hits that repeat an already-seen result, then cap the list.
            function finish(hits) {
                var seen = new Set();
                return hits
                    .filter(function (hit) {
                    if (seen.has(hit[1])) {
                        return false;
                    }
                    seen.add(hit[1]);
                    return true;
                })
                    .slice(0, self.maxResults);
            }
            // Parse everything buffered so far; fetch another chunk if needed.
            function scan() {
                var str = buffer.toString();
                // Only text up to the last newline is a run of complete
                // records; the read may have stopped halfway into a line.
                var lastNewline = str.lastIndexOf('\n');
                var lines = lastNewline === -1 ? [] : str.slice(0, lastNewline).split('\n');
                var hits = [];
                var done = false;
                lines.forEach(function (line) {
                    if (!line) {
                        return;
                    }
                    var parts = line.split(' ');
                    var term = parts[0];
                    if (term.startsWith(searchWord)) {
                        parts.slice(1).forEach(function (elt) {
                            hits.push([term, elt.split(',')[0]]);
                        });
                    }
                    else if (term > searchWord) {
                        // A non-matching word that sorts after the query
                        // means no later line can match.
                        done = true;
                    }
                });
                if (done || hits.length >= self.maxResults) {
                    return finish(hits);
                }
                return self.ixFile
                    .read(Buffer.alloc(CHUNKSIZE), 0, CHUNKSIZE, seekPosEnd, opts)
                    .then(function (chunk) {
                    // An empty read means end of file.
                    if (!chunk.bytesRead) {
                        return finish(hits);
                    }
                    // Keep only the bytes actually read so that zero padding
                    // never ends up in the middle of the text.
                    buffer = Buffer.concat([
                        buffer,
                        chunk.buffer.subarray(0, chunk.bytesRead),
                    ]);
                    seekPosEnd += chunk.bytesRead;
                    return scan();
                });
            }
            return scan();
        });
    };
    /**
     * Load and parse the .ixx file.
     *
     * @param opts optional options object forwarded to readFile
     * @returns promise of a Map from word prefix to offset in the .ix file
     */
    Trix.prototype.getIndex = function (opts) {
        return this._parseIxx(this.ixxFile, opts);
    };
    /**
     * Read the first block of the .ix file that may contain `searchWord`.
     *
     * The block starts at the offset of the last index entry that precedes the
     * first entry whose prefix is not smaller than `searchWord`.
     *
     * @returns promise of the read result extended with `seekPosEnd` (the
     *   offset just past the requested block), or of undefined when no index
     *   entry precedes the search word
     */
    Trix.prototype._getBuffer = function (searchWord, opts) {
        var self = this;
        return Promise.resolve()
            .then(function () { return self.getIndex(opts); })
            .then(function (indexes) {
            var seekPosStart = 0;
            var seekPosEnd = -1;
            var passed = false;
            indexes.forEach(function (value, key) {
                if (passed) {
                    return;
                }
                if (key.slice(0, searchWord.length) >= searchWord) {
                    // Stop at the first prefix that is not smaller than the query.
                    passed = true;
                    return;
                }
                seekPosStart = value;
                seekPosEnd = value + CHUNKSIZE;
            });
            var len = seekPosEnd - seekPosStart;
            if (len < 0) {
                return undefined;
            }
            return self.ixFile
                .read(Buffer.alloc(len), 0, len, seekPosStart, opts)
                .then(function (res) {
                return Object.assign({}, res, { seekPosEnd: seekPosEnd });
            });
        });
    };
    /**
     * Parse an .ixx file into a Map of prefix to offset.
     *
     * Each non-empty line holds a prefix of `trixPrefixSize` characters
     * followed by a hexadecimal offset.
     */
    Trix.prototype._parseIxx = function (ixxFile, opts) {
        return Promise.resolve()
            .then(function () {
            return ixxFile.readFile(Object.assign({ encoding: 'utf8' }, opts));
        })
            .then(function (file) {
            return new Map(file
                .split('\n')
                .filter(function (f) { return !!f; })
                .map(function (line) {
                var prefix = line.slice(0, trixPrefixSize);
                var posStr = line.slice(trixPrefixSize);
                var pos = Number.parseInt(posStr, 16);
                return [prefix, pos];
            }));
        });
    };
    return Trix;
}());
|
|
279
|
+
exports.default = Trix;
|
|
280
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,IAAM,cAAc,GAAG,CAAC,CAAA;AAExB,IAAM,SAAS,GAAG,KAAK,CAAA;AAEvB,8CAA8C;AAC9C,2DAA2D;AAC3D;IAKE,cACE,OAA0B,EAC1B,MAAyB,EACzB,UAAe;QAAf,2BAAA,EAAA,eAAe;QAEf,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC9B,CAAC;IAEK,qBAAM,GAAZ,UAAa,YAAoB,EAAE,IAA+B;;;;;;wBAC5D,SAAS,GAAG,EAAgB,CAAA;wBAC1B,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;4CAClC,CAAC;;;;;wCACF,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;wCAC3C,IAAI,GAAG,KAAK,CAAA;wCACJ,qBAAM,OAAK,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,EAAA;;wCAA7C,GAAG,GAAG,SAAuC;wCAC/C,OAAO,SAAA,CAAA;;;;;;wDAGH,UAAU,GAAa,GAAG,WAAhB,EAAE,MAAM,GAAK,GAAG,OAAR,CAAQ;wDAC5B,cAAc,GAAG,KAAK,CAAA;wDACpB,GAAG,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;wDAIvB,KAAK,GAAG,GAAG;6DACd,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;6DAC/B,KAAK,CAAC,IAAI,CAAC;6DACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC,CAAA;wDAEb,IAAI,GAAG,KAAK;6DACf,MAAM,CAAC,UAAA,IAAI;4DACV,IAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;4DAC/B,IAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,CAAA;4DAC3C,IAAI,CAAC,cAAc,IAAI,KAAK,EAAE;gEAC5B,cAAc,GAAG,IAAI,CAAA;6DACtB;iEAAM,IAAI,cAAc,IAAI,CAAC,KAAK,EAAE;gEACnC,IAAI,GAAG,IAAI,CAAA;6DACZ;iEAAM,IAAI,IAAI,GAAG,YAAY,EAAE;gEAC9B,IAAI,GAAG,IAAI,CAAA;6DACZ;4DACD,OAAO,KAAK,CAAA;wDACd,CAAC,CAAC;6DACD,GAAG,CAAC,UAAA,IAAI;4DACD,IAAA,KAAA,OAAmB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA,EAAjC,IAAI,QAAA,EAAK,KAAK,cAAmB,CAAA;4DACxC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAA,GAAG,IAAI,OAAA,CAAC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAzB,CAAyB,CAAC,CAAA;wDACpD,CAAC,CAAC;6DACD,IAAI,EAAwB,CAAA;wDAE/B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;4DAChB,IAAI,GAAG,IAAI,CAAA;yDACZ;wDACD,IAAI,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE;4DAC3B,IAAI,GAAG,IAAI,CAAA;yDACZ;6DAEG,CAAA,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,OAA
K,UAAU,IAAI,CAAC,IAAI,CAAA,EAAzD,wBAAyD;wDAC/C,qBAAM,OAAK,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,EACvB,CAAC,EACD,SAAS,EACT,UAAU,EACV,IAAI,CACL;4DAED,+BAA+B;0DAF9B;;wDANK,QAAM,SAMX;wDAED,+BAA+B;wDAC/B,IAAI,CAAC,KAAG,CAAC,SAAS,EAAE;4DAClB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;wDACD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,KAAG,CAAC,MAAM,CAAC,CAAC,CAAA;wDAC5C,UAAU,IAAI,SAAS,CAAA;wDACvB,OAAO,GAAG,IAAI,CAAC,MAAM,CAAA;;;wDAChB,IAAI,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAK,UAAU,IAAI,IAAI,EAAE;4DACpE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;;yDAEnC;;;;;;;;6CA1DI,CAAA,GAAG,IAAI,CAAC,IAAI,CAAA;;;;;;;;;;;;wBANZ,CAAC,GAAG,CAAC;;;6BAAE,CAAA,CAAC,GAAG,WAAW,CAAC,MAAM,CAAA;sDAA7B,CAAC;;;;;wBAA8B,CAAC,EAAE,CAAA;;4BAoE3C,sBAAO,yBAAI,SAAS,UAAE,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,EAAA;;;;KAChD;IAEO,uBAAQ,GAAhB,UAAiB,IAA+B;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;IAC3C,CAAC;IAEa,yBAAU,GAAxB,UACE,UAAkB,EAClB,IAA+B;;;;;;;wBAE3B,YAAY,GAAG,CAAC,CAAA;wBAChB,UAAU,GAAG,CAAC,CAAC,CAAA;wBACH,qBAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAA;;wBAAnC,OAAO,GAAG,SAAyB;;4BACzC,KAA2B,YAAA,SAAA,OAAO,CAAA,qFAAE;gCAAzB,KAAA,4BAAY,EAAX,GAAG,QAAA,EAAE,KAAK,QAAA;gCACd,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAA;gCAClD,IAAI,UAAU,IAAI,UAAU,EAAE;oCAC5B,MAAK;iCACN;qCAAM;oCACL,YAAY,GAAG,KAAK,CAAA;oCACpB,UAAU,GAAG,KAAK,GAAG,KAAK,CAAA;iCAC3B;6BACF;;;;;;;;;wBAGK,GAAG,GAAG,UAAU,GAAG,YAAY,CAAA;wBACrC,IAAI,GAAG,GAAG,CAAC,EAAE;4BACX,sBAAO,SAAS,EAAA;yBACjB;wBACW,qBAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAChC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,EACjB,CAAC,EACD,GAAG,EACH,YAAY,EACZ,IAAI,CACL,EAAA;;wBANK,GAAG,GAAG,SAMX;wBACD,4CACK,GAAG,KACN,UAAU,YAAA,KACX;;;;KACF;IAEa,wBAAS,GAAvB,UACE,OAA0B,EAC1B,IAA+B;;;;;4BAEjB,qBAAM,OAAO,CAAC,QAAQ,YAClC,QAAQ,EAAE,MAAM,IACb,IAAI,EACP,EAAA;;wBAHI,IAAI,GAAG,CAAC,SAGZ,CAAW;wBACb,sBAAO,IAAI,GAAG,CACZ,IAAI;iCACD,KAAK,CAAC,IAAI,CAAC;iCACX,MAAM,CAAC,UAAA,CAAC,IAAI,OAAA,CAAC,CAAC,CAAC,EAAH,CAAG,CAAC;iCAChB,GAAG,CAAC,UAAA,IAAI;gCACP,I
AAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAA;gCAC5C,IAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;gCACzC,IAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;gCACvC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;4BACtB,CAAC,CAAC,CACL,EAAA;;;;KACF;IACH,WAAC;AAAD,CAAC,AApJD,IAoJC"}
|
package/esm/index.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { GenericFilehandle } from 'generic-filehandle';
|
|
2
|
+
/**
 * Reader for a trix text index made of an `.ix` file and its `.ixx`
 * second-level index.
 */
export default class Trix {
    private ixFile;
    private ixxFile;
    /** Maximum number of results returned by `search()`. */
    maxResults: number;
    /**
     * @param ixxFile filehandle for the `.ixx` file
     * @param ixFile filehandle for the `.ix` file
     * @param maxResults optional cap on the number of results per search
     */
    constructor(ixxFile: GenericFilehandle, ixFile: GenericFilehandle, maxResults?: number);
    /**
     * Look up a term in the index by prefix; only the first space-separated
     * word of `searchString` is queried.
     *
     * @param searchString the term to search for
     * @param opts optional settings, forwarded to the filehandle reads
     * @returns `[term, result]` pairs: the indexed word and one item ID listed
     *   for that word
     */
    search(searchString: string, opts?: {
        signal?: AbortSignal;
    }): Promise<[string, string][]>;
    private getIndex;
    private _getBuffer;
}
|
package/esm/index.js
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Number of leading characters of each .ixx line that hold the word prefix;
// the remainder of the line is parsed as a hexadecimal offset.
const TRIX_PREFIX_SIZE = 5;
// Number of bytes requested per additional read of the .ix file.
const CHUNK_SIZE = 65536;
|
|
5
|
+
// https://stackoverflow.com/a/9229821/2129219
|
|
6
|
+
/**
 * Remove duplicates from an array, keeping the first element seen for each
 * distinct key.
 *
 * @param {Array} a - the array to de-duplicate (not modified)
 * @param {Function} key - maps an element to the value that identifies it
 * @returns {Array} a new array holding the first element for each key, in
 *   the original order
 */
function uniqBy(a, key) {
    const seen = new Set();
    return a.filter(item => {
        const k = key(item);
        if (seen.has(k)) {
            return false;
        }
        seen.add(k);
        return true;
    });
}
|
|
13
|
+
/**
 * Reader for a trix text index: an .ix file of indexed words plus the .ixx
 * second-level index that maps word prefixes to byte offsets in the .ix file.
 */
class Trix {
    /**
     * @param ixxFile filehandle for the .ixx file (must provide readFile)
     * @param ixFile filehandle for the .ix file (must provide read)
     * @param {number} [maxResults=20] maximum number of results per search
     */
    constructor(ixxFile, ixFile, maxResults = 20) {
        this.ixFile = ixFile;
        this.ixxFile = ixxFile;
        this.maxResults = maxResults;
    }

    /**
     * Find index entries whose word starts with the search term.
     *
     * Only the first whitespace-separated word of `searchString` is used, and
     * it is lowercased before matching.
     *
     * @param {string} searchString term to search for
     * @param {object} [opts] options forwarded to the filehandle reads
     * @returns {Promise<Array>} [term, result] pairs, de-duplicated by result
     *   and truncated to maxResults
     */
    async search(searchString, opts) {
        // we only search one word at a time
        const searchWord = searchString.trim().split(/\s+/)[0].toLowerCase();
        const res = await this._getBuffer(searchWord, opts);
        if (!res) {
            return [];
        }

        let { seekPosEnd, buffer } = res;
        let hits = [];
        let done = false;
        while (!done) {
            const str = buffer.toString();
            // Only text up to the last newline is a run of complete records;
            // the read may have stopped halfway into a line.
            const lastNewline = str.lastIndexOf('\n');
            const lines = lastNewline === -1
                ? []
                : str.slice(0, lastNewline).split('\n').filter(line => !!line);

            // Re-derive the hits from the whole accumulated buffer each pass.
            hits = [];
            for (const line of lines) {
                const [term, ...parts] = line.split(' ');
                if (term.startsWith(searchWord)) {
                    for (const part of parts) {
                        hits.push([term, part.split(',')[0]]);
                    }
                }
                else if (term > searchWord) {
                    // A non-matching word that sorts after the query means
                    // no later line can match.
                    done = true;
                }
            }

            if (done || hits.length >= this.maxResults) {
                break;
            }

            // Not done and not enough hits yet: fetch the next chunk.
            // eslint-disable-next-line no-await-in-loop
            const res2 = await this.ixFile.read(Buffer.alloc(CHUNK_SIZE), 0, CHUNK_SIZE, seekPosEnd, opts);
            // An empty read means end of file.
            if (!res2.bytesRead) {
                break;
            }
            // Keep only the bytes actually read so that zero padding never
            // ends up in the middle of the text.
            buffer = Buffer.concat([buffer, res2.buffer.subarray(0, res2.bytesRead)]);
            seekPosEnd += res2.bytesRead;
        }

        // deduplicate results based on the detail column (hit[1])
        return uniqBy(hits, hit => hit[1]).slice(0, this.maxResults);
    }

    /**
     * Load and parse the .ixx file.
     *
     * Each non-empty line holds a prefix of TRIX_PREFIX_SIZE characters
     * followed by a hexadecimal offset.
     *
     * @param {object} [opts] options forwarded to readFile
     * @returns {Promise<Array>} [prefix, offset] pairs in file order
     */
    async getIndex(opts) {
        const file = await this.ixxFile.readFile({
            encoding: 'utf8',
            ...opts,
        });
        return file
            .split('\n')
            .filter(line => !!line)
            .map(line => {
                const prefix = line.slice(0, TRIX_PREFIX_SIZE);
                const posStr = line.slice(TRIX_PREFIX_SIZE);
                return [prefix, Number.parseInt(posStr, 16)];
            });
    }

    /**
     * Read the first block of the .ix file that may contain `searchWord`.
     *
     * The block starts at the offset of the last index entry whose prefix is
     * smaller than `searchWord`.
     *
     * @param {string} searchWord lowercased search term
     * @param {object} [opts] options forwarded to the filehandle reads
     * @returns {Promise<object|undefined>} the read result extended with
     *   `seekPosEnd` (the offset just past the requested block), or undefined
     *   when no index entry precedes the search word
     */
    async _getBuffer(searchWord, opts) {
        let seekPosStart = 0;
        let seekPosEnd = -1;
        const indexes = await this.getIndex(opts);
        for (const [key, value] of indexes) {
            // Compare only as many characters as the search word has.
            if (key.slice(0, searchWord.length) < searchWord) {
                seekPosStart = value;
                seekPosEnd = value + CHUNK_SIZE;
            }
        }

        const len = seekPosEnd - seekPosStart;
        if (len < 0) {
            return undefined;
        }
        const res = await this.ixFile.read(Buffer.alloc(len), 0, len, seekPosStart, opts);
        return {
            ...res,
            seekPosEnd,
        };
    }
}
|
|
120
|
+
exports.default = Trix;
|