@gmod/trix 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/package.json +3 -2
- package/src/index.ts +160 -0
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
package/src/index.ts
ADDED
@@ -0,0 +1,160 @@
+import type { GenericFilehandle } from 'generic-filehandle'
+
+const TRIX_PREFIX_SIZE = 5
+
+const CHUNK_SIZE = 65536
+
+// https://stackoverflow.com/a/9229821/2129219
+function uniqBy(a: [string, string][], key: (elt: [string, string]) => string) {
+  const seen = new Set()
+  return a.filter(item => {
+    const k = key(item)
+    return seen.has(k) ? false : seen.add(k)
+  })
+}
+
+export default class Trix {
+  private ixFile: GenericFilehandle
+
+  private ixxFile: GenericFilehandle
+
+  maxResults: number
+
+  constructor(
+    ixxFile: GenericFilehandle,
+    ixFile: GenericFilehandle,
+    maxResults = 20,
+  ) {
+    this.ixFile = ixFile
+    this.ixxFile = ixxFile
+    this.maxResults = maxResults
+  }
+
+  async search(searchString: string, opts?: { signal?: AbortSignal }) {
+    let resultArr = [] as [string, string][]
+    const searchWords = searchString.split(' ')
+
+    // we only search one word at a time
+    const searchWord = searchWords[0].toLowerCase()
+    const res = await this._getBuffer(searchWord, opts)
+    if (!res) {
+      return []
+    }
+
+    let { seekPosEnd, buffer } = res
+    let done = false
+    while (!done) {
+      let foundSomething = false
+      const str = buffer.toString()
+
+      // slice to lastIndexOf('\n') to make sure we get complete records
+      // since the buffer fetch could get halfway into a record
+      const lines = str
+        .slice(0, str.lastIndexOf('\n'))
+        .split('\n')
+        .filter(f => !!f)
+
+      const hits = lines
+        // eslint-disable-next-line @typescript-eslint/no-loop-func
+        .filter(line => {
+          const word = line.split(' ')[0]
+          const match = word.startsWith(searchWord)
+          if (!foundSomething && match) {
+            foundSomething = true
+          }
+
+          // we are done scanning if we are lexicographically greater than the
+          // search string
+          if (word > searchWord) {
+            done = true
+          }
+          return match
+        })
+        .map(line => {
+          const [term, ...parts] = line.split(' ')
+          return parts.map(elt => [term, elt.split(',')[0]])
+        })
+        .flat() as [string, string][]
+
+      // if we are not done, and we haven't filled up maxResults with hits yet,
+      // then refetch
+      if (resultArr.length + hits.length < this.maxResults && !done) {
+        // eslint-disable-next-line no-await-in-loop
+        const res2 = await this.ixFile.read(
+          Buffer.alloc(CHUNK_SIZE),
+          0,
+          CHUNK_SIZE,
+          seekPosEnd,
+          opts,
+        )
+
+        // early break if empty response
+        if (!res2.bytesRead) {
+          resultArr = resultArr.concat(hits)
+          break
+        }
+        buffer = Buffer.concat([buffer, res2.buffer])
+        seekPosEnd += CHUNK_SIZE
+      }
+
+      // if we have filled up the hits, or we are detected to be done via the
+      // filtering, then return
+      else if (resultArr.length + hits.length >= this.maxResults || done) {
+        resultArr = resultArr.concat(hits)
+        break
+      }
+    }
+
+    // deduplicate results based on the detail column (resultArr[1])
+    return uniqBy(resultArr, elt => elt[1]).slice(0, this.maxResults)
+  }
+
+  private async getIndex(opts?: { signal?: AbortSignal }) {
+    const file = await this.ixxFile.readFile({
+      encoding: 'utf8',
+      ...opts,
+    })
+    return file
+      .split('\n')
+      .filter(f => !!f)
+      .map(line => {
+        const prefix = line.slice(0, TRIX_PREFIX_SIZE)
+        const posStr = line.slice(TRIX_PREFIX_SIZE)
+        const pos = Number.parseInt(posStr, 16)
+        return [prefix, pos] as [string, number]
+      })
+  }
+
+  private async _getBuffer(
+    searchWord: string,
+    opts?: { signal?: AbortSignal },
+  ) {
+    let seekPosStart = 0
+    let seekPosEnd = -1
+    const indexes = await this.getIndex(opts)
+    indexes.forEach(([key, value]) => {
+      const trimmedKey = key.slice(0, searchWord.length)
+      if (trimmedKey < searchWord) {
+        seekPosStart = value
+        seekPosEnd = value + 65536
+      }
+    })
+
+    // Return the buffer and its end position in the file.
+    const len = seekPosEnd - seekPosStart
+    if (len < 0) {
+      return undefined
+    }
+    const res = await this.ixFile.read(
+      Buffer.alloc(len),
+      0,
+      len,
+      seekPosStart,
+      opts,
+    )
+    return {
+      ...res,
+      seekPosEnd,
+    }
+  }
+}
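For reviewers new to this module: judging from the added code, the .ixx file maps 5-character term prefixes (TRIX_PREFIX_SIZE) to hex byte offsets into the .ix file, and search() scans the .ix file from the matching offset in 65536-byte chunks, lowercasing the query and matching it as a prefix against each term. The sketch below shows one way the new class could be used. It is illustrative only and not part of the diff: the file paths are hypothetical, and it assumes the LocalFile handle exported by generic-filehandle.

// Hypothetical usage sketch (not part of the published package contents).
// Opens the trix .ixx (prefix index) and .ix (term index) files and runs a
// prefix search; search() resolves to [term, hit] tuples, where the second
// field is the part of each index entry before the comma.
import { LocalFile } from 'generic-filehandle'
import Trix from '@gmod/trix'

async function main() {
  const ixxFile = new LocalFile('myTrix.ixx') // hypothetical path
  const ixFile = new LocalFile('myTrix.ix') // hypothetical path
  const trix = new Trix(ixxFile, ixFile, 30) // maxResults of 30

  // returns up to 30 entries whose term starts with "brca"
  const results = await trix.search('brca')
  for (const [term, hit] of results) {
    console.log(term, hit)
  }
}

main().catch(e => {
  console.error(e)
})

An AbortSignal can also be threaded through as trix.search(word, { signal }), since the opts object is forwarded to the underlying file reads.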