@oh-my-pi/pi-coding-agent 14.3.0 → 14.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +98 -1
- package/package.json +7 -7
- package/src/autoresearch/prompt.md +1 -1
- package/src/commit/agentic/prompts/analyze-file.md +1 -1
- package/src/config/model-registry.ts +67 -15
- package/src/config/prompt-templates.ts +5 -5
- package/src/config/settings-schema.ts +4 -4
- package/src/cursor.ts +3 -8
- package/src/discovery/helpers.ts +3 -3
- package/src/edit/diff.ts +50 -47
- package/src/edit/index.ts +86 -57
- package/src/edit/line-hash.ts +743 -24
- package/src/edit/modes/apply-patch.ts +0 -9
- package/src/edit/modes/atom.ts +893 -0
- package/src/edit/modes/chunk.ts +14 -24
- package/src/edit/modes/hashline.ts +193 -146
- package/src/edit/modes/patch.ts +5 -9
- package/src/edit/modes/replace.ts +6 -11
- package/src/edit/renderer.ts +14 -10
- package/src/edit/streaming.ts +50 -16
- package/src/exec/bash-executor.ts +2 -4
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +4 -12
- package/src/extensibility/custom-tools/types.ts +2 -0
- package/src/extensibility/custom-tools/wrapper.ts +2 -1
- package/src/internal-urls/docs-index.generated.ts +2 -2
- package/src/lsp/defaults.json +142 -652
- package/src/lsp/index.ts +1 -1
- package/src/mcp/render.ts +1 -8
- package/src/modes/components/assistant-message.ts +4 -0
- package/src/modes/components/diff.ts +23 -14
- package/src/modes/components/footer.ts +21 -16
- package/src/modes/components/session-selector.ts +3 -3
- package/src/modes/components/settings-defs.ts +6 -1
- package/src/modes/components/todo-reminder.ts +1 -8
- package/src/modes/components/tool-execution.ts +1 -4
- package/src/modes/controllers/selector-controller.ts +1 -1
- package/src/modes/print-mode.ts +8 -0
- package/src/prompts/agents/librarian.md +1 -1
- package/src/prompts/agents/reviewer.md +4 -4
- package/src/prompts/ci-green-request.md +1 -1
- package/src/prompts/review-request.md +1 -1
- package/src/prompts/system/subagent-system-prompt.md +3 -3
- package/src/prompts/system/subagent-yield-reminder.md +11 -0
- package/src/prompts/system/system-prompt.md +3 -0
- package/src/prompts/tools/ask.md +3 -2
- package/src/prompts/tools/ast-edit.md +16 -20
- package/src/prompts/tools/ast-grep.md +19 -24
- package/src/prompts/tools/atom.md +87 -0
- package/src/prompts/tools/chunk-edit.md +37 -161
- package/src/prompts/tools/debug.md +4 -5
- package/src/prompts/tools/exit-plan-mode.md +4 -5
- package/src/prompts/tools/find.md +4 -8
- package/src/prompts/tools/github.md +18 -0
- package/src/prompts/tools/grep.md +4 -5
- package/src/prompts/tools/hashline.md +22 -89
- package/src/prompts/tools/{gemini-image.md → image-gen.md} +1 -1
- package/src/prompts/tools/inspect-image.md +6 -6
- package/src/prompts/tools/lsp.md +1 -1
- package/src/prompts/tools/patch.md +12 -19
- package/src/prompts/tools/python.md +3 -2
- package/src/prompts/tools/read-chunk.md +2 -3
- package/src/prompts/tools/read.md +2 -2
- package/src/prompts/tools/ssh.md +8 -17
- package/src/prompts/tools/todo-write.md +54 -41
- package/src/sdk.ts +14 -9
- package/src/session/agent-session.ts +25 -2
- package/src/session/session-manager.ts +4 -1
- package/src/task/executor.ts +43 -48
- package/src/task/render.ts +11 -13
- package/src/tools/ask.ts +7 -7
- package/src/tools/ast-edit.ts +45 -41
- package/src/tools/ast-grep.ts +77 -85
- package/src/tools/bash.ts +8 -9
- package/src/tools/browser.ts +32 -30
- package/src/tools/calculator.ts +4 -4
- package/src/tools/cancel-job.ts +1 -1
- package/src/tools/checkpoint.ts +2 -2
- package/src/tools/debug.ts +41 -37
- package/src/tools/exit-plan-mode.ts +1 -1
- package/src/tools/find.ts +4 -4
- package/src/tools/gh-renderer.ts +12 -4
- package/src/tools/gh.ts +509 -697
- package/src/tools/grep.ts +116 -131
- package/src/tools/{gemini-image.ts → image-gen.ts} +459 -60
- package/src/tools/index.ts +14 -32
- package/src/tools/inspect-image.ts +3 -3
- package/src/tools/json-tree.ts +114 -114
- package/src/tools/match-line-format.ts +8 -7
- package/src/tools/notebook.ts +8 -7
- package/src/tools/poll-tool.ts +2 -1
- package/src/tools/python.ts +9 -23
- package/src/tools/read.ts +32 -25
- package/src/tools/render-mermaid.ts +1 -1
- package/src/tools/render-utils.ts +18 -0
- package/src/tools/renderers.ts +2 -2
- package/src/tools/report-tool-issue.ts +3 -2
- package/src/tools/resolve.ts +1 -1
- package/src/tools/review.ts +12 -10
- package/src/tools/search-tool-bm25.ts +2 -4
- package/src/tools/ssh.ts +4 -4
- package/src/tools/todo-write.ts +172 -147
- package/src/tools/vim.ts +14 -15
- package/src/tools/write.ts +4 -4
- package/src/tools/{submit-result.ts → yield.ts} +11 -13
- package/src/utils/edit-mode.ts +2 -1
- package/src/utils/file-display-mode.ts +10 -5
- package/src/utils/git.ts +9 -5
- package/src/utils/shell-snapshot.ts +2 -3
- package/src/vim/render.ts +4 -4
- package/src/prompts/system/subagent-submit-reminder.md +0 -11
- package/src/prompts/tools/gh-issue-view.md +0 -11
- package/src/prompts/tools/gh-pr-checkout.md +0 -12
- package/src/prompts/tools/gh-pr-diff.md +0 -12
- package/src/prompts/tools/gh-pr-push.md +0 -12
- package/src/prompts/tools/gh-pr-view.md +0 -11
- package/src/prompts/tools/gh-repo-view.md +0 -11
- package/src/prompts/tools/gh-run-watch.md +0 -12
- package/src/prompts/tools/gh-search-issues.md +0 -11
- package/src/prompts/tools/gh-search-prs.md +0 -11
package/src/edit/line-hash.ts
CHANGED
|
@@ -3,26 +3,740 @@
|
|
|
3
3
|
* circular dependencies (prompt-templates → hashline → tools → edit).
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
/**
|
|
7
|
-
|
|
6
|
+
/**
|
|
7
|
+
* 647 single-token BPE bigrams for hashline anchors. Every entry tokenizes as
|
|
8
|
+
* exactly one token in modern BPE vocabularies (cl100k / o200k / Claude family),
|
|
9
|
+
* so a hashline anchor built from one bigram is exactly 1 token.
|
|
10
|
+
*
|
|
11
|
+
* This is the complete set of 2-letter lowercase combinations that are single
|
|
12
|
+
* tokens — the 29 missing combinations are rare-letter pairs (q/x/z heavy)
|
|
13
|
+
* that no major BPE vocabulary merges into a single token.
|
|
14
|
+
*
|
|
15
|
+
* Order is stable forever — changing it would invalidate every saved
|
|
16
|
+
* `LINE+ID` reference in transcripts and prompts.
|
|
17
|
+
*/
|
|
18
|
+
export const HASHLINE_BIGRAMS = [
|
|
19
|
+
"aa",
|
|
20
|
+
"ab",
|
|
21
|
+
"ac",
|
|
22
|
+
"ad",
|
|
23
|
+
"ae",
|
|
24
|
+
"af",
|
|
25
|
+
"ag",
|
|
26
|
+
"ah",
|
|
27
|
+
"ai",
|
|
28
|
+
"aj",
|
|
29
|
+
"ak",
|
|
30
|
+
"al",
|
|
31
|
+
"am",
|
|
32
|
+
"an",
|
|
33
|
+
"ao",
|
|
34
|
+
"ap",
|
|
35
|
+
"aq",
|
|
36
|
+
"ar",
|
|
37
|
+
"as",
|
|
38
|
+
"at",
|
|
39
|
+
"au",
|
|
40
|
+
"av",
|
|
41
|
+
"aw",
|
|
42
|
+
"ax",
|
|
43
|
+
"ay",
|
|
44
|
+
"az",
|
|
45
|
+
"ba",
|
|
46
|
+
"bb",
|
|
47
|
+
"bc",
|
|
48
|
+
"bd",
|
|
49
|
+
"be",
|
|
50
|
+
"bf",
|
|
51
|
+
"bg",
|
|
52
|
+
"bh",
|
|
53
|
+
"bi",
|
|
54
|
+
"bj",
|
|
55
|
+
"bk",
|
|
56
|
+
"bl",
|
|
57
|
+
"bm",
|
|
58
|
+
"bn",
|
|
59
|
+
"bo",
|
|
60
|
+
"bp",
|
|
61
|
+
"br",
|
|
62
|
+
"bs",
|
|
63
|
+
"bt",
|
|
64
|
+
"bu",
|
|
65
|
+
"bv",
|
|
66
|
+
"bw",
|
|
67
|
+
"bx",
|
|
68
|
+
"by",
|
|
69
|
+
"bz",
|
|
70
|
+
"ca",
|
|
71
|
+
"cb",
|
|
72
|
+
"cc",
|
|
73
|
+
"cd",
|
|
74
|
+
"ce",
|
|
75
|
+
"cf",
|
|
76
|
+
"cg",
|
|
77
|
+
"ch",
|
|
78
|
+
"ci",
|
|
79
|
+
"cj",
|
|
80
|
+
"ck",
|
|
81
|
+
"cl",
|
|
82
|
+
"cm",
|
|
83
|
+
"cn",
|
|
84
|
+
"co",
|
|
85
|
+
"cp",
|
|
86
|
+
"cq",
|
|
87
|
+
"cr",
|
|
88
|
+
"cs",
|
|
89
|
+
"ct",
|
|
90
|
+
"cu",
|
|
91
|
+
"cv",
|
|
92
|
+
"cw",
|
|
93
|
+
"cx",
|
|
94
|
+
"cy",
|
|
95
|
+
"cz",
|
|
96
|
+
"da",
|
|
97
|
+
"db",
|
|
98
|
+
"dc",
|
|
99
|
+
"dd",
|
|
100
|
+
"de",
|
|
101
|
+
"df",
|
|
102
|
+
"dg",
|
|
103
|
+
"dh",
|
|
104
|
+
"di",
|
|
105
|
+
"dj",
|
|
106
|
+
"dk",
|
|
107
|
+
"dl",
|
|
108
|
+
"dm",
|
|
109
|
+
"dn",
|
|
110
|
+
"do",
|
|
111
|
+
"dp",
|
|
112
|
+
"dq",
|
|
113
|
+
"dr",
|
|
114
|
+
"ds",
|
|
115
|
+
"dt",
|
|
116
|
+
"du",
|
|
117
|
+
"dv",
|
|
118
|
+
"dw",
|
|
119
|
+
"dx",
|
|
120
|
+
"dy",
|
|
121
|
+
"dz",
|
|
122
|
+
"ea",
|
|
123
|
+
"eb",
|
|
124
|
+
"ec",
|
|
125
|
+
"ed",
|
|
126
|
+
"ee",
|
|
127
|
+
"ef",
|
|
128
|
+
"eg",
|
|
129
|
+
"eh",
|
|
130
|
+
"ei",
|
|
131
|
+
"ej",
|
|
132
|
+
"ek",
|
|
133
|
+
"el",
|
|
134
|
+
"em",
|
|
135
|
+
"en",
|
|
136
|
+
"eo",
|
|
137
|
+
"ep",
|
|
138
|
+
"eq",
|
|
139
|
+
"er",
|
|
140
|
+
"es",
|
|
141
|
+
"et",
|
|
142
|
+
"eu",
|
|
143
|
+
"ev",
|
|
144
|
+
"ew",
|
|
145
|
+
"ex",
|
|
146
|
+
"ey",
|
|
147
|
+
"ez",
|
|
148
|
+
"fa",
|
|
149
|
+
"fb",
|
|
150
|
+
"fc",
|
|
151
|
+
"fd",
|
|
152
|
+
"fe",
|
|
153
|
+
"ff",
|
|
154
|
+
"fg",
|
|
155
|
+
"fh",
|
|
156
|
+
"fi",
|
|
157
|
+
"fj",
|
|
158
|
+
"fk",
|
|
159
|
+
"fl",
|
|
160
|
+
"fm",
|
|
161
|
+
"fn",
|
|
162
|
+
"fo",
|
|
163
|
+
"fp",
|
|
164
|
+
"fq",
|
|
165
|
+
"fr",
|
|
166
|
+
"fs",
|
|
167
|
+
"ft",
|
|
168
|
+
"fu",
|
|
169
|
+
"fv",
|
|
170
|
+
"fw",
|
|
171
|
+
"fx",
|
|
172
|
+
"fy",
|
|
173
|
+
"fz",
|
|
174
|
+
"ga",
|
|
175
|
+
"gb",
|
|
176
|
+
"gc",
|
|
177
|
+
"gd",
|
|
178
|
+
"ge",
|
|
179
|
+
"gf",
|
|
180
|
+
"gg",
|
|
181
|
+
"gh",
|
|
182
|
+
"gi",
|
|
183
|
+
"gj",
|
|
184
|
+
"gl",
|
|
185
|
+
"gm",
|
|
186
|
+
"gn",
|
|
187
|
+
"go",
|
|
188
|
+
"gp",
|
|
189
|
+
"gr",
|
|
190
|
+
"gs",
|
|
191
|
+
"gt",
|
|
192
|
+
"gu",
|
|
193
|
+
"gv",
|
|
194
|
+
"gw",
|
|
195
|
+
"gx",
|
|
196
|
+
"gy",
|
|
197
|
+
"gz",
|
|
198
|
+
"ha",
|
|
199
|
+
"hb",
|
|
200
|
+
"hc",
|
|
201
|
+
"hd",
|
|
202
|
+
"he",
|
|
203
|
+
"hf",
|
|
204
|
+
"hg",
|
|
205
|
+
"hh",
|
|
206
|
+
"hi",
|
|
207
|
+
"hj",
|
|
208
|
+
"hk",
|
|
209
|
+
"hl",
|
|
210
|
+
"hm",
|
|
211
|
+
"hn",
|
|
212
|
+
"ho",
|
|
213
|
+
"hp",
|
|
214
|
+
"hq",
|
|
215
|
+
"hr",
|
|
216
|
+
"hs",
|
|
217
|
+
"ht",
|
|
218
|
+
"hu",
|
|
219
|
+
"hv",
|
|
220
|
+
"hw",
|
|
221
|
+
"hx",
|
|
222
|
+
"hy",
|
|
223
|
+
"hz",
|
|
224
|
+
"ia",
|
|
225
|
+
"ib",
|
|
226
|
+
"ic",
|
|
227
|
+
"id",
|
|
228
|
+
"ie",
|
|
229
|
+
"if",
|
|
230
|
+
"ig",
|
|
231
|
+
"ih",
|
|
232
|
+
"ii",
|
|
233
|
+
"ij",
|
|
234
|
+
"ik",
|
|
235
|
+
"il",
|
|
236
|
+
"im",
|
|
237
|
+
"in",
|
|
238
|
+
"io",
|
|
239
|
+
"ip",
|
|
240
|
+
"iq",
|
|
241
|
+
"ir",
|
|
242
|
+
"is",
|
|
243
|
+
"it",
|
|
244
|
+
"iu",
|
|
245
|
+
"iv",
|
|
246
|
+
"iw",
|
|
247
|
+
"ix",
|
|
248
|
+
"iy",
|
|
249
|
+
"iz",
|
|
250
|
+
"ja",
|
|
251
|
+
"jb",
|
|
252
|
+
"jc",
|
|
253
|
+
"jd",
|
|
254
|
+
"je",
|
|
255
|
+
"jf",
|
|
256
|
+
"jg",
|
|
257
|
+
"jh",
|
|
258
|
+
"ji",
|
|
259
|
+
"jj",
|
|
260
|
+
"jk",
|
|
261
|
+
"jl",
|
|
262
|
+
"jm",
|
|
263
|
+
"jn",
|
|
264
|
+
"jo",
|
|
265
|
+
"jp",
|
|
266
|
+
"jq",
|
|
267
|
+
"jr",
|
|
268
|
+
"js",
|
|
269
|
+
"jt",
|
|
270
|
+
"ju",
|
|
271
|
+
"jw",
|
|
272
|
+
"jx",
|
|
273
|
+
"jy",
|
|
274
|
+
"ka",
|
|
275
|
+
"kb",
|
|
276
|
+
"kc",
|
|
277
|
+
"kd",
|
|
278
|
+
"ke",
|
|
279
|
+
"kf",
|
|
280
|
+
"kg",
|
|
281
|
+
"kh",
|
|
282
|
+
"ki",
|
|
283
|
+
"kj",
|
|
284
|
+
"kk",
|
|
285
|
+
"kl",
|
|
286
|
+
"km",
|
|
287
|
+
"kn",
|
|
288
|
+
"ko",
|
|
289
|
+
"kp",
|
|
290
|
+
"kr",
|
|
291
|
+
"ks",
|
|
292
|
+
"kt",
|
|
293
|
+
"ku",
|
|
294
|
+
"kv",
|
|
295
|
+
"kw",
|
|
296
|
+
"kx",
|
|
297
|
+
"ky",
|
|
298
|
+
"la",
|
|
299
|
+
"lb",
|
|
300
|
+
"lc",
|
|
301
|
+
"ld",
|
|
302
|
+
"le",
|
|
303
|
+
"lf",
|
|
304
|
+
"lg",
|
|
305
|
+
"lh",
|
|
306
|
+
"li",
|
|
307
|
+
"lj",
|
|
308
|
+
"lk",
|
|
309
|
+
"ll",
|
|
310
|
+
"lm",
|
|
311
|
+
"ln",
|
|
312
|
+
"lo",
|
|
313
|
+
"lp",
|
|
314
|
+
"lr",
|
|
315
|
+
"ls",
|
|
316
|
+
"lt",
|
|
317
|
+
"lu",
|
|
318
|
+
"lv",
|
|
319
|
+
"lw",
|
|
320
|
+
"lx",
|
|
321
|
+
"ly",
|
|
322
|
+
"lz",
|
|
323
|
+
"ma",
|
|
324
|
+
"mb",
|
|
325
|
+
"mc",
|
|
326
|
+
"md",
|
|
327
|
+
"me",
|
|
328
|
+
"mf",
|
|
329
|
+
"mg",
|
|
330
|
+
"mh",
|
|
331
|
+
"mi",
|
|
332
|
+
"mj",
|
|
333
|
+
"mk",
|
|
334
|
+
"ml",
|
|
335
|
+
"mm",
|
|
336
|
+
"mn",
|
|
337
|
+
"mo",
|
|
338
|
+
"mp",
|
|
339
|
+
"mq",
|
|
340
|
+
"mr",
|
|
341
|
+
"ms",
|
|
342
|
+
"mt",
|
|
343
|
+
"mu",
|
|
344
|
+
"mv",
|
|
345
|
+
"mw",
|
|
346
|
+
"mx",
|
|
347
|
+
"my",
|
|
348
|
+
"mz",
|
|
349
|
+
"na",
|
|
350
|
+
"nb",
|
|
351
|
+
"nc",
|
|
352
|
+
"nd",
|
|
353
|
+
"ne",
|
|
354
|
+
"nf",
|
|
355
|
+
"ng",
|
|
356
|
+
"nh",
|
|
357
|
+
"ni",
|
|
358
|
+
"nj",
|
|
359
|
+
"nk",
|
|
360
|
+
"nl",
|
|
361
|
+
"nm",
|
|
362
|
+
"nn",
|
|
363
|
+
"no",
|
|
364
|
+
"np",
|
|
365
|
+
"nr",
|
|
366
|
+
"ns",
|
|
367
|
+
"nt",
|
|
368
|
+
"nu",
|
|
369
|
+
"nv",
|
|
370
|
+
"nw",
|
|
371
|
+
"nx",
|
|
372
|
+
"ny",
|
|
373
|
+
"nz",
|
|
374
|
+
"oa",
|
|
375
|
+
"ob",
|
|
376
|
+
"oc",
|
|
377
|
+
"od",
|
|
378
|
+
"oe",
|
|
379
|
+
"of",
|
|
380
|
+
"og",
|
|
381
|
+
"oh",
|
|
382
|
+
"oi",
|
|
383
|
+
"oj",
|
|
384
|
+
"ok",
|
|
385
|
+
"ol",
|
|
386
|
+
"om",
|
|
387
|
+
"on",
|
|
388
|
+
"oo",
|
|
389
|
+
"op",
|
|
390
|
+
"oq",
|
|
391
|
+
"or",
|
|
392
|
+
"os",
|
|
393
|
+
"ot",
|
|
394
|
+
"ou",
|
|
395
|
+
"ov",
|
|
396
|
+
"ow",
|
|
397
|
+
"ox",
|
|
398
|
+
"oy",
|
|
399
|
+
"oz",
|
|
400
|
+
"pa",
|
|
401
|
+
"pb",
|
|
402
|
+
"pc",
|
|
403
|
+
"pd",
|
|
404
|
+
"pe",
|
|
405
|
+
"pf",
|
|
406
|
+
"pg",
|
|
407
|
+
"ph",
|
|
408
|
+
"pi",
|
|
409
|
+
"pj",
|
|
410
|
+
"pk",
|
|
411
|
+
"pl",
|
|
412
|
+
"pm",
|
|
413
|
+
"pn",
|
|
414
|
+
"po",
|
|
415
|
+
"pp",
|
|
416
|
+
"pq",
|
|
417
|
+
"pr",
|
|
418
|
+
"ps",
|
|
419
|
+
"pt",
|
|
420
|
+
"pu",
|
|
421
|
+
"pv",
|
|
422
|
+
"pw",
|
|
423
|
+
"px",
|
|
424
|
+
"py",
|
|
425
|
+
"pz",
|
|
426
|
+
"qa",
|
|
427
|
+
"qb",
|
|
428
|
+
"qc",
|
|
429
|
+
"qd",
|
|
430
|
+
"qe",
|
|
431
|
+
"qh",
|
|
432
|
+
"qi",
|
|
433
|
+
"ql",
|
|
434
|
+
"qm",
|
|
435
|
+
"qn",
|
|
436
|
+
"qo",
|
|
437
|
+
"qp",
|
|
438
|
+
"qq",
|
|
439
|
+
"qr",
|
|
440
|
+
"qs",
|
|
441
|
+
"qt",
|
|
442
|
+
"qu",
|
|
443
|
+
"qw",
|
|
444
|
+
"qx",
|
|
445
|
+
"qy",
|
|
446
|
+
"ra",
|
|
447
|
+
"rb",
|
|
448
|
+
"rc",
|
|
449
|
+
"rd",
|
|
450
|
+
"re",
|
|
451
|
+
"rf",
|
|
452
|
+
"rg",
|
|
453
|
+
"rh",
|
|
454
|
+
"ri",
|
|
455
|
+
"rk",
|
|
456
|
+
"rl",
|
|
457
|
+
"rm",
|
|
458
|
+
"rn",
|
|
459
|
+
"ro",
|
|
460
|
+
"rp",
|
|
461
|
+
"rq",
|
|
462
|
+
"rr",
|
|
463
|
+
"rs",
|
|
464
|
+
"rt",
|
|
465
|
+
"ru",
|
|
466
|
+
"rv",
|
|
467
|
+
"rw",
|
|
468
|
+
"rx",
|
|
469
|
+
"ry",
|
|
470
|
+
"rz",
|
|
471
|
+
"sa",
|
|
472
|
+
"sb",
|
|
473
|
+
"sc",
|
|
474
|
+
"sd",
|
|
475
|
+
"se",
|
|
476
|
+
"sf",
|
|
477
|
+
"sg",
|
|
478
|
+
"sh",
|
|
479
|
+
"si",
|
|
480
|
+
"sj",
|
|
481
|
+
"sk",
|
|
482
|
+
"sl",
|
|
483
|
+
"sm",
|
|
484
|
+
"sn",
|
|
485
|
+
"so",
|
|
486
|
+
"sp",
|
|
487
|
+
"sq",
|
|
488
|
+
"sr",
|
|
489
|
+
"ss",
|
|
490
|
+
"st",
|
|
491
|
+
"su",
|
|
492
|
+
"sv",
|
|
493
|
+
"sw",
|
|
494
|
+
"sx",
|
|
495
|
+
"sy",
|
|
496
|
+
"sz",
|
|
497
|
+
"ta",
|
|
498
|
+
"tb",
|
|
499
|
+
"tc",
|
|
500
|
+
"td",
|
|
501
|
+
"te",
|
|
502
|
+
"tf",
|
|
503
|
+
"tg",
|
|
504
|
+
"th",
|
|
505
|
+
"ti",
|
|
506
|
+
"tj",
|
|
507
|
+
"tk",
|
|
508
|
+
"tl",
|
|
509
|
+
"tm",
|
|
510
|
+
"tn",
|
|
511
|
+
"to",
|
|
512
|
+
"tp",
|
|
513
|
+
"tr",
|
|
514
|
+
"ts",
|
|
515
|
+
"tt",
|
|
516
|
+
"tu",
|
|
517
|
+
"tv",
|
|
518
|
+
"tw",
|
|
519
|
+
"tx",
|
|
520
|
+
"ty",
|
|
521
|
+
"tz",
|
|
522
|
+
"ua",
|
|
523
|
+
"ub",
|
|
524
|
+
"uc",
|
|
525
|
+
"ud",
|
|
526
|
+
"ue",
|
|
527
|
+
"uf",
|
|
528
|
+
"ug",
|
|
529
|
+
"uh",
|
|
530
|
+
"ui",
|
|
531
|
+
"uj",
|
|
532
|
+
"uk",
|
|
533
|
+
"ul",
|
|
534
|
+
"um",
|
|
535
|
+
"un",
|
|
536
|
+
"uo",
|
|
537
|
+
"up",
|
|
538
|
+
"uq",
|
|
539
|
+
"ur",
|
|
540
|
+
"us",
|
|
541
|
+
"ut",
|
|
542
|
+
"uu",
|
|
543
|
+
"uv",
|
|
544
|
+
"uw",
|
|
545
|
+
"ux",
|
|
546
|
+
"uy",
|
|
547
|
+
"uz",
|
|
548
|
+
"va",
|
|
549
|
+
"vb",
|
|
550
|
+
"vc",
|
|
551
|
+
"vd",
|
|
552
|
+
"ve",
|
|
553
|
+
"vf",
|
|
554
|
+
"vg",
|
|
555
|
+
"vh",
|
|
556
|
+
"vi",
|
|
557
|
+
"vj",
|
|
558
|
+
"vk",
|
|
559
|
+
"vl",
|
|
560
|
+
"vm",
|
|
561
|
+
"vn",
|
|
562
|
+
"vo",
|
|
563
|
+
"vp",
|
|
564
|
+
"vq",
|
|
565
|
+
"vr",
|
|
566
|
+
"vs",
|
|
567
|
+
"vt",
|
|
568
|
+
"vu",
|
|
569
|
+
"vv",
|
|
570
|
+
"vw",
|
|
571
|
+
"vx",
|
|
572
|
+
"vy",
|
|
573
|
+
"vz",
|
|
574
|
+
"wa",
|
|
575
|
+
"wb",
|
|
576
|
+
"wc",
|
|
577
|
+
"wd",
|
|
578
|
+
"we",
|
|
579
|
+
"wf",
|
|
580
|
+
"wg",
|
|
581
|
+
"wh",
|
|
582
|
+
"wi",
|
|
583
|
+
"wj",
|
|
584
|
+
"wk",
|
|
585
|
+
"wl",
|
|
586
|
+
"wm",
|
|
587
|
+
"wn",
|
|
588
|
+
"wo",
|
|
589
|
+
"wp",
|
|
590
|
+
"wr",
|
|
591
|
+
"ws",
|
|
592
|
+
"wt",
|
|
593
|
+
"wu",
|
|
594
|
+
"wv",
|
|
595
|
+
"ww",
|
|
596
|
+
"wx",
|
|
597
|
+
"wy",
|
|
598
|
+
"xa",
|
|
599
|
+
"xb",
|
|
600
|
+
"xc",
|
|
601
|
+
"xd",
|
|
602
|
+
"xe",
|
|
603
|
+
"xf",
|
|
604
|
+
"xh",
|
|
605
|
+
"xi",
|
|
606
|
+
"xl",
|
|
607
|
+
"xm",
|
|
608
|
+
"xn",
|
|
609
|
+
"xo",
|
|
610
|
+
"xp",
|
|
611
|
+
"xr",
|
|
612
|
+
"xs",
|
|
613
|
+
"xt",
|
|
614
|
+
"xu",
|
|
615
|
+
"xx",
|
|
616
|
+
"xy",
|
|
617
|
+
"xz",
|
|
618
|
+
"ya",
|
|
619
|
+
"yb",
|
|
620
|
+
"yc",
|
|
621
|
+
"yd",
|
|
622
|
+
"ye",
|
|
623
|
+
"yf",
|
|
624
|
+
"yg",
|
|
625
|
+
"yh",
|
|
626
|
+
"yi",
|
|
627
|
+
"yj",
|
|
628
|
+
"yk",
|
|
629
|
+
"yl",
|
|
630
|
+
"ym",
|
|
631
|
+
"yn",
|
|
632
|
+
"yo",
|
|
633
|
+
"yp",
|
|
634
|
+
"yr",
|
|
635
|
+
"ys",
|
|
636
|
+
"yt",
|
|
637
|
+
"yu",
|
|
638
|
+
"yv",
|
|
639
|
+
"yw",
|
|
640
|
+
"yx",
|
|
641
|
+
"yy",
|
|
642
|
+
"yz",
|
|
643
|
+
"za",
|
|
644
|
+
"zb",
|
|
645
|
+
"zc",
|
|
646
|
+
"zd",
|
|
647
|
+
"ze",
|
|
648
|
+
"zf",
|
|
649
|
+
"zg",
|
|
650
|
+
"zh",
|
|
651
|
+
"zi",
|
|
652
|
+
"zk",
|
|
653
|
+
"zl",
|
|
654
|
+
"zm",
|
|
655
|
+
"zn",
|
|
656
|
+
"zo",
|
|
657
|
+
"zp",
|
|
658
|
+
"zr",
|
|
659
|
+
"zs",
|
|
660
|
+
"zt",
|
|
661
|
+
"zu",
|
|
662
|
+
"zw",
|
|
663
|
+
"zx",
|
|
664
|
+
"zy",
|
|
665
|
+
"zz",
|
|
666
|
+
] as const;
|
|
667
|
+
|
|
668
|
+
export const HASHLINE_BIGRAMS_COUNT = HASHLINE_BIGRAMS.length;
|
|
669
|
+
|
|
670
|
+
/**
|
|
671
|
+
* 40 common English BPE bigrams used by chunk checksums (`path#checksum`).
|
|
672
|
+
* Kept separate from {@link HASHLINE_BIGRAMS} because the chunk checksum
|
|
673
|
+
* format is `path#bigram1bigram2` (4 chars from a 1600-code namespace) and
|
|
674
|
+
* is independent of the line-anchor format.
|
|
675
|
+
*
|
|
676
|
+
* Order is stable forever — changing it invalidates every saved chunk path.
|
|
677
|
+
*/
|
|
678
|
+
export const CHUNK_BIGRAMS = [
|
|
679
|
+
"th",
|
|
680
|
+
"he",
|
|
681
|
+
"in",
|
|
682
|
+
"er",
|
|
683
|
+
"an",
|
|
684
|
+
"re",
|
|
685
|
+
"on",
|
|
686
|
+
"at",
|
|
687
|
+
"en",
|
|
688
|
+
"nd",
|
|
689
|
+
"ti",
|
|
690
|
+
"es",
|
|
691
|
+
"or",
|
|
692
|
+
"te",
|
|
693
|
+
"of",
|
|
694
|
+
"ed",
|
|
695
|
+
"is",
|
|
696
|
+
"it",
|
|
697
|
+
"al",
|
|
698
|
+
"ar",
|
|
699
|
+
"st",
|
|
700
|
+
"to",
|
|
701
|
+
"nt",
|
|
702
|
+
"ng",
|
|
703
|
+
"se",
|
|
704
|
+
"ha",
|
|
705
|
+
"as",
|
|
706
|
+
"ou",
|
|
707
|
+
"io",
|
|
708
|
+
"le",
|
|
709
|
+
"ve",
|
|
710
|
+
"co",
|
|
711
|
+
"me",
|
|
712
|
+
"de",
|
|
713
|
+
"hi",
|
|
714
|
+
"ri",
|
|
715
|
+
"ro",
|
|
716
|
+
"ic",
|
|
717
|
+
"ne",
|
|
718
|
+
"ea",
|
|
719
|
+
] as const;
|
|
720
|
+
|
|
721
|
+
export const CHUNK_BIGRAMS_COUNT = CHUNK_BIGRAMS.length;
|
|
8
722
|
|
|
9
|
-
|
|
723
|
+
/**
|
|
724
|
+
* Regex source matching exactly one bigram from {@link HASHLINE_BIGRAMS}.
|
|
725
|
+
* Used by hashline parsers — keep in sync with the alphabet array above.
|
|
726
|
+
*/
|
|
727
|
+
export const HASHLINE_BIGRAM_RE_SRC = `(?:${HASHLINE_BIGRAMS.join("|")})`;
|
|
10
728
|
|
|
11
|
-
const
|
|
12
|
-
const h = i >>> 4;
|
|
13
|
-
const l = i & 0x0f;
|
|
14
|
-
return `${NIBBLE_STR[h]}${NIBBLE_STR[l]}`;
|
|
15
|
-
});
|
|
729
|
+
export const HASHLINE_CONTENT_SEPARATOR = "|";
|
|
16
730
|
|
|
17
731
|
const RE_SIGNIFICANT = /[\p{L}\p{N}]/u;
|
|
18
732
|
|
|
19
733
|
/**
|
|
20
|
-
* Compute a short
|
|
734
|
+
* Compute a short BPE-bigram hash of a single line.
|
|
21
735
|
*
|
|
22
|
-
* Uses xxHash32 on a trailing-whitespace-trimmed, CR-stripped line,
|
|
23
|
-
* {@link
|
|
24
|
-
* punctuation/symbols/whitespace), the line number is mixed in
|
|
25
|
-
* The line input should not include a trailing newline.
|
|
736
|
+
* Uses xxHash32 on a trailing-whitespace-trimmed, CR-stripped line, mapped into
|
|
737
|
+
* {@link HASHLINE_BIGRAMS} via modulo. For lines containing no alphanumeric
|
|
738
|
+
* characters (only punctuation/symbols/whitespace), the line number is mixed in
|
|
739
|
+
* to reduce hash collisions. The line input should not include a trailing newline.
|
|
26
740
|
*/
|
|
27
741
|
export function computeLineHash(idx: number, line: string): string {
|
|
28
742
|
line = line.replace(/\r/g, "").trimEnd();
|
|
@@ -31,20 +745,30 @@ export function computeLineHash(idx: number, line: string): string {
|
|
|
31
745
|
if (!RE_SIGNIFICANT.test(line)) {
|
|
32
746
|
seed = idx;
|
|
33
747
|
}
|
|
34
|
-
return
|
|
748
|
+
return HASHLINE_BIGRAMS[Bun.hash.xxHash32(line, seed) % HASHLINE_BIGRAMS_COUNT];
|
|
35
749
|
}
|
|
36
750
|
|
|
37
751
|
/**
|
|
38
|
-
* Formats
|
|
752
|
+
* Formats an anchor reference given a line number and its text.
|
|
753
|
+
* Returns `LINE+ID` (e.g., `42nd`) — no separator between number and bigram.
|
|
39
754
|
*/
|
|
40
755
|
export function formatLineHash(line: number, lines: string): string {
|
|
41
|
-
return `${line}
|
|
756
|
+
return `${line}${computeLineHash(line, lines)}`;
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
/**
|
|
760
|
+
* Formats a single line with a hashline anchor.
|
|
761
|
+
* Returns `LINE+ID|TEXT` (e.g., `42nd|function hi() {`)
|
|
762
|
+
*/
|
|
763
|
+
export function formatHashLine(lineNumber: number, line: string): string {
|
|
764
|
+
return `${lineNumber}${computeLineHash(lineNumber, line)}${HASHLINE_CONTENT_SEPARATOR}${line}`;
|
|
42
765
|
}
|
|
43
766
|
|
|
44
767
|
/**
|
|
45
768
|
* Format file text with hashline prefixes for display.
|
|
46
769
|
*
|
|
47
|
-
* Each line becomes `
|
|
770
|
+
* Each line becomes `LINE+ID|TEXT` where LINE is the 1-indexed line number.
|
|
771
|
+
* No padding on line numbers; pipe separator between anchor and content.
|
|
48
772
|
*
|
|
49
773
|
* @param text - Raw file text string
|
|
50
774
|
* @param startLine - First line number (1-indexed, defaults to 1)
|
|
@@ -53,15 +777,10 @@ export function formatLineHash(line: number, lines: string): string {
|
|
|
53
777
|
* @example
|
|
54
778
|
* ```
|
|
55
779
|
* formatHashLines("function hi() {\n return;\n}")
|
|
56
|
-
* // "
|
|
780
|
+
* // "1th|function hi() {\n2er| return;\n3in|}"
|
|
57
781
|
* ```
|
|
58
782
|
*/
|
|
59
783
|
export function formatHashLines(text: string, startLine = 1): string {
|
|
60
784
|
const lines = text.split("\n");
|
|
61
|
-
return lines
|
|
62
|
-
.map((line, i) => {
|
|
63
|
-
const num = startLine + i;
|
|
64
|
-
return `${formatLineHash(num, line)}:${line}`;
|
|
65
|
-
})
|
|
66
|
-
.join("\n");
|
|
785
|
+
return lines.map((line, i) => formatHashLine(startLine + i, line)).join("\n");
|
|
67
786
|
}
|