@jerryan/pi-hashline-edit 0.7.3 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +7 -5
- package/index.ts +6 -0
- package/package.json +4 -4
- package/src/edit-diff.ts +201 -390
- package/src/edit-response.ts +3 -0
- package/src/edit.ts +141 -78
- package/src/file-kind.ts +130 -167
- package/src/fs-write.ts +76 -76
- package/src/hashline.ts +699 -1071
- package/src/package-info.ts +4 -0
- package/src/path-utils.ts +13 -13
- package/src/read.ts +241 -230
- package/src/runtime.ts +3 -3
- package/src/snapshot.ts +29 -29
- package/src/undo.ts +212 -0
- package/{prompts → tool-descriptions}/edit.md +23 -23
- package/{prompts → tool-descriptions}/read-guidelines.md +1 -1
- package/{prompts → tool-descriptions}/read.md +5 -5
- package/tool-descriptions/undo.md +8 -0
- /package/{prompts → tool-descriptions}/edit-snippet.md +0 -0
- /package/{prompts → tool-descriptions}/read-snippet.md +0 -0
package/src/edit.ts
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
resolveEditAnchors,
|
|
19
19
|
type HashlineToolEdit,
|
|
20
20
|
ANCHOR_SEP,
|
|
21
|
+
CONTENT_SEP,
|
|
21
22
|
} from "./hashline";
|
|
22
23
|
import { loadFileKindAndText } from "./file-kind";
|
|
23
24
|
import { resolveToCwd } from "./path-utils";
|
|
@@ -25,6 +26,7 @@ import { resolveToCwd } from "./path-utils";
|
|
|
25
26
|
import { throwIfAborted } from "./runtime";
|
|
26
27
|
import { getFileSnapshot } from "./snapshot";
|
|
27
28
|
import { buildChangedResponse, buildNoopResponse } from "./edit-response";
|
|
29
|
+
import { setLastEdit } from "./undo";
|
|
28
30
|
|
|
29
31
|
const editEntrySchema = Type.Object(
|
|
30
32
|
{
|
|
@@ -68,15 +70,16 @@ type HashlineEditToolDetails = {
|
|
|
68
70
|
snapshotId?: string;
|
|
69
71
|
classification?: "noop";
|
|
70
72
|
metrics?: EditMetrics;
|
|
73
|
+
package: { name: string; version: string };
|
|
71
74
|
};
|
|
72
75
|
|
|
73
76
|
const EDIT_DESC = readFileSync(
|
|
74
|
-
new URL("../
|
|
77
|
+
new URL("../tool-descriptions/edit.md", import.meta.url),
|
|
75
78
|
"utf-8",
|
|
76
79
|
).trim();
|
|
77
80
|
|
|
78
81
|
const EDIT_PROMPT_SNIPPET = readFileSync(
|
|
79
|
-
new URL("../
|
|
82
|
+
new URL("../tool-descriptions/edit-snippet.md", import.meta.url),
|
|
80
83
|
"utf-8",
|
|
81
84
|
).trim();
|
|
82
85
|
|
|
@@ -107,6 +110,73 @@ export function normalizeEditItems(edits: Record<string, unknown>[]): HashlineTo
|
|
|
107
110
|
});
|
|
108
111
|
}
|
|
109
112
|
|
|
113
|
+
/**
 * Outcome of validating and loading a file that is about to be edited.
 *
 * - `ok: false` carries a human-readable `error` and, for some failures, a
 *   machine-readable `code` (currently only `E_EMPTY_FILE`).
 * - `ok: true` carries the file text with the BOM stripped and line endings
 *   normalized to LF, plus the stripped `bom` and the detected original
 *   line `ending`.
 */
type EditTargetResult =
  | { ok: false; error: string; code?: string }
  | {
      ok: true;
      normalized: string;
      bom: string;
      ending: "\r\n" | "\n";
    };

/**
 * Checks that `absolutePath` is accessible with `accessMode`, loads it, and
 * rejects anything that is not a non-empty text file.
 *
 * Filesystem failures are reported through the returned result rather than
 * thrown, so callers that only branch on `ok` never see a stray rejection.
 *
 * @param absolutePath - Resolved path used for all filesystem calls.
 * @param path - Path as given by the caller; used only in error messages.
 * @param accessMode - Bitmask passed to `fsAccess` (e.g. `constants.R_OK`).
 * @returns The loaded text and its encoding details, or a described failure.
 */
async function resolveEditTarget(
  absolutePath: string,
  path: string,
  accessMode: number,
): Promise<EditTargetResult> {
  try {
    await fsAccess(absolutePath, accessMode);
  } catch (error: unknown) {
    const code = (error as NodeJS.ErrnoException).code;
    if (code === "ENOENT") {
      return { ok: false, error: `File not found: ${path}` };
    }
    if (code === "EACCES" || code === "EPERM") {
      const action = accessMode & constants.W_OK ? "writable" : "readable";
      return { ok: false, error: `File is not ${action}: ${path}` };
    }
    return { ok: false, error: `Cannot access file: ${path}` };
  }

  // The access check above is only a snapshot: the file can still disappear
  // or change permissions before it is opened. Map those filesystem errors
  // to a result as well; anything without an errno code is a genuine bug and
  // is rethrown untouched.
  let file: Awaited<ReturnType<typeof loadFileKindAndText>>;
  try {
    file = await loadFileKindAndText(absolutePath);
  } catch (error: unknown) {
    const code = (error as NodeJS.ErrnoException | null | undefined)?.code;
    if (typeof code !== "string") {
      throw error;
    }
    if (code === "ENOENT") {
      return { ok: false, error: `File not found: ${path}` };
    }
    if (code === "EACCES" || code === "EPERM") {
      // Loading only reads, so a permission failure here is a read failure.
      return { ok: false, error: `File is not readable: ${path}` };
    }
    return { ok: false, error: `Cannot access file: ${path}` };
  }

  if (file.kind === "directory") {
    return {
      ok: false,
      error: `Path is a directory: ${path}. Use ls to inspect directories.`,
    };
  }
  if (file.kind === "image") {
    return {
      ok: false,
      error: `Path is an image file: ${path}. Hashline edit only supports text files.`,
    };
  }
  if (file.kind === "binary") {
    return {
      ok: false,
      error: `Path is a binary file: ${path} (${file.description}). Hashline edit only supports text files.`,
    };
  }

  const { bom, text: content } = stripBom(file.text);
  const normalized = normalizeToLF(content);
  if (normalized.length === 0) {
    return {
      ok: false,
      code: "E_EMPTY_FILE",
      error: `File is empty: ${path}. The edit tool requires anchors from a read output, which an empty file cannot provide. Use the write tool to create initial content in an empty file.`,
    };
  }

  return {
    ok: true,
    normalized,
    bom,
    // Detected on the pre-normalization text so the original style is kept.
    ending: detectLineEnding(content),
  };
}
|
|
178
|
+
|
|
179
|
+
|
|
110
180
|
type EditPreview = { diff: string } | { error: string };
|
|
111
181
|
type EditRenderState = {
|
|
112
182
|
argsKey?: string;
|
|
@@ -126,21 +196,66 @@ function getRenderablePreviewInput(args: unknown): EditRequestParams | null {
|
|
|
126
196
|
return request.edits.length > 0 ? request : null;
|
|
127
197
|
}
|
|
128
198
|
|
|
199
|
+
/**
 * Applies theme colors to a single diff line.
 *
 * Lines that do not start with `-`, `+` or a space, and `---` / `+++`
 * header lines, are rendered entirely "dim". Removed lines use "error",
 * added lines "success", and context lines "dim". When the line contains
 * `CONTENT_SEP` and a line number precedes it, the text between the line
 * number and the content (anchor/padding plus the separator) is rendered
 * "muted" while the rest keeps the line's tone.
 *
 * @param line - One line of diff text.
 * @param theme - Provides `fg(token, text)` to colorize a text fragment.
 * @returns The colorized line.
 */
function colorDiffLine(
  line: string,
  theme: { fg: (token: string, text: string) => string },
): string {
  const marker = line.charAt(0);
  const isChange = marker === "-" || marker === "+";
  const isHeader = line.startsWith("---") || line.startsWith("+++");
  if (isHeader || !(isChange || marker === " ")) {
    return theme.fg("dim", line);
  }

  // One tone per line kind; reused for every fragment except the gutter.
  const tone = marker === "-" ? "error" : marker === "+" ? "success" : "dim";

  const sepAt = line.indexOf(CONTENT_SEP);
  // Only look for a line number in the part before the separator.
  const lineNumber = sepAt === -1 ? null : /\d+/.exec(line.slice(0, sepAt));
  if (lineNumber === null) {
    // No separator or no line number: color the line as a whole.
    return theme.fg(tone, line);
  }

  const numberEnd = lineNumber.index + lineNumber[0].length;
  const lead = line.slice(0, numberEnd); // prefix + line number
  const gutter = line.slice(numberEnd, sepAt) + CONTENT_SEP; // anchor/pad + separator
  const body = line.slice(sepAt + CONTENT_SEP.length);

  return theme.fg(tone, lead) + theme.fg("muted", gutter) + theme.fg(tone, body);
}
|
|
129
253
|
/**
 * Colorizes every line of a diff by delegating to `colorDiffLine`.
 *
 * @param lines - Diff lines, one entry per line.
 * @param theme - Provides `fg(token, text)` to colorize a text fragment.
 * @returns A new array with each line colorized, in the same order.
 */
function colorDiffLines(
  lines: string[],
  theme: { fg: (token: string, text: string) => string },
): string[] {
  const paint = (line: string): string => colorDiffLine(line, theme);
  return lines.map(paint);
}
|
|
143
|
-
|
|
144
259
|
function formatPreviewDiff(
|
|
145
260
|
diff: string,
|
|
146
261
|
expanded: boolean,
|
|
@@ -148,7 +263,7 @@ function formatPreviewDiff(
|
|
|
148
263
|
): string {
|
|
149
264
|
const lines = diff.split("\n");
|
|
150
265
|
const maxLines = expanded ? 40 : 16;
|
|
151
|
-
const shown =
|
|
266
|
+
const shown = lines.slice(0, maxLines).map((line) => colorDiffLine(line, theme));
|
|
152
267
|
|
|
153
268
|
if (lines.length > maxLines) {
|
|
154
269
|
shown.push(theme.fg("muted", `... ${lines.length - maxLines} more diff lines`));
|
|
@@ -160,9 +275,8 @@ function formatResultDiff(
|
|
|
160
275
|
diff: string,
|
|
161
276
|
theme: { fg: (token: string, text: string) => string },
|
|
162
277
|
): string {
|
|
163
|
-
return
|
|
278
|
+
return diff.split("\n").map((line) => colorDiffLine(line, theme)).join("\n");
|
|
164
279
|
}
|
|
165
|
-
|
|
166
280
|
function getRenderedEditTextContent(
|
|
167
281
|
result: { content?: Array<{ type: string; text?: string }> },
|
|
168
282
|
): string | undefined {
|
|
@@ -249,36 +363,13 @@ export async function computeEditPreview(
|
|
|
249
363
|
const absolutePath = resolveToCwd(path, cwd);
|
|
250
364
|
const toolEdits = normalizeEditItems(params.edits);
|
|
251
365
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
const code = (error as NodeJS.ErrnoException).code;
|
|
256
|
-
if (code === "ENOENT") {
|
|
257
|
-
return { error: `File not found: ${path}` };
|
|
258
|
-
}
|
|
259
|
-
if (code === "EACCES" || code === "EPERM") {
|
|
260
|
-
return { error: `File is not readable: ${path}` };
|
|
261
|
-
}
|
|
262
|
-
return { error: `Cannot access file: ${path}` };
|
|
366
|
+
const target = await resolveEditTarget(absolutePath, path, constants.R_OK);
|
|
367
|
+
if (!target.ok) {
|
|
368
|
+
return { error: target.error };
|
|
263
369
|
}
|
|
370
|
+
const originalNormalized = target.normalized;
|
|
264
371
|
|
|
265
372
|
try {
|
|
266
|
-
const file = await loadFileKindAndText(absolutePath);
|
|
267
|
-
if (file.kind === "directory") {
|
|
268
|
-
return { error: `Path is a directory: ${path}. Use ls to inspect directories.` };
|
|
269
|
-
}
|
|
270
|
-
if (file.kind === "image") {
|
|
271
|
-
return {
|
|
272
|
-
error: `Path is an image file: ${path}. Hashline edit only supports UTF-8 text files.`,
|
|
273
|
-
};
|
|
274
|
-
}
|
|
275
|
-
if (file.kind === "binary") {
|
|
276
|
-
return {
|
|
277
|
-
error: `Path is a binary file: ${path} (${file.description}). Hashline edit only supports UTF-8 text files.`,
|
|
278
|
-
};
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
const originalNormalized = normalizeToLF(stripBom(file.text).text);
|
|
282
373
|
const resolved = resolveEditAnchors(toolEdits);
|
|
283
374
|
const result = applyHashlineEdits(originalNormalized, resolved).content;
|
|
284
375
|
|
|
@@ -433,48 +524,20 @@ const editToolDefinition: EditToolDefinition = {
|
|
|
433
524
|
const mutationTargetPath = await resolveMutationTargetPath(absolutePath);
|
|
434
525
|
return withFileMutationQueue(mutationTargetPath, async () => {
|
|
435
526
|
throwIfAborted(signal);
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
if (code === "ENOENT") {
|
|
441
|
-
throw new Error(`File not found: ${path}`);
|
|
442
|
-
}
|
|
443
|
-
if (code === "EACCES" || code === "EPERM") {
|
|
444
|
-
throw new Error(`File is not writable: ${path}`);
|
|
445
|
-
}
|
|
446
|
-
throw new Error(`Cannot access file: ${path}`);
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
throwIfAborted(signal);
|
|
450
|
-
const file = await loadFileKindAndText(absolutePath);
|
|
451
|
-
if (file.kind === "directory") {
|
|
452
|
-
throw new Error(`Path is a directory: ${path}. Use ls to inspect directories.`);
|
|
527
|
+
const target = await resolveEditTarget(absolutePath, path, constants.R_OK | constants.W_OK);
|
|
528
|
+
if (!target.ok) {
|
|
529
|
+
const prefix = target.code ? `[${target.code}] ` : "";
|
|
530
|
+
throw new Error(`${prefix}${target.error}`);
|
|
453
531
|
}
|
|
454
|
-
|
|
455
|
-
throw new Error(
|
|
456
|
-
`Path is an image file: ${path}. Hashline edit only supports UTF-8 text files.`,
|
|
457
|
-
);
|
|
458
|
-
}
|
|
459
|
-
if (file.kind === "binary") {
|
|
460
|
-
throw new Error(
|
|
461
|
-
`Path is a binary file: ${path} (${file.description}). Hashline edit only supports UTF-8 text files.`,
|
|
462
|
-
);
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
throwIfAborted(signal);
|
|
466
|
-
const { bom, text: content } = stripBom(file.text);
|
|
467
|
-
const originalEnding = detectLineEnding(content);
|
|
468
|
-
const originalNormalized = normalizeToLF(content);
|
|
532
|
+
const { bom, normalized: originalNormalized, ending: originalEnding } = target;
|
|
469
533
|
|
|
470
534
|
const resolved = resolveEditAnchors(toolEdits);
|
|
471
535
|
|
|
536
|
+
|
|
472
537
|
const anchorResult = applyHashlineEdits(originalNormalized, resolved, signal);
|
|
473
538
|
const result = anchorResult.content;
|
|
474
539
|
const warnings = anchorResult.warnings;
|
|
475
|
-
const originalLineCount = originalNormalized.length
|
|
476
|
-
? 0
|
|
477
|
-
: originalNormalized.split("\n").length - (originalNormalized.endsWith("\n") ? 1 : 0);
|
|
540
|
+
const originalLineCount = originalNormalized.split("\n").length - (originalNormalized.endsWith("\n") ? 1 : 0);
|
|
478
541
|
if (result.length === 0 && originalLineCount > 50) {
|
|
479
542
|
throw new Error(
|
|
480
543
|
"[E_WOULD_EMPTY] This edit would delete the entire file. The edit tool does not allow full-file deletion for files with more than 50 lines. If you truly intend to clear the file, use the write tool to overwrite it with an empty string.",
|
|
@@ -494,7 +557,7 @@ const editToolDefinition: EditToolDefinition = {
|
|
|
494
557
|
warnings,
|
|
495
558
|
});
|
|
496
559
|
}
|
|
497
|
-
|
|
560
|
+
setLastEdit({ path, previousContent: originalNormalized });
|
|
498
561
|
throwIfAborted(signal);
|
|
499
562
|
await writeFileAtomically(
|
|
500
563
|
absolutePath,
|
package/src/file-kind.ts
CHANGED
|
@@ -1,167 +1,130 @@
|
|
|
1
|
-
import { open as fsOpen, stat as fsStat } from "fs/promises";
|
|
2
|
-
import { fileTypeFromBuffer } from "file-type";
|
|
3
|
-
|
|
4
|
-
const IMAGE_MIME_TYPES = new Set<string>([
|
|
5
|
-
"image/jpeg",
|
|
6
|
-
"image/png",
|
|
7
|
-
"image/gif",
|
|
8
|
-
"image/webp",
|
|
9
|
-
]);
|
|
10
|
-
|
|
11
|
-
const TEXT_LIKE_MIME_TYPES = new Set<string>([
|
|
12
|
-
"application/rtf",
|
|
13
|
-
"application/xml",
|
|
14
|
-
"application/x-ms-regedit",
|
|
15
|
-
]);
|
|
16
|
-
|
|
17
|
-
function isTextLikeMimeType(mimeType: string): boolean {
|
|
18
|
-
return mimeType.startsWith("text/") || TEXT_LIKE_MIME_TYPES.has(mimeType);
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
const FILE_TYPE_SNIFF_BYTES = 8192;
|
|
22
|
-
|
|
23
|
-
export type FileKind =
|
|
24
|
-
| { kind: "directory" }
|
|
25
|
-
| { kind: "image"; mimeType: string }
|
|
26
|
-
| { kind: "text" }
|
|
27
|
-
| { kind: "binary"; description: string };
|
|
28
|
-
|
|
29
|
-
export type LoadedFile =
|
|
30
|
-
| { kind: "directory" }
|
|
31
|
-
| { kind: "image"; mimeType: string }
|
|
32
|
-
| { kind: "text"; text: string }
|
|
33
|
-
| { kind: "binary"; description: string };
|
|
34
|
-
|
|
35
|
-
function hasNullByte(buffer: Uint8Array): boolean {
|
|
36
|
-
return buffer.includes(0);
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
parts.push(
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return {
|
|
132
|
-
kind: "binary",
|
|
133
|
-
description: "invalid UTF-8",
|
|
134
|
-
};
|
|
135
|
-
}
|
|
136
|
-
parts.push(chunkText);
|
|
137
|
-
position += chunkBytesRead;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
const tail = finishUtf8(decoder);
|
|
141
|
-
if (tail === null) {
|
|
142
|
-
return {
|
|
143
|
-
kind: "binary",
|
|
144
|
-
description: "invalid UTF-8",
|
|
145
|
-
};
|
|
146
|
-
}
|
|
147
|
-
parts.push(tail);
|
|
148
|
-
|
|
149
|
-
return { kind: "text", text: parts.join("") };
|
|
150
|
-
} finally {
|
|
151
|
-
await fileHandle.close();
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
export async function classifyFileKind(filePath: string): Promise<FileKind> {
|
|
156
|
-
const loaded = await loadFileKindAndText(filePath);
|
|
157
|
-
switch (loaded.kind) {
|
|
158
|
-
case "directory":
|
|
159
|
-
return loaded;
|
|
160
|
-
case "image":
|
|
161
|
-
return loaded;
|
|
162
|
-
case "binary":
|
|
163
|
-
return loaded;
|
|
164
|
-
case "text":
|
|
165
|
-
return { kind: "text" };
|
|
166
|
-
}
|
|
167
|
-
}
|
|
1
|
+
import { open as fsOpen, stat as fsStat } from "fs/promises";
|
|
2
|
+
import { fileTypeFromBuffer } from "file-type";
|
|
3
|
+
|
|
4
|
+
const IMAGE_MIME_TYPES = new Set<string>([
|
|
5
|
+
"image/jpeg",
|
|
6
|
+
"image/png",
|
|
7
|
+
"image/gif",
|
|
8
|
+
"image/webp",
|
|
9
|
+
]);
|
|
10
|
+
|
|
11
|
+
// MIME types outside the `text/` family that are still treated as text.
const TEXT_LIKE_MIME_TYPES = new Set<string>([
  "application/rtf",
  "application/xml",
  "application/x-ms-regedit",
]);

/**
 * Reports whether a MIME type should be handled as text: either it is in
 * the `text/` family or it is listed in `TEXT_LIKE_MIME_TYPES`.
 */
function isTextLikeMimeType(mimeType: string): boolean {
  if (TEXT_LIKE_MIME_TYPES.has(mimeType)) {
    return true;
  }
  return mimeType.startsWith("text/");
}
|
|
20
|
+
|
|
21
|
+
const FILE_TYPE_SNIFF_BYTES = 8192;
|
|
22
|
+
|
|
23
|
+
export type FileKind =
|
|
24
|
+
| { kind: "directory" }
|
|
25
|
+
| { kind: "image"; mimeType: string }
|
|
26
|
+
| { kind: "text" }
|
|
27
|
+
| { kind: "binary"; description: string };
|
|
28
|
+
|
|
29
|
+
export type LoadedFile =
|
|
30
|
+
| { kind: "directory" }
|
|
31
|
+
| { kind: "image"; mimeType: string }
|
|
32
|
+
| { kind: "text"; text: string }
|
|
33
|
+
| { kind: "binary"; description: string };
|
|
34
|
+
|
|
35
|
+
/** Returns true when at least one byte in `buffer` is zero. */
function hasNullByte(buffer: Uint8Array): boolean {
  return buffer.indexOf(0) !== -1;
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
/**
 * Classifies the file at `filePath` and, for text files, returns its decoded
 * content.
 *
 * Classification order:
 * 1. Directories are reported as `directory`; any other non-regular file as
 *    `binary` ("unsupported file type").
 * 2. A zero-length file is `text` with an empty string.
 * 3. The first `FILE_TYPE_SNIFF_BYTES` bytes are sniffed: a detected MIME
 *    type that is not text-like yields `image` (when it is in
 *    `IMAGE_MIME_TYPES`) or `binary` described by that MIME type.
 * 4. A null byte anywhere in the file yields `binary`.
 * 5. Everything else is decoded as UTF-8 and returned as `text`.
 *
 * @param filePath - Path of the file to inspect.
 * @returns The file kind, including the full text for text files.
 * @throws Whatever `fsStat`, `fsOpen` or the reads reject with; this
 *   function does not catch filesystem errors.
 */
export async function loadFileKindAndText(filePath: string): Promise<LoadedFile> {
  const pathStat = await fsStat(filePath);
  if (pathStat.isDirectory()) {
    return { kind: "directory" };
  }
  if (!pathStat.isFile()) {
    return {
      kind: "binary",
      description: "unsupported file type",
    };
  }

  const fileHandle = await fsOpen(filePath, "r");
  try {
    // One buffer is reused for the sniff read and every later chunk; each
    // chunk is fully consumed (sniffed / scanned / decoded) before the next read.
    const buffer = Buffer.alloc(FILE_TYPE_SNIFF_BYTES);
    const { bytesRead } = await fileHandle.read(buffer, 0, FILE_TYPE_SNIFF_BYTES, 0);
    if (bytesRead === 0) {
      return { kind: "text", text: "" };
    }

    const sample = buffer.subarray(0, bytesRead);
    const detectedMimeType = (await fileTypeFromBuffer(sample))?.mime;
    if (detectedMimeType !== undefined && !isTextLikeMimeType(detectedMimeType)) {
      if (IMAGE_MIME_TYPES.has(detectedMimeType)) {
        return { kind: "image", mimeType: detectedMimeType };
      }
      return {
        kind: "binary",
        description: detectedMimeType,
      };
    }
    if (hasNullByte(sample)) {
      return {
        kind: "binary",
        description: "null bytes detected",
      };
    }

    // Non-fatal decode, matching pi's built-in tools: invalid UTF-8 becomes
    // U+FFFD rather than rejecting the file. The null-byte guard above is the
    // only signal we treat as binary, so non-UTF-8 text (CP1251, GBK, …) reads
    // instead of forcing the model to bypass hashline with raw shell edits.
    const decoder = new TextDecoder("utf-8");
    // `stream: true` lets a multi-byte sequence split across chunk
    // boundaries be completed by the next decode call.
    const parts: string[] = [decoder.decode(sample, { stream: true })];

    let position = bytesRead;
    while (true) {
      const { bytesRead: chunkBytesRead } = await fileHandle.read(
        buffer,
        0,
        FILE_TYPE_SNIFF_BYTES,
        position,
      );
      if (chunkBytesRead === 0) {
        break;
      }

      const chunk = buffer.subarray(0, chunkBytesRead);
      if (hasNullByte(chunk)) {
        return {
          kind: "binary",
          description: "null bytes detected",
        };
      }
      parts.push(decoder.decode(chunk, { stream: true }));
      position += chunkBytesRead;
    }

    // Final call without input flushes any bytes the decoder is still holding.
    parts.push(decoder.decode());

    return { kind: "text", text: parts.join("") };
  } finally {
    await fileHandle.close();
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Classifies the file at `filePath` without exposing its content.
 *
 * Delegates to `loadFileKindAndText`; directory, image and binary results
 * are passed through as-is, while a text result is reduced to
 * `{ kind: "text" }` so the loaded text is dropped.
 *
 * @param filePath - Path of the file to classify.
 * @returns The kind of the file.
 */
export async function classifyFileKind(filePath: string): Promise<FileKind> {
  const loaded = await loadFileKindAndText(filePath);
  if (loaded.kind === "text") {
    return { kind: "text" };
  }
  return loaded;
}
|