@jerryan/pi-hashline-edit 0.7.4 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +6 -4
- package/index.ts +6 -0
- package/package.json +53 -53
- package/src/edit-diff.ts +201 -390
- package/src/edit.ts +138 -76
- package/src/file-kind.ts +130 -130
- package/src/fs-write.ts +76 -76
- package/src/hashline.ts +699 -1071
- package/src/package-info.ts +1 -1
- package/src/path-utils.ts +13 -13
- package/src/read.ts +3 -3
- package/src/runtime.ts +3 -3
- package/src/snapshot.ts +29 -29
- package/src/undo.ts +212 -0
- package/tool-descriptions/edit.md +23 -23
- package/tool-descriptions/read-guidelines.md +1 -1
- package/tool-descriptions/read.md +5 -5
- package/tool-descriptions/undo.md +8 -0
package/src/edit.ts
CHANGED
|
@@ -18,6 +18,7 @@ import {
 18  18   resolveEditAnchors,
 19  19   type HashlineToolEdit,
 20  20   ANCHOR_SEP,
     21 + CONTENT_SEP,
 21  22   } from "./hashline";
 22  23   import { loadFileKindAndText } from "./file-kind";
 23  24   import { resolveToCwd } from "./path-utils";
@@ -25,6 +26,7 @@ import { resolveToCwd } from "./path-utils";
 25  26   import { throwIfAborted } from "./runtime";
 26  27   import { getFileSnapshot } from "./snapshot";
 27  28   import { buildChangedResponse, buildNoopResponse } from "./edit-response";
     29 + import { setLastEdit } from "./undo";
 28  30
 29  31   const editEntrySchema = Type.Object(
 30  32   {
@@ -108,6 +110,73 @@ export function normalizeEditItems(edits: Record<string, unknown>[]): HashlineTo
108 110   });
109 111   }
110 112
    113 + type EditTargetResult =
    114 + | { ok: false; error: string; code?: string }
    115 + | {
    116 + ok: true;
    117 + normalized: string;
    118 + bom: string;
    119 + ending: "\r\n" | "\n";
    120 + };
    121 +
    122 + async function resolveEditTarget(
    123 + absolutePath: string,
    124 + path: string,
    125 + accessMode: number,
    126 + ): Promise<EditTargetResult> {
    127 + try {
    128 + await fsAccess(absolutePath, accessMode);
    129 + } catch (error: unknown) {
    130 + const code = (error as NodeJS.ErrnoException).code;
    131 + if (code === "ENOENT") {
    132 + return { ok: false, error: `File not found: ${path}` };
    133 + }
    134 + if (code === "EACCES" || code === "EPERM") {
    135 + const action = accessMode & constants.W_OK ? "writable" : "readable";
    136 + return { ok: false, error: `File is not ${action}: ${path}` };
    137 + }
    138 + return { ok: false, error: `Cannot access file: ${path}` };
    139 + }
    140 +
    141 + const file = await loadFileKindAndText(absolutePath);
    142 + if (file.kind === "directory") {
    143 + return {
    144 + ok: false,
    145 + error: `Path is a directory: ${path}. Use ls to inspect directories.`,
    146 + };
    147 + }
    148 + if (file.kind === "image") {
    149 + return {
    150 + ok: false,
    151 + error: `Path is an image file: ${path}. Hashline edit only supports text files.`,
    152 + };
    153 + }
    154 + if (file.kind === "binary") {
    155 + return {
    156 + ok: false,
    157 + error: `Path is a binary file: ${path} (${file.description}). Hashline edit only supports text files.`,
    158 + };
    159 + }
    160 +
    161 + const { bom, text: content } = stripBom(file.text);
    162 + const normalized = normalizeToLF(content);
    163 + if (normalized.length === 0) {
    164 + return {
    165 + ok: false,
    166 + code: "E_EMPTY_FILE",
    167 + error: `File is empty: ${path}. The edit tool requires anchors from a read output, which an empty file cannot provide. Use the write tool to create initial content in an empty file.`,
    168 + };
    169 + }
    170 +
    171 + return {
    172 + ok: true,
    173 + normalized,
    174 + bom,
    175 + ending: detectLineEnding(content),
    176 + };
    177 + }
    178 +
    179 +
111 180   type EditPreview = { diff: string } | { error: string };
112 181   type EditRenderState = {
113 182   argsKey?: string;
@@ -127,21 +196,66 @@ function getRenderablePreviewInput(args: unknown): EditRequestParams | null {
127 196   return request.edits.length > 0 ? request : null;
128 197   }
129 198
    199 + function colorDiffLine(
    200 + line: string,
    201 + theme: { fg: (token: string, text: string) => string },
    202 + ): string {
    203 + const prefix = line[0];
    204 + if (prefix !== "-" && prefix !== "+" && prefix !== " ") {
    205 + return theme.fg("dim", line);
    206 + }
    207 + if (line.startsWith("---") || line.startsWith("+++")) {
    208 + return theme.fg("dim", line);
    209 + }
    210 +
    211 + const sepIdx = line.indexOf(CONTENT_SEP);
    212 + if (sepIdx === -1) {
    213 + if (prefix === "-") return theme.fg("error", line);
    214 + if (prefix === "+") return theme.fg("success", line);
    215 + return theme.fg("dim", line);
    216 + }
    217 +
    218 + const meta = line.slice(0, sepIdx); // prefix + lineNum + anchor/pad
    219 + const content = line.slice(sepIdx + CONTENT_SEP.length);
    220 +
    221 + const digits = meta.match(/\d+/);
    222 + if (!digits) {
    223 + if (prefix === "-") return theme.fg("error", line);
    224 + if (prefix === "+") return theme.fg("success", line);
    225 + return theme.fg("dim", line);
    226 + }
    227 +
    228 + const lineNumStart = meta.indexOf(digits[0]);
    229 + const lineNumEnd = lineNumStart + digits[0].length;
    230 + const prefixAndLineNum = meta.slice(0, lineNumEnd);
    231 + const anchorAndSep = meta.slice(lineNumEnd) + CONTENT_SEP;
    232 +
    233 + if (prefix === "-") {
    234 + return (
    235 + theme.fg("error", prefixAndLineNum) +
    236 + theme.fg("muted", anchorAndSep) +
    237 + theme.fg("error", content)
    238 + );
    239 + }
    240 + if (prefix === "+") {
    241 + return (
    242 + theme.fg("success", prefixAndLineNum) +
    243 + theme.fg("muted", anchorAndSep) +
    244 + theme.fg("success", content)
    245 + );
    246 + }
    247 + return (
    248 + theme.fg("dim", prefixAndLineNum) +
    249 + theme.fg("muted", anchorAndSep) +
    250 + theme.fg("dim", content)
    251 + );
    252 + }
130 253   function colorDiffLines(
131 254   lines: string[],
132 255   theme: { fg: (token: string, text: string) => string },
133 256   ): string[] {
134     - return lines.map((line) =>
135     - if (line.startsWith("+") && !line.startsWith("+++")) {
136     - return theme.fg("success", line);
137     - }
138     - if (line.startsWith("-") && !line.startsWith("---")) {
139     - return theme.fg("error", line);
140     - }
141     - return theme.fg("dim", line);
142     - });
    257 + return lines.map((line) => colorDiffLine(line, theme));
143 258   }
144     -
145 259   function formatPreviewDiff(
146 260   diff: string,
147 261   expanded: boolean,
@@ -149,7 +263,7 @@ function formatPreviewDiff(
149 263   ): string {
150 264   const lines = diff.split("\n");
151 265   const maxLines = expanded ? 40 : 16;
152     - const shown =
    266 + const shown = lines.slice(0, maxLines).map((line) => colorDiffLine(line, theme));
153 267
154 268   if (lines.length > maxLines) {
155 269   shown.push(theme.fg("muted", `... ${lines.length - maxLines} more diff lines`));
@@ -161,9 +275,8 @@ function formatResultDiff(
161 275   diff: string,
162 276   theme: { fg: (token: string, text: string) => string },
163 277   ): string {
164     - return
    278 + return diff.split("\n").map((line) => colorDiffLine(line, theme)).join("\n");
165 279   }
166     -
167 280   function getRenderedEditTextContent(
168 281   result: { content?: Array<{ type: string; text?: string }> },
169 282   ): string | undefined {
@@ -250,36 +363,13 @@ export async function computeEditPreview(
250 363   const absolutePath = resolveToCwd(path, cwd);
251 364   const toolEdits = normalizeEditItems(params.edits);
252 365
253     -
254     -
255     -
256     - const code = (error as NodeJS.ErrnoException).code;
257     - if (code === "ENOENT") {
258     - return { error: `File not found: ${path}` };
259     - }
260     - if (code === "EACCES" || code === "EPERM") {
261     - return { error: `File is not readable: ${path}` };
262     - }
263     - return { error: `Cannot access file: ${path}` };
    366 + const target = await resolveEditTarget(absolutePath, path, constants.R_OK);
    367 + if (!target.ok) {
    368 + return { error: target.error };
264 369   }
    370 + const originalNormalized = target.normalized;
265 371
266 372   try {
267     - const file = await loadFileKindAndText(absolutePath);
268     - if (file.kind === "directory") {
269     - return { error: `Path is a directory: ${path}. Use ls to inspect directories.` };
270     - }
271     - if (file.kind === "image") {
272     - return {
273     - error: `Path is an image file: ${path}. Hashline edit only supports text files.`,
274     - };
275     - }
276     - if (file.kind === "binary") {
277     - return {
278     - error: `Path is a binary file: ${path} (${file.description}). Hashline edit only supports text files.`,
279     - };
280     - }
281     -
282     - const originalNormalized = normalizeToLF(stripBom(file.text).text);
283 373   const resolved = resolveEditAnchors(toolEdits);
284 374   const result = applyHashlineEdits(originalNormalized, resolved).content;
285 375
@@ -434,48 +524,20 @@ const editToolDefinition: EditToolDefinition = {
434 524   const mutationTargetPath = await resolveMutationTargetPath(absolutePath);
435 525   return withFileMutationQueue(mutationTargetPath, async () => {
436 526   throwIfAborted(signal);
437     -
438     -
439     -
440     -
441     - if (code === "ENOENT") {
442     - throw new Error(`File not found: ${path}`);
443     - }
444     - if (code === "EACCES" || code === "EPERM") {
445     - throw new Error(`File is not writable: ${path}`);
446     - }
447     - throw new Error(`Cannot access file: ${path}`);
448     - }
449     -
450     - throwIfAborted(signal);
451     - const file = await loadFileKindAndText(absolutePath);
452     - if (file.kind === "directory") {
453     - throw new Error(`Path is a directory: ${path}. Use ls to inspect directories.`);
    527 + const target = await resolveEditTarget(absolutePath, path, constants.R_OK | constants.W_OK);
    528 + if (!target.ok) {
    529 + const prefix = target.code ? `[${target.code}] ` : "";
    530 + throw new Error(`${prefix}${target.error}`);
454 531   }
455     -
456     - throw new Error(
457     - `Path is an image file: ${path}. Hashline edit only supports text files.`,
458     - );
459     - }
460     - if (file.kind === "binary") {
461     - throw new Error(
462     - `Path is a binary file: ${path} (${file.description}). Hashline edit only supports text files.`,
463     - );
464     - }
465     -
466     - throwIfAborted(signal);
467     - const { bom, text: content } = stripBom(file.text);
468     - const originalEnding = detectLineEnding(content);
469     - const originalNormalized = normalizeToLF(content);
    532 + const { bom, normalized: originalNormalized, ending: originalEnding } = target;
470 533
471 534   const resolved = resolveEditAnchors(toolEdits);
472 535
    536 +
473 537   const anchorResult = applyHashlineEdits(originalNormalized, resolved, signal);
474 538   const result = anchorResult.content;
475 539   const warnings = anchorResult.warnings;
476     - const originalLineCount = originalNormalized.length
477     - ? 0
478     - : originalNormalized.split("\n").length - (originalNormalized.endsWith("\n") ? 1 : 0);
    540 + const originalLineCount = originalNormalized.split("\n").length - (originalNormalized.endsWith("\n") ? 1 : 0);
479 541   if (result.length === 0 && originalLineCount > 50) {
480 542   throw new Error(
481 543   "[E_WOULD_EMPTY] This edit would delete the entire file. The edit tool does not allow full-file deletion for files with more than 50 lines. If you truly intend to clear the file, use the write tool to overwrite it with an empty string.",
@@ -495,7 +557,7 @@ const editToolDefinition: EditToolDefinition = {
495 557   warnings,
496 558   });
497 559   }
498     -
    560 + setLastEdit({ path, previousContent: originalNormalized });
499 561   throwIfAborted(signal);
500 562   await writeFileAtomically(
501 563   absolutePath,
package/src/file-kind.ts
CHANGED
|
@@ -1,130 +1,130 @@
  1     - import { open as fsOpen, stat as fsStat } from "fs/promises";
  2     - import { fileTypeFromBuffer } from "file-type";
  3     -
  4     - const IMAGE_MIME_TYPES = new Set<string>([
  5     - "image/jpeg",
  6     - "image/png",
  7     - "image/gif",
  8     - "image/webp",
  9     - ]);
 10     -
 11     - const TEXT_LIKE_MIME_TYPES = new Set<string>([
 12     - "application/rtf",
 13     - "application/xml",
 14     - "application/x-ms-regedit",
 15     - ]);
 16     -
 17     - function isTextLikeMimeType(mimeType: string): boolean {
 18     - return mimeType.startsWith("text/") || TEXT_LIKE_MIME_TYPES.has(mimeType);
 19     - }
 20     -
 21     - const FILE_TYPE_SNIFF_BYTES = 8192;
 22     -
 23     - export type FileKind =
 24     - | { kind: "directory" }
 25     - | { kind: "image"; mimeType: string }
 26     - | { kind: "text" }
 27     - | { kind: "binary"; description: string };
 28     -
 29     - export type LoadedFile =
 30     - | { kind: "directory" }
 31     - | { kind: "image"; mimeType: string }
 32     - | { kind: "text"; text: string }
 33     - | { kind: "binary"; description: string };
 34     -
 35     - function hasNullByte(buffer: Uint8Array): boolean {
 36     - return buffer.includes(0);
 37     - }
 38     -
 39     -
 40     - export async function loadFileKindAndText(filePath: string): Promise<LoadedFile> {
 41     - const pathStat = await fsStat(filePath);
 42     - if (pathStat.isDirectory()) {
 43     - return { kind: "directory" };
 44     - }
 45     - if (!pathStat.isFile()) {
 46     - return {
 47     - kind: "binary",
 48     - description: "unsupported file type",
 49     - };
 50     - }
 51     -
 52     - const fileHandle = await fsOpen(filePath, "r");
 53     - try {
 54     - const buffer = Buffer.alloc(FILE_TYPE_SNIFF_BYTES);
 55     - const { bytesRead } = await fileHandle.read(buffer, 0, FILE_TYPE_SNIFF_BYTES, 0);
 56     - if (bytesRead === 0) {
 57     - return { kind: "text", text: "" };
 58     - }
 59     -
 60     - const sample = buffer.subarray(0, bytesRead);
 61     - const detectedMimeType = (await fileTypeFromBuffer(sample))?.mime;
 62     - if (detectedMimeType !== undefined && !isTextLikeMimeType(detectedMimeType)) {
 63     - if (IMAGE_MIME_TYPES.has(detectedMimeType)) {
 64     - return { kind: "image", mimeType: detectedMimeType };
 65     - }
 66     - return {
 67     - kind: "binary",
 68     - description: detectedMimeType,
 69     - };
 70     - }
 71     - if (hasNullByte(sample)) {
 72     - return {
 73     - kind: "binary",
 74     - description: "null bytes detected",
 75     - };
 76     - }
 77     -
 78     - // Non-fatal decode, matching pi's built-in tools: invalid UTF-8 becomes
 79     - // U+FFFD rather than rejecting the file. The null-byte guard above is the
 80     - // only signal we treat as binary, so non-UTF-8 text (CP1251, GBK, …) reads
 81     - // instead of forcing the model to bypass hashline with raw shell edits.
 82     - const decoder = new TextDecoder("utf-8");
 83     - const parts: string[] = [decoder.decode(sample, { stream: true })];
 84     -
 85     - let position = bytesRead;
 86     - while (true) {
 87     - const { bytesRead: chunkBytesRead } = await fileHandle.read(
 88     - buffer,
 89     - 0,
 90     - FILE_TYPE_SNIFF_BYTES,
 91     - position,
 92     - );
 93     - if (chunkBytesRead === 0) {
 94     - break;
 95     - }
 96     -
 97     - const chunk = buffer.subarray(0, chunkBytesRead);
 98     - if (hasNullByte(chunk)) {
 99     - return {
100     - kind: "binary",
101     - description: "null bytes detected",
102     - };
103     - }
104     - parts.push(decoder.decode(chunk, { stream: true }));
105     - position += chunkBytesRead;
106     - }
107     -
108     - parts.push(decoder.decode());
109     -
110     - return { kind: "text", text: parts.join("") };
111     -
112     - return { kind: "text", text: parts.join("") };
113     - } finally {
114     - await fileHandle.close();
115     - }
116     - }
117     -
118     - export async function classifyFileKind(filePath: string): Promise<FileKind> {
119     - const loaded = await loadFileKindAndText(filePath);
120     - switch (loaded.kind) {
121     - case "directory":
122     - return loaded;
123     - case "image":
124     - return loaded;
125     - case "binary":
126     - return loaded;
127     - case "text":
128     - return { kind: "text" };
129     - }
130     - }
      1 + import { open as fsOpen, stat as fsStat } from "fs/promises";
      2 + import { fileTypeFromBuffer } from "file-type";
      3 +
      4 + const IMAGE_MIME_TYPES = new Set<string>([
      5 + "image/jpeg",
      6 + "image/png",
      7 + "image/gif",
      8 + "image/webp",
      9 + ]);
     10 +
     11 + const TEXT_LIKE_MIME_TYPES = new Set<string>([
     12 + "application/rtf",
     13 + "application/xml",
     14 + "application/x-ms-regedit",
     15 + ]);
     16 +
     17 + function isTextLikeMimeType(mimeType: string): boolean {
     18 + return mimeType.startsWith("text/") || TEXT_LIKE_MIME_TYPES.has(mimeType);
     19 + }
     20 +
     21 + const FILE_TYPE_SNIFF_BYTES = 8192;
     22 +
     23 + export type FileKind =
     24 + | { kind: "directory" }
     25 + | { kind: "image"; mimeType: string }
     26 + | { kind: "text" }
     27 + | { kind: "binary"; description: string };
     28 +
     29 + export type LoadedFile =
     30 + | { kind: "directory" }
     31 + | { kind: "image"; mimeType: string }
     32 + | { kind: "text"; text: string }
     33 + | { kind: "binary"; description: string };
     34 +
     35 + function hasNullByte(buffer: Uint8Array): boolean {
     36 + return buffer.includes(0);
     37 + }
     38 +
     39 +
     40 + export async function loadFileKindAndText(filePath: string): Promise<LoadedFile> {
     41 + const pathStat = await fsStat(filePath);
     42 + if (pathStat.isDirectory()) {
     43 + return { kind: "directory" };
     44 + }
     45 + if (!pathStat.isFile()) {
     46 + return {
     47 + kind: "binary",
     48 + description: "unsupported file type",
     49 + };
     50 + }
     51 +
     52 + const fileHandle = await fsOpen(filePath, "r");
     53 + try {
     54 + const buffer = Buffer.alloc(FILE_TYPE_SNIFF_BYTES);
     55 + const { bytesRead } = await fileHandle.read(buffer, 0, FILE_TYPE_SNIFF_BYTES, 0);
     56 + if (bytesRead === 0) {
     57 + return { kind: "text", text: "" };
     58 + }
     59 +
     60 + const sample = buffer.subarray(0, bytesRead);
     61 + const detectedMimeType = (await fileTypeFromBuffer(sample))?.mime;
     62 + if (detectedMimeType !== undefined && !isTextLikeMimeType(detectedMimeType)) {
     63 + if (IMAGE_MIME_TYPES.has(detectedMimeType)) {
     64 + return { kind: "image", mimeType: detectedMimeType };
     65 + }
     66 + return {
     67 + kind: "binary",
     68 + description: detectedMimeType,
     69 + };
     70 + }
     71 + if (hasNullByte(sample)) {
     72 + return {
     73 + kind: "binary",
     74 + description: "null bytes detected",
     75 + };
     76 + }
     77 +
     78 + // Non-fatal decode, matching pi's built-in tools: invalid UTF-8 becomes
     79 + // U+FFFD rather than rejecting the file. The null-byte guard above is the
     80 + // only signal we treat as binary, so non-UTF-8 text (CP1251, GBK, …) reads
     81 + // instead of forcing the model to bypass hashline with raw shell edits.
     82 + const decoder = new TextDecoder("utf-8");
     83 + const parts: string[] = [decoder.decode(sample, { stream: true })];
     84 +
     85 + let position = bytesRead;
     86 + while (true) {
     87 + const { bytesRead: chunkBytesRead } = await fileHandle.read(
     88 + buffer,
     89 + 0,
     90 + FILE_TYPE_SNIFF_BYTES,
     91 + position,
     92 + );
     93 + if (chunkBytesRead === 0) {
     94 + break;
     95 + }
     96 +
     97 + const chunk = buffer.subarray(0, chunkBytesRead);
     98 + if (hasNullByte(chunk)) {
     99 + return {
    100 + kind: "binary",
    101 + description: "null bytes detected",
    102 + };
    103 + }
    104 + parts.push(decoder.decode(chunk, { stream: true }));
    105 + position += chunkBytesRead;
    106 + }
    107 +
    108 + parts.push(decoder.decode());
    109 +
    110 + return { kind: "text", text: parts.join("") };
    111 +
    112 + return { kind: "text", text: parts.join("") };
    113 + } finally {
    114 + await fileHandle.close();
    115 + }
    116 + }
    117 +
    118 + export async function classifyFileKind(filePath: string): Promise<FileKind> {
    119 + const loaded = await loadFileKindAndText(filePath);
    120 + switch (loaded.kind) {
    121 + case "directory":
    122 + return loaded;
    123 + case "image":
    124 + return loaded;
    125 + case "binary":
    126 + return loaded;
    127 + case "text":
    128 + return { kind: "text" };
    129 + }
    130 + }