@oh-my-pi/hashline 15.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -0
- package/dist/types/apply.d.ts +10 -0
- package/dist/types/diff-preview.d.ts +12 -0
- package/dist/types/format.d.ts +65 -0
- package/dist/types/fs.d.ts +80 -0
- package/dist/types/index.d.ts +16 -0
- package/dist/types/input.d.ts +85 -0
- package/dist/types/messages.d.ts +56 -0
- package/dist/types/mismatch.d.ts +35 -0
- package/dist/types/normalize.d.ts +20 -0
- package/dist/types/parser.d.ts +50 -0
- package/dist/types/patcher.d.ts +112 -0
- package/dist/types/prefixes.d.ts +34 -0
- package/dist/types/recovery.d.ts +38 -0
- package/dist/types/snapshots.d.ts +67 -0
- package/dist/types/stream.d.ts +2 -0
- package/dist/types/tokenizer.d.ts +104 -0
- package/dist/types/types.d.ts +93 -0
- package/package.json +61 -0
- package/src/apply.ts +799 -0
- package/src/diff-preview.ts +49 -0
- package/src/format.ts +110 -0
- package/src/fs.ts +167 -0
- package/src/grammar.lark +22 -0
- package/src/index.ts +16 -0
- package/src/input.ts +319 -0
- package/src/messages.ts +76 -0
- package/src/mismatch.ts +121 -0
- package/src/normalize.ts +38 -0
- package/src/parser.ts +350 -0
- package/src/patcher.ts +360 -0
- package/src/prefixes.ts +130 -0
- package/src/prompt.md +83 -0
- package/src/recovery.ts +163 -0
- package/src/snapshots.ts +171 -0
- package/src/stream.ts +132 -0
- package/src/tokenizer.ts +486 -0
- package/src/types.ts +87 -0
package/src/recovery.ts
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recover from a stale section file-hash by replaying the would-be edit
|
|
3
|
+
* against a cached pre-edit snapshot of the file and 3-way-merging the
|
|
4
|
+
* result onto the current on-disk content.
|
|
5
|
+
*
|
|
6
|
+
* The patcher consults this when it sees a section hash that doesn't match
|
|
7
|
+
* the live file content. The recovery class is stateless apart from the
|
|
8
|
+
* {@link SnapshotStore} it queries; the snapshot store is the seam that
|
|
9
|
+
* lets you plug in your own caching strategy.
|
|
10
|
+
*/
|
|
11
|
+
import * as Diff from "diff";
|
|
12
|
+
import { applyEdits } from "./apply";
|
|
13
|
+
import { computeFileHash } from "./format";
|
|
14
|
+
import { RECOVERY_EXTERNAL_WARNING, RECOVERY_SESSION_CHAIN_WARNING, RECOVERY_SESSION_REPLAY_WARNING } from "./messages";
|
|
15
|
+
import type { Snapshot, SnapshotStore } from "./snapshots";
|
|
16
|
+
import type { ApplyOptions, ApplyResult, Edit } from "./types";
|
|
17
|
+
|
|
18
|
+
// Recovery must never relocate a hunk. Section hashes pin exact lines, so a
// fuzzy match could drop the edit onto a look-alike line (for example a
// repeated closing brace) far away from the intended spot. A fuzz factor of
// zero makes Diff.applyPatch refuse anything that does not line up exactly,
// which leaves the caller to re-read the file instead.
const RECOVERY_FUZZ_FACTOR = 0;
|
|
22
|
+
|
|
23
|
+
/** Input bundle for {@link Recovery.tryRecover}. */
export interface RecoveryArgs {
	/** Path used as the lookup key into the snapshot store. */
	path: string;
	/** Live file content that the recovered edit is merged onto. */
	currentText: string;
	/** The stale file hash carried by the section; used to find the matching snapshot. */
	fileHash: string;
	/** Edits to replay. The array itself is never mutated (a copy is handed to `applyEdits`). */
	edits: readonly Edit[];
	/** Options forwarded to `applyEdits`; treated as `{}` when omitted. */
	options?: ApplyOptions;
}
|
|
30
|
+
|
|
31
|
+
/** Outcome of a successful recovery attempt. */
export interface RecoveryResult {
	/** File content after the recovered edit has been applied. */
	text: string;
	/** 1-indexed first line that differs from the live `currentText`, or `undefined`. */
	firstChangedLine: number | undefined;
	/** Warnings gathered while recovering, including the user-facing recovery banner. */
	warnings: string[];
}
|
|
39
|
+
|
|
40
|
+
/**
 * Replay `edits` against a pre-edit snapshot, then transplant the resulting
 * change onto the live text as a zero-fuzz patch.
 *
 * @param previousText Snapshot text the edits were authored against.
 * @param currentText Live text that receives the change.
 * @param edits Edits to replay; a copy is passed to `applyEdits`.
 * @param options Options forwarded to `applyEdits`.
 * @param recoveryWarning Banner placed ahead of any warnings from `applyEdits`.
 * @returns The merged result, or `null` when `applyEdits` throws, the edits
 *   leave the snapshot unchanged, the patch does not apply to the live text,
 *   or applying it leaves the live text unchanged.
 */
function applyEditsToSnapshot(
	previousText: string,
	currentText: string,
	edits: readonly Edit[],
	options: ApplyOptions,
	recoveryWarning: string,
): RecoveryResult | null {
	let replayed: ApplyResult;
	try {
		replayed = applyEdits(previousText, Array.from(edits), options);
	} catch {
		// Edits that cannot be applied to the snapshot cannot be recovered here.
		return null;
	}
	const editWasNoOp = replayed.text === previousText;
	if (editWasNoOp) return null;

	// Express the edit as a diff of the snapshot, then apply that diff to the live text.
	const snapshotDelta = Diff.structuredPatch("file", "file", previousText, replayed.text, "", "", { context: 3 });
	const mergedText = Diff.applyPatch(currentText, snapshotDelta, { fuzzFactor: RECOVERY_FUZZ_FACTOR });
	if (typeof mergedText !== "string") return null;
	if (mergedText === currentText) return null;

	const firstChangedLine = findFirstChangedLine(currentText, mergedText) ?? replayed.firstChangedLine;
	const carriedWarnings = replayed.warnings ?? [];
	// The banner is only added when a changed line could be identified.
	const warnings = firstChangedLine === undefined ? [...carriedWarnings] : [recoveryWarning, ...carriedWarnings];

	return { text: mergedText, firstChangedLine, warnings };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Session-chain fallback: apply `edits` straight onto the live text.
 *
 * This is only attempted when the snapshot and the live text have the same
 * number of lines, on the reasoning that no insert or delete has shifted the
 * rows the edits address.
 *
 * @returns The replayed result tagged with `RECOVERY_SESSION_REPLAY_WARNING`,
 *   or `null` when line counts differ, `applyEdits` throws, or nothing changes.
 */
function replaySessionChainOnCurrent(
	previousText: string,
	currentText: string,
	edits: readonly Edit[],
	options: ApplyOptions,
): RecoveryResult | null {
	const countLines = (text: string): number => text.split("\n").length;
	if (countLines(previousText) !== countLines(currentText)) return null;

	let replayed: ApplyResult;
	try {
		replayed = applyEdits(currentText, Array.from(edits), options);
	} catch {
		return null;
	}
	if (replayed.text === currentText) return null;

	const trailingWarnings = replayed.warnings ?? [];
	return {
		text: replayed.text,
		firstChangedLine: replayed.firstChangedLine,
		warnings: [RECOVERY_SESSION_REPLAY_WARNING, ...trailingWarnings],
	};
}
|
|
89
|
+
|
|
90
|
+
/**
 * Rebuild a candidate pre-edit text by laying the cached lines of a sparse
 * snapshot over the live text.
 *
 * @param currentText Live text that supplies every line the snapshot lacks.
 * @param snapshotLines 1-indexed line number → cached content.
 * @returns The live text with cached lines substituted, padded with empty
 *   lines when the snapshot refers to lines past the end of the live text.
 */
function buildSparseOverlayText(currentText: string, snapshotLines: ReadonlyMap<number, string>): string {
	const rows = currentText.split("\n");

	// Find the highest cached line so the row array can be grown to reach it.
	let highestLine = 0;
	snapshotLines.forEach((_content, lineNum) => {
		if (lineNum > highestLine) highestLine = lineNum;
	});
	while (rows.length < highestLine) {
		rows.push("");
	}

	snapshotLines.forEach((content, lineNum) => {
		rows[lineNum - 1] = content;
	});
	return rows.join("\n");
}
|
|
102
|
+
|
|
103
|
+
/**
 * Locate the first line where two texts differ.
 *
 * @returns The 1-indexed line number of the first divergence (a line present
 *   in only one text counts as a divergence), or `undefined` when the texts
 *   are identical.
 */
function findFirstChangedLine(a: string, b: string): number | undefined {
	if (a === b) return undefined;
	const left = a.split("\n");
	const right = b.split("\n");
	const limit = Math.max(left.length, right.length);
	let row = 0;
	while (row < limit && left[row] === right[row]) {
		row++;
	}
	return row < limit ? row + 1 : undefined;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Report whether `snapshot` is the very object the store returned as head.
 * The comparison is by reference, so a `null` head never matches.
 */
function isHeadSnapshot(head: Snapshot | null, snapshot: Snapshot): boolean {
	const sameObject = snapshot === head;
	return sameObject;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Recovery driver over a {@link SnapshotStore}. It holds no state besides the
 * store, so construct it once and call {@link Recovery.tryRecover} for each
 * stale-hash incident.
 *
 * Which strategy runs depends on what the matching snapshot holds:
 *
 * - Snapshot has `fullText`: apply the edits to that text and merge the
 *   resulting change onto the current text. If that fails and the snapshot is
 *   not the store's head (session chain), retry by applying the edits directly
 *   to the current text, which is only attempted when line counts match.
 * - Snapshot has only sparse `lines`: overlay them on the current text, check
 *   that the rebuilt text hashes to the expected value, then merge as above.
 */
export class Recovery {
	constructor(readonly store: SnapshotStore) {}

	/**
	 * Attempt recovery.
	 *
	 * @param args Path, live text, stale hash and the edits to replay.
	 * @returns The recovered text with warnings, or `null` when no path forward
	 *   is found. The caller should then surface a {@link MismatchError}.
	 */
	tryRecover(args: RecoveryArgs): RecoveryResult | null {
		const { path, currentText, fileHash, edits, options = {} } = args;
		const head = this.store.head(path);
		const snapshot = this.store.byHash(path, fileHash);
		if (!snapshot) return null;

		const isHead = isHeadSnapshot(head, snapshot);
		const recoveryWarning = isHead ? RECOVERY_EXTERNAL_WARNING : RECOVERY_SESSION_CHAIN_WARNING;
		const isSessionChain = !isHead;

		if (snapshot.fullText !== undefined) {
			// The full-text path never reads `snapshot.lines`, so a snapshot
			// recorded with `fullText` and no line entries is still usable.
			const merged = applyEditsToSnapshot(snapshot.fullText, currentText, edits, options, recoveryWarning);
			if (merged !== null) return merged;
			// Session-chain fast-path: prior in-session edit changed the same
			// line(s) the user is now re-targeting with the stale hash. When
			// line counts match, the edits' line numbers still resolve to the
			// right rows, so replay onto the current text directly.
			if (isSessionChain) return replaySessionChainOnCurrent(snapshot.fullText, currentText, edits, options);
			return null;
		}

		// Sparse path: with no cached lines there is nothing to overlay.
		if (snapshot.lines.size === 0) return null;
		const overlayText = buildSparseOverlayText(currentText, snapshot.lines);
		// Only trust the rebuilt text when it reproduces the hash the section carried.
		if (computeFileHash(overlayText) !== fileHash) return null;
		return applyEditsToSnapshot(overlayText, currentText, edits, options, recoveryWarning);
	}
}
|
package/src/snapshots.ts
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-session snapshot store used by {@link Recovery} to rescue patches when
|
|
3
|
+
* a section's file hash has drifted (file changed externally or a prior
|
|
4
|
+
* in-session edit advanced the hash).
|
|
5
|
+
*
|
|
6
|
+
* Producers (typically a `read` tool) record snapshots as they observe file
|
|
7
|
+
* content. Consumers (the patcher) query for the snapshot whose hash matches
|
|
8
|
+
* a stale section, then 3-way-merge the would-be edit onto the live content.
|
|
9
|
+
*
|
|
10
|
+
* The abstract base class lets callers plug in whatever storage they like
|
|
11
|
+
* (LRU, persistent SQLite, etc.). {@link InMemorySnapshotStore} ships as a
|
|
12
|
+
* sensible default backed by `lru-cache` so paths age out automatically.
|
|
13
|
+
*/
|
|
14
|
+
import { LRUCache } from "lru-cache/raw";
|
|
15
|
+
|
|
16
|
+
/**
 * A file as it was observed at one point in time. Full-file reads record
 * `fullText`; partial views (search matches, range reads) record only a
 * sparse set of `(lineNumber, content)` pairs in `lines`.
 */
export interface Snapshot {
	/** Observed content keyed by 1-indexed line number. */
	readonly lines: Map<number, string>;
	/** Full normalized text, present when the read covered the whole file. */
	fullText?: string;
	/** 4-hex hash that accompanied the read, when known. */
	fileHash?: string;
	/** When the snapshot was recorded, in milliseconds since the epoch. */
	recordedAt: number;
}
|
|
32
|
+
|
|
33
|
+
/** Optional extras a producer may attach when recording a snapshot. */
export interface SnapshotMetadata {
	/** Full normalized text, for producers that observed the whole file. */
	fullText?: string;
	/** 4-hex hash that accompanied the read, when known. */
	fileHash?: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Storage seam for file-content snapshots. The patcher asks {@link head} for
 * the newest snapshot of a path, and {@link byHash} for the specific
 * historical snapshot whose hash matches a section's stale hash.
 */
export abstract class SnapshotStore {
	/** Newest snapshot recorded for `path`, or `null` when there is none. */
	abstract head(path: string): Snapshot | null;

	/** Newest snapshot for `path` whose `fileHash` equals the given `fileHash`. */
	abstract byHash(path: string, fileHash: string): Snapshot | null;

	/**
	 * Record a contiguous run of lines (e.g. from a `read` tool).
	 * `startLine` is the 1-indexed number of the first entry in `lines`.
	 */
	abstract recordContiguous(path: string, startLine: number, lines: readonly string[], metadata?: SnapshotMetadata): void;

	/** Record scattered `(lineNumber, content)` pairs (e.g. a `search` match plus context). */
	abstract recordSparse(path: string, entries: Iterable<readonly [number, string]>, metadata?: SnapshotMetadata): void;

	/** Forget the snapshot history of one path. */
	abstract invalidate(path: string): void;

	/** Forget the snapshot history of every path. */
	abstract clear(): void;
}
|
|
70
|
+
|
|
71
|
+
// Fallback limits for InMemorySnapshotStore when the caller supplies none:
// how many distinct paths are tracked, and how many snapshots are kept per path.
const DEFAULT_MAX_PATHS = 30;
const DEFAULT_MAX_SNAPSHOTS_PER_PATH = 4;
|
|
73
|
+
|
|
74
|
+
/**
 * Report whether any incoming line contradicts what is already recorded.
 * A line conflicts when `existing` holds content for the same line number
 * and that content differs; line numbers not yet recorded never conflict.
 */
function hasConflict(
	existing: ReadonlyMap<number, string>,
	incoming: ReadonlyArray<readonly [number, string]>,
): boolean {
	return incoming.some(([lineNum, content]) => {
		const recorded = existing.get(lineNum);
		return recorded !== undefined && recorded !== content;
	});
}
|
|
84
|
+
|
|
85
|
+
/**
 * Report whether the incoming metadata carries a file hash that disagrees
 * with the one already stored on `existing`. A hash missing on either side
 * is not a conflict.
 */
function hasHashConflict(existing: Snapshot, metadata: SnapshotMetadata): boolean {
	const incomingHash = metadata.fileHash;
	const recordedHash = existing.fileHash;
	if (incomingHash === undefined || recordedHash === undefined) return false;
	return incomingHash !== recordedHash;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Decide whether two snapshots describe the same file state. File hashes
 * decide when both sides have one; otherwise full texts decide when both
 * sides have one; otherwise the snapshots are treated as distinct.
 */
function isSameSnapshotIdentity(left: Snapshot, right: Snapshot): boolean {
	const bothHashed = left.fileHash !== undefined && right.fileHash !== undefined;
	if (bothHashed) return left.fileHash === right.fileHash;

	const bothHaveFullText = left.fullText !== undefined && right.fullText !== undefined;
	return bothHaveFullText && left.fullText === right.fullText;
}
|
|
94
|
+
|
|
95
|
+
/** Tuning knobs for {@link InMemorySnapshotStore}. */
export interface InMemorySnapshotStoreOptions {
	/** Cap on the number of distinct paths tracked at once (default 30). LRU eviction. */
	maxPaths?: number;
	/** Cap on the snapshots kept for each path (default 4). Oldest dropped first. */
	maxSnapshotsPerPath?: number;
}
|
|
101
|
+
|
|
102
|
+
/**
 * {@link SnapshotStore} that keeps everything in memory, backed by
 * `lru-cache`. Each path owns a short, newest-first history list that is
 * truncated to a fixed length, and the set of tracked paths is bounded by the
 * LRU cache so cold paths age out.
 *
 * An incoming record is folded into the head snapshot when none of its lines
 * contradict the head and its `fileHash` (if any) still agrees with the
 * head's. Otherwise a new snapshot is placed at the front of the history.
 */
export class InMemorySnapshotStore extends SnapshotStore {
	readonly #snapshots: LRUCache<string, Snapshot[]>;
	readonly #maxSnapshotsPerPath: number;

	constructor(options: InMemorySnapshotStoreOptions = {}) {
		super();
		const maxPaths = options.maxPaths ?? DEFAULT_MAX_PATHS;
		this.#snapshots = new LRUCache<string, Snapshot[]>({ max: maxPaths });
		this.#maxSnapshotsPerPath = options.maxSnapshotsPerPath ?? DEFAULT_MAX_SNAPSHOTS_PER_PATH;
	}

	/** Newest snapshot for `path`, or `null` when the path is not tracked. */
	head(path: string): Snapshot | null {
		const history = this.#snapshots.get(path);
		return history?.[0] ?? null;
	}

	/** First snapshot in the newest-first history whose `fileHash` matches, or `null`. */
	byHash(path: string, fileHash: string): Snapshot | null {
		const history = this.#snapshots.get(path);
		if (!history) return null;
		for (const entry of history) {
			if (entry.fileHash === fileHash) return entry;
		}
		return null;
	}

	/** Record `lines` as consecutive entries starting at the 1-indexed `startLine`. */
	recordContiguous(path: string, startLine: number, lines: readonly string[], metadata: SnapshotMetadata = {}): void {
		// Nothing observed and no full text to remember: there is nothing to record.
		const nothingToRecord = lines.length === 0 && metadata.fullText === undefined;
		if (nothingToRecord) return;
		const numbered: Array<readonly [number, string]> = lines.map((content, offset) => [startLine + offset, content] as const);
		this.#record(path, numbered, metadata);
	}

	/** Record arbitrary `(lineNumber, content)` pairs. */
	recordSparse(path: string, entries: Iterable<readonly [number, string]>, metadata: SnapshotMetadata = {}): void {
		const pairs = Array.from(entries);
		const nothingToRecord = pairs.length === 0 && metadata.fullText === undefined;
		if (nothingToRecord) return;
		this.#record(path, pairs, metadata);
	}

	invalidate(path: string): void {
		this.#snapshots.delete(path);
	}

	clear(): void {
		this.#snapshots.clear();
	}

	/** Fold `entries` into the head snapshot when compatible, else start a new head. */
	#record(path: string, entries: ReadonlyArray<readonly [number, string]>, metadata: SnapshotMetadata): void {
		const history = this.#snapshots.get(path) ?? [];
		const head = history[0];
		const now = Date.now();

		if (!head || hasConflict(head.lines, entries) || hasHashConflict(head, metadata)) {
			const fresh: Snapshot = {
				lines: new Map(entries),
				...metadata,
				recordedAt: now,
			};
			// Drop older entries describing the same file state, then cap the history length.
			const survivors = history.filter(entry => !isSameSnapshotIdentity(entry, fresh));
			this.#snapshots.set(path, [fresh, ...survivors].slice(0, this.#maxSnapshotsPerPath));
			return;
		}

		// Compatible with the head: extend it in place.
		for (const [lineNum, content] of entries) {
			head.lines.set(lineNum, content);
		}
		if (metadata.fullText !== undefined) head.fullText = metadata.fullText;
		if (metadata.fileHash !== undefined) head.fileHash = metadata.fileHash;
		head.recordedAt = now;
		// No `set` is needed: the head object was mutated in place, and the
		// `get` above already touched LRU recency for this key.
	}
}
|
package/src/stream.ts
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lazily format a stream of UTF-8 bytes into hashline-numbered lines, yielded
|
|
3
|
+
* as bounded text chunks. Used to send `read`-style file content to consumers
|
|
4
|
+
* without materializing the full file at once.
|
|
5
|
+
*
|
|
6
|
+
* Each yielded chunk is at most {@link StreamOptions.maxChunkLines} lines and
|
|
7
|
+
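 * at most {@link StreamOptions.maxChunkBytes} UTF-8 bytes (whichever fires
 * first). The one exception: a single formatted line larger than the byte
 * limit is still emitted, as an oversized chunk of its own.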
|
|
9
|
+
*/
|
|
10
|
+
import { formatNumberedLine } from "./format";
|
|
11
|
+
import type { StreamOptions } from "./types";
|
|
12
|
+
|
|
13
|
+
/** {@link StreamOptions} after every omitted field has been given its default. */
interface ResolvedStreamOptions {
	/** Line number assigned to the first emitted line. */
	startLine: number;
	/** Line count at which a chunk is flushed. */
	maxChunkLines: number;
	/** UTF-8 byte size at which a chunk is flushed. */
	maxChunkBytes: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Fill in defaults for any stream option the caller left out: numbering
 * starts at line 1, and chunks are capped at 200 lines and 64 KiB.
 */
function resolveStreamOptions(options: StreamOptions): ResolvedStreamOptions {
	const startLine = options.startLine ?? 1;
	const maxChunkLines = options.maxChunkLines ?? 200;
	const maxChunkBytes = options.maxChunkBytes ?? 64 * 1024;
	return { startLine, maxChunkLines, maxChunkBytes };
}
|
|
26
|
+
|
|
27
|
+
/** Accumulates formatted lines and hands them back as bounded text chunks. */
interface ChunkEmitter {
	/** Add one raw line; returns whatever chunks became complete as a result. */
	pushLine: (line: string) => string[];
	/** Return the buffered lines as one chunk, or `undefined` when the buffer is empty. */
	flush: () => string | undefined;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build a {@link ChunkEmitter} that numbers incoming lines from
 * `options.startLine` and groups them into chunks joined by `"\n"`.
 *
 * A chunk is closed before a line that would push it past the byte limit, and
 * immediately after a line that brings it up to the line or byte limit. A
 * line that is larger than the byte limit on its own still goes out, as a
 * chunk of its own.
 */
function createChunkEmitter(options: ResolvedStreamOptions): ChunkEmitter {
	let nextLineNumber = options.startLine;
	let buffered: string[] = [];
	let bufferedBytes = 0;

	const flush = (): string | undefined => {
		if (buffered.length === 0) return undefined;
		const text = buffered.join("\n");
		buffered = [];
		bufferedBytes = 0;
		return text;
	};

	const pushLine = (line: string): string[] => {
		const formatted = formatNumberedLine(nextLineNumber, line);
		nextLineNumber++;

		const emitted: string[] = [];
		const drain = (): void => {
			const chunk = flush();
			if (chunk) emitted.push(chunk);
		};

		const lineBytes = Buffer.byteLength(formatted, "utf-8");
		if (buffered.length > 0) {
			// Appending to a non-empty buffer costs one extra byte for the "\n" separator.
			const lineLimitReached = buffered.length >= options.maxChunkLines;
			const byteLimitExceeded = bufferedBytes + 1 + lineBytes > options.maxChunkBytes;
			if (lineLimitReached || byteLimitExceeded) drain();
		}

		buffered.push(formatted);
		bufferedBytes += buffered.length === 1 ? lineBytes : lineBytes + 1;

		// Close the chunk right away once it has reached either limit.
		if (buffered.length >= options.maxChunkLines || bufferedBytes >= options.maxChunkBytes) drain();
		return emitted;
	};

	return { pushLine, flush };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Duck-typed check for a readable stream: any non-null object with a
 * callable `getReader` member qualifies.
 */
function isReadableStream(value: unknown): value is ReadableStream<Uint8Array> {
	if (typeof value !== "object" || value === null) return false;
	if (!("getReader" in value)) return false;
	return typeof (value as { getReader?: unknown }).getReader === "function";
}
|
|
81
|
+
|
|
82
|
+
/**
 * Adapt a readable stream into an async generator of its byte chunks.
 * A reader is acquired on first iteration and its lock is released when
 * iteration ends, whether by exhaustion, early exit, or an error.
 */
async function* bytesFromReadableStream(stream: ReadableStream<Uint8Array>): AsyncGenerator<Uint8Array> {
	const reader = stream.getReader();
	try {
		for (let step = await reader.read(); !step.done; step = await reader.read()) {
			// Skip reads that report no value.
			if (step.value) yield step.value;
		}
	} finally {
		reader.releaseLock();
	}
}
|
|
94
|
+
|
|
95
|
+
/**
 * Decode a UTF-8 byte source, split it into lines, and yield the numbered
 * lines as bounded text chunks.
 *
 * Lines end at `"\n"`, and a `"\r"` directly before the line end is dropped.
 * Text after the last `"\n"` is emitted as a final line. A source that
 * produces no text at all still yields a single empty line.
 *
 * @param source Readable stream or async iterable of byte chunks.
 * @param options Starting line number and chunk limits; omitted fields use defaults.
 */
export async function* streamHashLines(
	source: ReadableStream<Uint8Array> | AsyncIterable<Uint8Array>,
	options: StreamOptions = {},
): AsyncGenerator<string> {
	const resolved = resolveStreamOptions(options);
	const decoder = new TextDecoder("utf-8");
	const byteSource = isReadableStream(source) ? bytesFromReadableStream(source) : source;
	const emitter = createChunkEmitter(resolved);

	const stripCarriageReturn = (text: string): string => (text.endsWith("\r") ? text.slice(0, -1) : text);

	let carry = "";
	let emittedAnyLine = false;

	for await (const bytes of byteSource) {
		// `stream: true` lets the decoder hold a multi-byte character split across chunks.
		const pieces = (carry + decoder.decode(bytes, { stream: true })).split("\n");
		// The last piece has no terminating newline yet, so it carries over to the next chunk.
		carry = pieces.pop() ?? "";
		for (const piece of pieces) {
			emittedAnyLine = true;
			for (const out of emitter.pushLine(stripCarriageReturn(piece))) yield out;
		}
	}

	// Flush whatever the decoder is still holding, then emit an unterminated last line.
	carry += decoder.decode();
	if (carry.length > 0) {
		emittedAnyLine = true;
		for (const out of emitter.pushLine(stripCarriageReturn(carry))) yield out;
	}
	if (!emittedAnyLine) {
		for (const out of emitter.pushLine("")) yield out;
	}

	const remainder = emitter.flush();
	if (remainder) yield remainder;
}
|