@prometheus-ai/hashline 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/dist/types/apply.d.ts +8 -0
- package/dist/types/block.d.ts +24 -0
- package/dist/types/diff-preview.d.ts +12 -0
- package/dist/types/format.d.ts +76 -0
- package/dist/types/fs.d.ts +80 -0
- package/dist/types/index.d.ts +17 -0
- package/dist/types/input.d.ts +100 -0
- package/dist/types/messages.d.ts +85 -0
- package/dist/types/mismatch.d.ts +44 -0
- package/dist/types/normalize.d.ts +20 -0
- package/dist/types/parser.d.ts +23 -0
- package/dist/types/patcher.d.ts +109 -0
- package/dist/types/prefixes.d.ts +34 -0
- package/dist/types/recovery.d.ts +40 -0
- package/dist/types/snapshots.d.ts +55 -0
- package/dist/types/stream.d.ts +2 -0
- package/dist/types/tokenizer.d.ts +65 -0
- package/dist/types/types.d.ts +129 -0
- package/package.json +62 -0
- package/src/apply.ts +586 -0
- package/src/block.ts +84 -0
- package/src/diff-preview.ts +49 -0
- package/src/format.ts +134 -0
- package/src/fs.ts +167 -0
- package/src/grammar.lark +25 -0
- package/src/index.ts +17 -0
- package/src/input.ts +423 -0
- package/src/messages.ts +128 -0
- package/src/mismatch.ts +138 -0
- package/src/normalize.ts +38 -0
- package/src/parser.ts +325 -0
- package/src/patcher.ts +392 -0
- package/src/prefixes.ts +132 -0
- package/src/prompt.md +109 -0
- package/src/recovery.ts +186 -0
- package/src/snapshots.ts +128 -0
- package/src/stream.ts +132 -0
- package/src/tokenizer.ts +471 -0
- package/src/types.ts +132 -0
package/src/recovery.ts
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recover from a stale section snapshot tag by replaying the would-be edit
|
|
3
|
+
* against a cached pre-edit snapshot of the file and 3-way-merging the
|
|
4
|
+
* result onto the current on-disk content.
|
|
5
|
+
*
|
|
6
|
+
* The patcher consults this when a section tag resolves to a snapshot that no
|
|
7
|
+
* longer matches the live file content. The recovery class is stateless apart
|
|
8
|
+
* from the {@link SnapshotStore} it queries; the snapshot store is the seam
|
|
9
|
+
* that lets you plug in your own caching strategy.
|
|
10
|
+
*/
|
|
11
|
+
import * as Diff from "diff";
|
|
12
|
+
import { applyEdits } from "./apply";
|
|
13
|
+
import { RECOVERY_EXTERNAL_WARNING, RECOVERY_SESSION_CHAIN_WARNING, RECOVERY_SESSION_REPLAY_WARNING } from "./messages";
|
|
14
|
+
import type { Snapshot, SnapshotStore } from "./snapshots";
|
|
15
|
+
import type { Anchor, ApplyResult, Edit } from "./types";
|
|
16
|
+
|
|
17
|
+
// Patch hunks must match their context exactly during recovery. Section tags
// address precise lines, so a fuzzy Diff.applyPatch match could drop a hunk
// onto a look-alike closer 100+ lines from the intended spot. When the
// snapshot replay does not line up exactly we refuse, and the caller
// re-reads the file instead.
const RECOVERY_FUZZ_FACTOR = 0;
|
|
21
|
+
|
|
22
|
+
/** Inputs for a single recovery attempt. */
export interface RecoveryArgs {
  /** Path whose snapshot history is consulted. */
  path: string;
  /** Live file content that the recovered edit is merged onto. */
  currentText: string;
  /** Tag used to look up the snapshot the edits are replayed against. */
  fileHash: string;
  /** Edits to replay; copied before being handed to `applyEdits`. */
  edits: readonly Edit[];
}
|
|
28
|
+
|
|
29
|
+
/** Outcome of a successful recovery. */
export interface RecoveryResult {
  /** File text after recovery. */
  text: string;
  /** 1-indexed first line that changed relative to the live `currentText`, or `undefined`. */
  firstChangedLine: number | undefined;
  /** Warnings gathered while recovering, including the user-facing recovery banner. */
  warnings: string[];
}
|
|
37
|
+
|
|
38
|
+
/**
 * Replay `edits` on the snapshot text, express the outcome as a patch against
 * the snapshot, and apply that patch to the live text.
 *
 * Returns `null` when the edits throw on the snapshot, when they leave the
 * snapshot unchanged, when `Diff.applyPatch` does not produce a string, or
 * when the merged text equals `currentText`.
 *
 * @param previousText Snapshot text the edits were authored against.
 * @param currentText Live file content.
 * @param edits Edits to replay (copied before use).
 * @param recoveryWarning Banner prepended to the warnings when a net change exists.
 */
function applyEditsToSnapshot(
  previousText: string,
  currentText: string,
  edits: readonly Edit[],
  recoveryWarning: string,
): RecoveryResult | null {
  let replayed: ApplyResult;
  try {
    replayed = applyEdits(previousText, [...edits]);
  } catch {
    // The edits do not apply to the snapshot they claim to target.
    return null;
  }
  const editChangedNothing = replayed.text === previousText;
  if (editChangedNothing) return null;

  // Context of 3 lines per hunk; the hunks must then match the live text exactly.
  const snapshotPatch = Diff.structuredPatch("file", "file", previousText, replayed.text, "", "", { context: 3 });
  const mergedText = Diff.applyPatch(currentText, snapshotPatch, { fuzzFactor: RECOVERY_FUZZ_FACTOR });
  if (typeof mergedText !== "string") return null;
  if (mergedText === currentText) return null;

  const firstChangedLine = findFirstChangedLine(currentText, mergedText) ?? replayed.firstChangedLine;
  const replayWarnings = replayed.warnings ?? [];
  const warnings = firstChangedLine === undefined ? [...replayWarnings] : [recoveryWarning, ...replayWarnings];

  return { text: mergedText, firstChangedLine, warnings };
}
|
|
62
|
+
|
|
63
|
+
/** Line numbers of every anchor referenced by `edits`, in edit order. */
function collectAnchorLines(edits: readonly Edit[]): number[] {
  return edits.flatMap(edit => getEditAnchors(edit).map(anchor => anchor.line));
}
|
|
70
|
+
|
|
71
|
+
/**
 * Anchors an edit refers to: the edit's own anchor for `delete` and `block`,
 * the cursor's anchor for anchor-relative cursors, otherwise none.
 */
function getEditAnchors(edit: Edit): Anchor[] {
  switch (edit.kind) {
    case "delete":
      return [edit.anchor];
    case "block":
      // Recovery only ever receives already-resolved edits (no `block`); this
      // case exists for type-exhaustiveness over the full `Edit` union.
      return [edit.anchor];
    default: {
      const { cursor } = edit;
      return cursor.kind === "before_anchor" || cursor.kind === "after_anchor" ? [cursor.anchor] : [];
    }
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Checks that each anchor line named by `edits` holds the same content in
 * `previousText` and `currentText`; returns `true` when there are no anchors.
 *
 * The session-chain replay fast-path depends on this. If an earlier
 * in-session edit rewrote the line the model is now re-targeting with a stale
 * hash, replaying onto the current text would silently overwrite the new
 * content with what the model authored against the old content — a
 * corruption window, not a recovery.
 */
function verifyAnchorContent(previousText: string, currentText: string, edits: readonly Edit[]): boolean {
  const anchorLines = collectAnchorLines(edits);
  if (anchorLines.length === 0) return true;

  const before = previousText.split("\n");
  const after = currentText.split("\n");
  return anchorLines.every(lineNumber => {
    const row = lineNumber - 1;
    // An anchor outside either text cannot be verified.
    const outOfRange = row < 0 || row >= before.length || row >= after.length;
    if (outOfRange) return false;
    return before[row] === after[row];
  });
}
|
|
99
|
+
|
|
100
|
+
/**
 * Session-chain fallback: apply `edits` straight onto `currentText`.
 *
 * Returns `null` when either guard fails, when the edits throw, or when they
 * leave the text unchanged. On success the warnings start with
 * `RECOVERY_SESSION_REPLAY_WARNING`.
 */
function replaySessionChainOnCurrent(
  previousText: string,
  currentText: string,
  edits: readonly Edit[],
): RecoveryResult | null {
  // Two guards shrink the corruption window. Neither is sufficient alone, and
  // together they still do not prove correctness — replay is the less certain
  // recovery mode, which is why it carries its own warning so the caller can
  // verify the diff.
  //   1. Equal line counts: every line number in `edits` still resolves to
  //      SOME row (no net shift across the prior chain). A coincidental
  //      insert+delete pair can still leave indices pointing at different
  //      logical rows than the model anchored against.
  //   2. Anchor-content alignment: the row at each anchor index is identical
  //      in previous and current. This catches a prior edit rewriting the
  //      targeted line, but a duplicated row at the shifted index can still
  //      satisfy it by coincidence.
  const countLines = (text: string): number => text.split("\n").length;
  if (countLines(previousText) !== countLines(currentText)) return null;
  if (!verifyAnchorContent(previousText, currentText, edits)) return null;

  let replayed: ApplyResult;
  try {
    replayed = applyEdits(currentText, [...edits]);
  } catch {
    return null;
  }
  if (replayed.text === currentText) return null;

  const warnings = [RECOVERY_SESSION_REPLAY_WARNING, ...(replayed.warnings ?? [])];
  return { text: replayed.text, firstChangedLine: replayed.firstChangedLine, warnings };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Locate the first line where two texts differ.
 *
 * @returns The 1-indexed line number of the first divergence, or `undefined`
 * when `a` and `b` are equal. A line present in only one text counts as a
 * difference.
 */
function findFirstChangedLine(a: string, b: string): number | undefined {
  if (a === b) return undefined;

  const left = a.split("\n");
  const right = b.split("\n");
  const limit = Math.max(left.length, right.length);

  let index = 0;
  while (index < limit && left[index] === right[index]) index++;
  return index < limit ? index + 1 : undefined;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Whether `snapshot` is the most recent version (`head`) recorded for its path.
 *
 * Versions are matched by content tag, with object identity as a fast path.
 * A `SnapshotStore` is not required to return the same object instance from
 * `head()` and `byHash()` (a persistent store may build a fresh object per
 * lookup), so identity alone would classify the head version as a
 * session-chain version.
 *
 * @param head Latest recorded version for the path, or `null` if none.
 * @param snapshot Version resolved from the stale tag.
 */
function isHeadSnapshot(head: Snapshot | null, snapshot: Snapshot): boolean {
  if (head == null) return false;
  return head === snapshot || head.hash === snapshot.hash;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Recovery driver over a {@link SnapshotStore}; it holds no state beyond the
 * store. Construct once and call {@link Recovery.tryRecover} for each
 * stale-tag incident. Two strategies are attempted, in order:
 *
 * 1. Apply the edits to the full-file version the tag names, then 3-way-merge
 *    the resulting patch onto the live content (handles external writes).
 * 2. (Session chain) When that version is not the head, replay the edits
 *    directly onto the live content, provided line counts match AND every
 *    edit's anchor line is unchanged between that version and the live
 *    content — i.e. a prior in-session edit advanced the tag and the model's
 *    anchors still name the same logical rows. This path emits the dedicated
 *    {@link RECOVERY_SESSION_REPLAY_WARNING}, because even with both guards a
 *    coincidental insert+delete pair on duplicate rows can land the edit on
 *    the wrong row; see {@link replaySessionChainOnCurrent}.
 */
export class Recovery {
  constructor(readonly store: SnapshotStore) {}

  /**
   * Attempt recovery.
   *
   * @returns The recovered text and warnings, or `null` when no strategy
   * applies — the caller should then surface a {@link MismatchError}.
   */
  tryRecover(args: RecoveryArgs): RecoveryResult | null {
    const { path, currentText, fileHash, edits } = args;

    const named = this.store.byHash(path, fileHash);
    if (!named) return null;

    const namedIsHead = isHeadSnapshot(this.store.head(path), named);
    const banner = namedIsHead ? RECOVERY_EXTERNAL_WARNING : RECOVERY_SESSION_CHAIN_WARNING;

    const viaMerge = applyEditsToSnapshot(named.text, currentText, edits, banner);
    if (viaMerge !== null) return viaMerge;

    // The 3-way merge refused. Only a non-head version may fall back to a
    // direct replay, which is gated by line-count equality AND anchor-content
    // alignment — see `replaySessionChainOnCurrent` for why both guards
    // together still don't fully prove correctness.
    if (namedIsHead) return null;
    return replaySessionChainOnCurrent(named.text, currentText, edits);
  }
}
|
package/src/snapshots.ts
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-session snapshot store used by {@link Recovery} and {@link Patcher} to
|
|
3
|
+
* bind hashline section tags to the exact file content that minted them.
|
|
4
|
+
*
|
|
5
|
+
* A section tag is a content-derived hash of the *whole file* (see
|
|
6
|
+
* {@link computeFileHash}). Any read of byte-identical content mints the same
|
|
7
|
+
* tag, so reads of one file state fuse onto one anchor and a follow-up edit
|
|
8
|
+
* anchored at any line validates whenever the live file still hashes to it.
|
|
9
|
+
*
|
|
10
|
+
* Producers (typically `read` / `search` / `write` tools) call
|
|
11
|
+
* {@link SnapshotStore.record} with the full normalized text they observed.
|
|
12
|
+
* The store hashes it, dedups against the per-path history, and returns the
|
|
13
|
+
* tag. Consumers (the patcher) resolve a stale tag back to the recorded full
|
|
14
|
+
* text via {@link SnapshotStore.byHash} and 3-way-merge the would-be edit onto
|
|
15
|
+
* the live content.
|
|
16
|
+
*
|
|
17
|
+
* The abstract base class lets callers plug in whatever storage they like
|
|
18
|
+
* (LRU, persistent SQLite, etc.). {@link InMemorySnapshotStore} ships as a
|
|
19
|
+
* sensible default backed by `lru-cache`: a bounded set of paths, each with a
|
|
20
|
+
* short history of full-file versions so in-session edit chains can still
|
|
21
|
+
* recover against the version a stale tag names.
|
|
22
|
+
*/
|
|
23
|
+
import { LRUCache } from "lru-cache/raw";
|
|
24
|
+
import { computeFileHash } from "./format";
|
|
25
|
+
|
|
26
|
+
/**
 * A full-file version captured at one moment. {@link Snapshot.hash} is the tag
 * shown to the model; recovery replays edits against {@link Snapshot.text}.
 */
export interface Snapshot {
  /** Canonical path the version belongs to. */
  readonly path: string;
  /** Full file text as observed, normalized (LF, no BOM). */
  readonly text: string;
  /** Content-derived tag of {@link Snapshot.text} (see {@link computeFileHash}). */
  readonly hash: string;
  /** When the version was recorded, in ms since epoch. */
  recordedAt: number;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Storage seam for full-file version snapshots. The patcher asks {@link head}
 * for a path's latest version and {@link byHash} for the specific historical
 * version that a section's stale tag names.
 */
export abstract class SnapshotStore {
  /** Latest recorded version for `path`; `null` when nothing is recorded. */
  abstract head(path: string): Snapshot | null;

  /** The recorded version of `path` tagged `hash`; `null` when there is none. */
  abstract byHash(path: string, hash: string): Snapshot | null;

  /** Records the full normalized text of `path` and returns its content tag. */
  abstract record(path: string, fullText: string): string;

  /** Forgets the version history of one path. */
  abstract invalidate(path: string): void;

  /** Forgets every version history. */
  abstract clear(): void;
}
|
|
62
|
+
|
|
63
|
+
/** Default limit on distinct paths tracked at once. */
const DEFAULT_MAX_PATHS = 30;
/** Default limit on full-file versions kept per path. */
const DEFAULT_MAX_VERSIONS_PER_PATH = 4;
|
|
65
|
+
|
|
66
|
+
/** Tuning knobs for the in-memory snapshot store. */
export interface InMemorySnapshotStoreOptions {
  /** Cap on distinct paths tracked at once (default 30); evicted LRU-first. */
  maxPaths?: number;
  /** Cap on full-file versions kept per path (default 4); the oldest is dropped first. */
  maxVersionsPerPath?: number;
}
|
|
72
|
+
|
|
73
|
+
/**
 * In-memory {@link SnapshotStore} backed by `lru-cache`. Each path keeps a
 * short, newest-first history of full-file versions (oldest dropped first);
 * the set of tracked paths is LRU-bounded so cold paths age out automatically.
 *
 * Recording byte-identical content again refreshes recency and reuses the
 * existing tag (read fusion); recording new content puts a fresh version at
 * the front of the path history.
 */
export class InMemorySnapshotStore extends SnapshotStore {
  /** Path → version history, newest first. */
  readonly #versions: LRUCache<string, Snapshot[]>;
  /** Per-path history cap; always an integer ≥ 1 (or `Infinity`). */
  readonly #maxVersionsPerPath: number;

  /**
   * @param options Optional limits. `maxVersionsPerPath` is rounded down and
   * clamped to at least 1 (NaN falls back to the default): with a cap below 1
   * `record()` would discard the version it just recorded and return a tag
   * that `head()` / `byHash()` could never resolve.
   */
  constructor(options: InMemorySnapshotStoreOptions = {}) {
    super();
    this.#versions = new LRUCache<string, Snapshot[]>({ max: options.maxPaths ?? DEFAULT_MAX_PATHS });
    const requested = options.maxVersionsPerPath ?? DEFAULT_MAX_VERSIONS_PER_PATH;
    this.#maxVersionsPerPath = Number.isNaN(requested)
      ? DEFAULT_MAX_VERSIONS_PER_PATH
      : Math.max(1, Math.floor(requested));
  }

  /** Most recently recorded version for `path`, or `null` if none. */
  head(path: string): Snapshot | null {
    return this.#versions.get(path)?.[0] ?? null;
  }

  /** Recorded version of `path` whose tag equals `hash`, or `null`. */
  byHash(path: string, hash: string): Snapshot | null {
    const history = this.#versions.get(path);
    return history?.find(version => version.hash === hash) ?? null;
  }

  /**
   * Record the full text observed for `path` and return its content tag.
   * Re-recording content already in the history promotes that version to head
   * instead of adding a duplicate.
   */
  record(path: string, fullText: string): string {
    const hash = computeFileHash(fullText);
    // `get` refreshes LRU recency for `path`.
    const history = this.#versions.get(path) ?? [];
    const existing = history.find(version => version.hash === hash);
    if (existing) {
      // Same content state observed again: refresh recency and promote to
      // head (it is the current file content), then reuse the tag.
      existing.recordedAt = Date.now();
      if (history[0] !== existing) {
        this.#versions.set(path, [existing, ...history.filter(version => version !== existing)]);
      }
      return hash;
    }

    const snapshot: Snapshot = { path, text: fullText, hash, recordedAt: Date.now() };
    // Newest first; everything past the cap (the oldest versions) is dropped.
    this.#versions.set(path, [snapshot, ...history].slice(0, this.#maxVersionsPerPath));
    return hash;
  }

  /** Drop the version history for a single path. */
  invalidate(path: string): void {
    this.#versions.delete(path);
  }

  /** Drop every version history. */
  clear(): void {
    this.#versions.clear();
  }
}
|
package/src/stream.ts
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lazily format a stream of UTF-8 bytes into hashline-numbered lines, yielded
|
|
3
|
+
* as bounded text chunks. Used to send `read`-style file content to consumers
|
|
4
|
+
* without materializing the full file at once.
|
|
5
|
+
*
|
|
6
|
+
* Each yielded chunk is at most {@link StreamOptions.maxChunkLines} lines and
|
|
7
|
+
* at most {@link StreamOptions.maxChunkBytes} UTF-8 bytes (whichever fires
|
|
8
|
+
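* first). A single formatted line that alone exceeds the byte budget is
* still emitted whole, as its own chunk.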
|
|
9
|
+
*/
|
|
10
|
+
import { formatNumberedLine } from "./format";
|
|
11
|
+
import type { StreamOptions } from "./types";
|
|
12
|
+
|
|
13
|
+
/** Stream options with every default filled in. */
interface ResolvedStreamOptions {
  /** Line number given to the first formatted line. */
  startLine: number;
  /** Line-count threshold at which a chunk is emitted. */
  maxChunkLines: number;
  /** UTF-8 byte threshold at which a chunk is emitted. */
  maxChunkBytes: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Fill in defaults for any option left `null`/`undefined`: start at line 1,
 * 200 lines per chunk, 64 KiB per chunk. Explicit values (including 0) are
 * kept as given.
 */
function resolveStreamOptions(options: StreamOptions): ResolvedStreamOptions {
  const startLine = options.startLine ?? 1;
  const maxChunkLines = options.maxChunkLines ?? 200;
  const maxChunkBytes = options.maxChunkBytes ?? 64 * 1024;
  return { startLine, maxChunkLines, maxChunkBytes };
}
|
|
26
|
+
|
|
27
|
+
/** Accumulates formatted lines and hands them back as bounded chunks. */
interface ChunkEmitter {
  /** Adds one raw line; returns any chunks that became ready as a result. */
  pushLine: (line: string) => string[];
  /** Returns the buffered lines as a chunk, or `undefined` when nothing is buffered. */
  flush: () => string | undefined;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build a {@link ChunkEmitter} that numbers lines from `options.startLine`
 * and groups them into chunks joined by "\n" (no trailing newline).
 *
 * A chunk is emitted before a line is added if that line would exceed the
 * byte budget, or the line budget is already reached, and again right after
 * adding a line once either budget is reached. A line larger than the byte
 * budget therefore goes out alone.
 */
function createChunkEmitter(options: ResolvedStreamOptions): ChunkEmitter {
  let nextLineNumber = options.startLine;
  let buffered: string[] = [];
  let bufferedBytes = 0;

  const flush = (): string | undefined => {
    if (buffered.length === 0) return undefined;
    const chunk = buffered.join("\n");
    buffered = [];
    bufferedBytes = 0;
    return chunk;
  };

  const pushLine = (line: string): string[] => {
    const formatted = formatNumberedLine(nextLineNumber, line);
    nextLineNumber++;

    const ready: string[] = [];
    const drain = (): void => {
      const chunk = flush();
      if (chunk) ready.push(chunk);
    };

    const formattedBytes = Buffer.byteLength(formatted, "utf-8");

    // Would appending this line (plus its "\n" separator) overflow the chunk?
    if (buffered.length > 0) {
      const lineBudgetReached = buffered.length >= options.maxChunkLines;
      const byteBudgetExceeded = bufferedBytes + 1 + formattedBytes > options.maxChunkBytes;
      if (lineBudgetReached || byteBudgetExceeded) drain();
    }

    // The first line of a chunk carries no separator byte.
    bufferedBytes += (buffered.length === 0 ? 0 : 1) + formattedBytes;
    buffered.push(formatted);

    if (buffered.length >= options.maxChunkLines || bufferedBytes >= options.maxChunkBytes) drain();
    return ready;
  };

  return { pushLine, flush };
}
|
|
72
|
+
|
|
73
|
+
/** Duck-type check: a non-null object with a callable `getReader` property. */
function isReadableStream(value: unknown): value is ReadableStream<Uint8Array> {
  if (typeof value !== "object" || value === null) return false;
  if (!("getReader" in value)) return false;
  const candidate = value as { getReader?: unknown };
  return typeof candidate.getReader === "function";
}
|
|
81
|
+
|
|
82
|
+
/**
 * Adapt a `ReadableStream` to an async generator of its chunks.
 *
 * The reader lock is always released when the generator finishes. If the
 * consumer stops before the stream reports `done` (e.g. `break` inside a
 * `for await`), the stream is cancelled first so the underlying source can
 * release its resources instead of being left open and half-read.
 *
 * @param stream Stream to drain; it is locked for the generator's lifetime.
 */
async function* bytesFromReadableStream(stream: ReadableStream<Uint8Array>): AsyncGenerator<Uint8Array> {
  const reader = stream.getReader();
  // Set once the stream has ended on its own (done) or failed (read rejected);
  // in both cases there is nothing left to cancel.
  let settled = false;
  try {
    while (true) {
      const { done, value } = await reader.read().catch((error: unknown) => {
        settled = true;
        throw error;
      });
      if (done) {
        settled = true;
        return;
      }
      if (value) yield value;
    }
  } finally {
    if (!settled) {
      try {
        await reader.cancel();
      } catch {
        // Best effort: a failed cancel must not mask the consumer's own exit.
      }
    }
    reader.releaseLock();
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Decode a UTF-8 byte source incrementally, split it on "\n" (dropping one
 * trailing "\r" per line), and yield the formatted lines in bounded chunks.
 *
 * A final line without a terminating newline is still emitted, and a source
 * that produces no lines at all yields a single empty line.
 *
 * @param source Bytes as a `ReadableStream` or any async iterable.
 * @param options Start line and chunk limits; defaults are applied to omitted fields.
 */
export async function* streamHashLines(
  source: ReadableStream<Uint8Array> | AsyncIterable<Uint8Array>,
  options: StreamOptions = {},
): AsyncGenerator<string> {
  const resolved = resolveStreamOptions(options);
  const decoder = new TextDecoder("utf-8");
  const byteChunks = isReadableStream(source) ? bytesFromReadableStream(source) : source;
  const emitter = createChunkEmitter(resolved);

  const stripTrailingCr = (text: string): string => (text.endsWith("\r") ? text.slice(0, -1) : text);

  // Text decoded so far that has not yet been terminated by "\n".
  let carry = "";
  let emittedAnyLine = false;

  for await (const bytes of byteChunks) {
    // `stream: true` keeps a multi-byte sequence split across chunks intact.
    const pieces = (carry + decoder.decode(bytes, { stream: true })).split("\n");
    // The last piece is the unterminated remainder; the rest are whole lines.
    carry = pieces.pop() ?? "";
    for (const piece of pieces) {
      emittedAnyLine = true;
      for (const out of emitter.pushLine(stripTrailingCr(piece))) yield out;
    }
  }

  // Flush whatever the decoder is still holding, then the unterminated tail.
  carry += decoder.decode();
  if (carry.length > 0) {
    emittedAnyLine = true;
    for (const out of emitter.pushLine(stripTrailingCr(carry))) yield out;
  }
  if (!emittedAnyLine) {
    for (const out of emitter.pushLine("")) yield out;
  }

  const last = emitter.flush();
  if (last) yield last;
}
|