@blackwell-systems/gcf 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -11
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/stream.d.ts +67 -0
- package/dist/stream.d.ts.map +1 -0
- package/dist/stream.js +123 -0
- package/dist/stream.js.map +1 -0
- package/package.json +1 -1
- package/src/index.ts +1 -0
- package/src/stream.ts +154 -0
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
TypeScript implementation of [GCF (Graph Compact Format)](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
|
|
10
10
|
|
|
11
|
-
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON
|
|
11
|
+
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON scores 76.9% and TOON scores 92.3%.**
|
|
12
12
|
|
|
13
13
|
Docs: [gcformat.com](https://gcformat.com/) · [Playground](https://gcformat.com/playground.html) · [GCF vs TOON](https://gcformat.com/guide/vs-toon.html)
|
|
14
14
|
|
|
@@ -97,6 +97,40 @@ const out2 = encodeWithSession(payload2, sess); // reused symbols as "@N # prev
|
|
|
97
97
|
|
|
98
98
|
By the 5th call in a session: 92.7% token savings vs JSON.
|
|
99
99
|
|
|
100
|
+
## Streaming Encode
|
|
101
|
+
|
|
102
|
+
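Write GCF output incrementally as symbols and edges arrive. Nothing is buffered: each row is written to the sink immediately, and the encoder retains only a name-to-ID index of the symbols written so far (needed to resolve edge endpoints). Ideal for MCP servers that walk large graphs or paginate results: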
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
import { StreamEncoder } from '@blackwell-systems/gcf';
|
|
106
|
+
|
|
107
|
+
const enc = new StreamEncoder(writer, 'context_for_task', { tokenBudget: 5000 });
|
|
108
|
+
|
|
109
|
+
// Symbols emit immediately as they're discovered.
|
|
110
|
+
enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
|
|
111
|
+
enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 });
|
|
112
|
+
|
|
113
|
+
// Edges emit immediately too.
|
|
114
|
+
enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });
|
|
115
|
+
|
|
116
|
+
// Close emits the ## _summary trailer with final counts.
|
|
117
|
+
enc.close();
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Output:
|
|
121
|
+
```
|
|
122
|
+
GCF tool=context_for_task budget=5000
|
|
123
|
+
## targets
|
|
124
|
+
@0 fn pkg.Auth 0.95 lsp
|
|
125
|
+
## related
|
|
126
|
+
@1 fn pkg.Server 0.60 lsp
|
|
127
|
+
## edges [?]
|
|
128
|
+
@0<@1 calls
|
|
129
|
+
## _summary symbols=2 edges=1 sections=targets:1,related:1,edges:1
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
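The `writer` is any object with a `write(s: string)` method: a Node.js writable stream, the writer returned by a web `WritableStream`'s `getWriter()`, or a plain `{ write: callback }` wrapper around a function. The encoder ignores the return value of `write`, so backpressure signals and returned promises are not observed. Standard `decode()` handles streaming output with no changes.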
|
|
133
|
+
|
|
100
134
|
## Delta Encoding
|
|
101
135
|
|
|
102
136
|
When the consumer already has a prior context pack, send only what changed:
|
|
@@ -153,6 +187,7 @@ Works on objects, arrays, and primitives. Arrays of uniform objects get tabular
|
|
|
153
187
|
| `encodeGeneric(data: unknown): string` | Encode any value to GCF tabular format |
|
|
154
188
|
| `decode(input: string): Payload` | Parse GCF text back to a Payload |
|
|
155
189
|
| `encodeWithSession(p: Payload, s: Session): string` | Encode with session deduplication |
|
|
190
|
+
| `new StreamEncoder(w, tool, opts)` | Create a streaming encoder (zero-buffering) |
|
|
156
191
|
| `encodeDelta(d: DeltaPayload): string` | Encode a delta (added/removed only) |
|
|
157
192
|
| `new Session()` | Create a new session tracker |
|
|
158
193
|
|
|
@@ -169,15 +204,17 @@ Works on objects, arrays, and primitives. Arrays of uniform objects get tabular
|
|
|
169
204
|
|
|
170
205
|
## Comprehension Eval
|
|
171
206
|
|
|
172
|
-
|
|
207
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. 13 structured extraction questions sent to an LLM with zero format instructions:
|
|
173
208
|
|
|
174
209
|
| Format | Accuracy | Tokens | vs JSON |
|
|
175
210
|
|--------|----------|--------|---------|
|
|
176
|
-
| **GCF** | **100%** (
|
|
177
|
-
| TOON |
|
|
178
|
-
| JSON |
|
|
211
|
+
| **GCF** | **100%** (13/13) | **11,090** | **79% fewer** |
|
|
212
|
+
| TOON | 92.3% (12/13) | 16,378 | 69% fewer |
|
|
213
|
+
| JSON | 76.9% (10/13) | 53,341 | baseline |
|
|
214
|
+
|
|
215
|
+
GCF is the only format with perfect accuracy at scale, at 32% fewer tokens than TOON.
|
|
179
216
|
|
|
180
|
-
|
|
217
|
+
Reproduce: `git clone https://github.com/blackwell-systems/gcf-go && cd gcf-go/eval && GOWORK=off go test -run TestComprehension -v -timeout 0`
|
|
181
218
|
|
|
182
219
|
## Token Efficiency (TOON's Own Benchmark)
|
|
183
220
|
|
|
@@ -185,13 +222,13 @@ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tre
|
|
|
185
222
|
|
|
186
223
|
| Track | GCF | TOON | Result |
|
|
187
224
|
|-------|-----|------|--------|
|
|
188
|
-
| Mixed-structure (nested, semi-uniform) |
|
|
189
|
-
| Flat-only (tabular) | 66,
|
|
190
|
-
| Semi-uniform event logs |
|
|
225
|
+
| Mixed-structure (nested, semi-uniform) | 170,367 | 227,896 | **GCF 34% smaller** |
|
|
226
|
+
| Flat-only (tabular) | 66,029 | 67,837 | **GCF 3% smaller** |
|
|
227
|
+
| Semi-uniform event logs | 108,158 | 154,032 | **GCF 42% smaller** |
|
|
191
228
|
|
|
192
|
-
GCF wins
|
|
229
|
+
GCF wins all 6 datasets. On semi-uniform data (the most common real-world pattern), GCF uses 42% fewer tokens than TOON.
|
|
193
230
|
|
|
194
|
-
|
|
231
|
+
Reproduce: `git clone https://github.com/blackwell-systems/toon && cd toon && git checkout gcf-comparison && cd benchmarks && pnpm install && pnpm benchmark:tokens`
|
|
195
232
|
|
|
196
233
|
## Links
|
|
197
234
|
|
package/dist/index.d.ts
CHANGED
|
@@ -5,4 +5,5 @@ export { decode } from './decode.js';
|
|
|
5
5
|
export { Session, encodeWithSession } from './session.js';
|
|
6
6
|
export { encodeDelta } from './delta.js';
|
|
7
7
|
export { encodeGeneric } from './generic.js';
|
|
8
|
+
export { StreamEncoder, type StreamWriter, type StreamOptions } from './stream.js';
|
|
8
9
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAClF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAClF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,KAAK,YAAY,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC"}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAyC,MAAM,aAAa,CAAC"}
|
package/dist/stream.d.ts
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import type { Symbol, Edge } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Options for the streaming encoder.
|
|
4
|
+
*/
|
|
5
|
+
export interface StreamOptions {
|
|
6
|
+
tokenBudget?: number;
|
|
7
|
+
tokensUsed?: number;
|
|
8
|
+
packRoot?: string;
|
|
9
|
+
session?: boolean;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* A writable sink for streaming output. Accepts string chunks.
|
|
13
|
+
* Compatible with Node.js streams, web WritableStreams, or simple callbacks.
|
|
14
|
+
*/
|
|
15
|
+
export interface StreamWriter {
|
|
16
|
+
write(chunk: string): void;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* StreamEncoder writes GCF output incrementally as symbols and edges arrive.
|
|
20
|
+
* Zero buffering: each symbol/edge is written immediately. A trailer summary
|
|
21
|
+
* is emitted on close() with the final counts.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```ts
|
|
25
|
+
* const chunks: string[] = [];
|
|
26
|
+
* const enc = new StreamEncoder({ write: (s) => chunks.push(s) }, 'context_for_task', { tokenBudget: 5000 });
|
|
27
|
+
* enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
|
|
28
|
+
* enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });
|
|
29
|
+
* enc.close();
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export declare class StreamEncoder {
|
|
33
|
+
private w;
|
|
34
|
+
private symIndex;
|
|
35
|
+
private nextID;
|
|
36
|
+
private currentGroup;
|
|
37
|
+
private groupCounts;
|
|
38
|
+
private edgeCount;
|
|
39
|
+
private edgesStarted;
|
|
40
|
+
constructor(w: StreamWriter, tool: string, opts?: StreamOptions);
|
|
41
|
+
private writeHeader;
|
|
42
|
+
/**
|
|
43
|
+
* Emit a symbol line immediately. Group headers are emitted automatically
|
|
44
|
+
* when the distance changes.
|
|
45
|
+
*/
|
|
46
|
+
writeSymbol(s: Symbol): void;
|
|
47
|
+
/**
|
|
48
|
+
* Emit an edge line immediately. The edges section header is emitted
|
|
49
|
+
* automatically on the first edge (with [?] deferred count).
|
|
50
|
+
* Source and target must reference previously-written symbols.
|
|
51
|
+
*/
|
|
52
|
+
writeEdge(e: Edge): void;
|
|
53
|
+
/**
|
|
54
|
+
* Emit a bare reference for a previously-transmitted symbol (session mode).
|
|
55
|
+
*/
|
|
56
|
+
writeBareRef(qname: string, distance: number): void;
|
|
57
|
+
/**
|
|
58
|
+
* Emit the ## _summary trailer with final counts. Must be called after all
|
|
59
|
+
* symbols and edges have been written.
|
|
60
|
+
*/
|
|
61
|
+
close(): void;
|
|
62
|
+
/** Number of symbols written so far. */
|
|
63
|
+
get symbolCount(): number;
|
|
64
|
+
/** Number of edges written so far. */
|
|
65
|
+
get edgeCount_(): number;
|
|
66
|
+
}
|
|
67
|
+
//# sourceMappingURL=stream.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream.d.ts","sourceRoot":"","sources":["../src/stream.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,CAAC,CAAe;IACxB,OAAO,CAAC,QAAQ,CAAkC;IAClD,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,YAAY,CAAM;IAC1B,OAAO,CAAC,WAAW,CAAkC;IACrD,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,YAAY,CAAS;gBAEjB,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB;IAKnE,OAAO,CAAC,WAAW;IASnB;;;OAGG;IACH,WAAW,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAoB5B;;;;OAIG;IACH,SAAS,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI;IAkBxB;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAkBnD;;;OAGG;IACH,KAAK,IAAI,IAAI;IAkBb,wCAAwC;IACxC,IAAI,WAAW,IAAI,MAAM,CAAwB;IAEjD,sCAAsC;IACtC,IAAI,UAAU,IAAI,MAAM,CAA2B;CACpD"}
|
package/dist/stream.js
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { KIND_ABBREV } from './constants.js';
|
|
2
|
+
/**
|
|
3
|
+
* StreamEncoder writes GCF output incrementally as symbols and edges arrive.
|
|
4
|
+
* Zero buffering: each symbol/edge is written immediately. A trailer summary
|
|
5
|
+
* is emitted on close() with the final counts.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```ts
|
|
9
|
+
* const chunks: string[] = [];
|
|
10
|
+
* const enc = new StreamEncoder({ write: (s) => chunks.push(s) }, 'context_for_task', { tokenBudget: 5000 });
|
|
11
|
+
* enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
|
|
12
|
+
* enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });
|
|
13
|
+
* enc.close();
|
|
14
|
+
* ```
|
|
15
|
+
*/
|
|
16
|
+
export class StreamEncoder {
|
|
17
|
+
w;
|
|
18
|
+
symIndex = new Map();
|
|
19
|
+
nextID = 0;
|
|
20
|
+
currentGroup = '';
|
|
21
|
+
groupCounts = new Map();
|
|
22
|
+
edgeCount = 0;
|
|
23
|
+
edgesStarted = false;
|
|
24
|
+
constructor(w, tool, opts = {}) {
|
|
25
|
+
this.w = w;
|
|
26
|
+
this.writeHeader(tool, opts);
|
|
27
|
+
}
|
|
28
|
+
writeHeader(tool, opts) {
|
|
29
|
+
const parts = [`GCF tool=${tool}`];
|
|
30
|
+
if (opts.tokenBudget)
|
|
31
|
+
parts.push(`budget=${opts.tokenBudget}`);
|
|
32
|
+
if (opts.tokensUsed)
|
|
33
|
+
parts.push(`tokens=${opts.tokensUsed}`);
|
|
34
|
+
if (opts.packRoot)
|
|
35
|
+
parts.push(`pack_root=${opts.packRoot}`);
|
|
36
|
+
if (opts.session)
|
|
37
|
+
parts.push('session=true');
|
|
38
|
+
this.w.write(parts.join(' ') + '\n');
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Emit a symbol line immediately. Group headers are emitted automatically
|
|
42
|
+
* when the distance changes.
|
|
43
|
+
*/
|
|
44
|
+
writeSymbol(s) {
|
|
45
|
+
const groupNames = ['targets', 'related', 'extended'];
|
|
46
|
+
const groupName = s.distance < groupNames.length
|
|
47
|
+
? groupNames[s.distance]
|
|
48
|
+
: `distance_${s.distance}`;
|
|
49
|
+
if (groupName !== this.currentGroup) {
|
|
50
|
+
this.w.write(`## ${groupName}\n`);
|
|
51
|
+
this.currentGroup = groupName;
|
|
52
|
+
}
|
|
53
|
+
const id = this.nextID++;
|
|
54
|
+
this.symIndex.set(s.qualifiedName, id);
|
|
55
|
+
const kind = KIND_ABBREV[s.kind] || s.kind;
|
|
56
|
+
this.w.write(`@${id} ${kind} ${s.qualifiedName} ${s.score.toFixed(2)} ${s.provenance}\n`);
|
|
57
|
+
this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Emit an edge line immediately. The edges section header is emitted
|
|
61
|
+
* automatically on the first edge (with [?] deferred count).
|
|
62
|
+
* Source and target must reference previously-written symbols.
|
|
63
|
+
*/
|
|
64
|
+
writeEdge(e) {
|
|
65
|
+
const srcIdx = this.symIndex.get(e.source);
|
|
66
|
+
const tgtIdx = this.symIndex.get(e.target);
|
|
67
|
+
if (srcIdx === undefined || tgtIdx === undefined)
|
|
68
|
+
return;
|
|
69
|
+
if (!this.edgesStarted) {
|
|
70
|
+
this.w.write('## edges [?]\n');
|
|
71
|
+
this.edgesStarted = true;
|
|
72
|
+
}
|
|
73
|
+
let line = `@${tgtIdx}<@${srcIdx} ${e.edgeType}`;
|
|
74
|
+
if (e.status && e.status !== 'unchanged') {
|
|
75
|
+
line += ` ${e.status}`;
|
|
76
|
+
}
|
|
77
|
+
this.w.write(line + '\n');
|
|
78
|
+
this.edgeCount++;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Emit a bare reference for a previously-transmitted symbol (session mode).
|
|
82
|
+
*/
|
|
83
|
+
writeBareRef(qname, distance) {
|
|
84
|
+
const groupNames = ['targets', 'related', 'extended'];
|
|
85
|
+
const groupName = distance < groupNames.length
|
|
86
|
+
? groupNames[distance]
|
|
87
|
+
: `distance_${distance}`;
|
|
88
|
+
if (groupName !== this.currentGroup) {
|
|
89
|
+
this.w.write(`## ${groupName}\n`);
|
|
90
|
+
this.currentGroup = groupName;
|
|
91
|
+
}
|
|
92
|
+
const id = this.nextID++;
|
|
93
|
+
this.symIndex.set(qname, id);
|
|
94
|
+
this.w.write(`@${id} # previously transmitted\n`);
|
|
95
|
+
this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Emit the ## _summary trailer with final counts. Must be called after all
|
|
99
|
+
* symbols and edges have been written.
|
|
100
|
+
*/
|
|
101
|
+
close() {
|
|
102
|
+
const sections = [];
|
|
103
|
+
const groupOrder = ['targets', 'related', 'extended'];
|
|
104
|
+
for (const g of groupOrder) {
|
|
105
|
+
const c = this.groupCounts.get(g);
|
|
106
|
+
if (c && c > 0)
|
|
107
|
+
sections.push(`${g}:${c}`);
|
|
108
|
+
}
|
|
109
|
+
for (const [g, c] of this.groupCounts) {
|
|
110
|
+
if (!groupOrder.includes(g) && c > 0)
|
|
111
|
+
sections.push(`${g}:${c}`);
|
|
112
|
+
}
|
|
113
|
+
if (this.edgeCount > 0) {
|
|
114
|
+
sections.push(`edges:${this.edgeCount}`);
|
|
115
|
+
}
|
|
116
|
+
this.w.write(`## _summary symbols=${this.nextID} edges=${this.edgeCount} sections=${sections.join(',')}\n`);
|
|
117
|
+
}
|
|
118
|
+
/** Number of symbols written so far. */
|
|
119
|
+
get symbolCount() { return this.nextID; }
|
|
120
|
+
/** Number of edges written so far. */
|
|
121
|
+
get edgeCount_() { return this.edgeCount; }
|
|
122
|
+
}
|
|
123
|
+
//# sourceMappingURL=stream.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream.js","sourceRoot":"","sources":["../src/stream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAqB7C;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,aAAa;IAChB,CAAC,CAAe;IAChB,QAAQ,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC1C,MAAM,GAAG,CAAC,CAAC;IACX,YAAY,GAAG,EAAE,CAAC;IAClB,WAAW,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC7C,SAAS,GAAG,CAAC,CAAC;IACd,YAAY,GAAG,KAAK,CAAC;IAE7B,YAAY,CAAe,EAAE,IAAY,EAAE,OAAsB,EAAE;QACjE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC/B,CAAC;IAEO,WAAW,CAAC,IAAY,EAAE,IAAmB;QACnD,MAAM,KAAK,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;QAC/D,IAAI,IAAI,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC7D,IAAI,IAAI,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC5D,IAAI,IAAI,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7C,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvC,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,CAAS;QACnB,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,CAAC,CAAC,QAAQ,GAAG,UAAU,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;YACxB,CAAC,CAAC,YAAY,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE7B,IAAI,SAAS,KAAK,IAAI,CAAC,YAAY,EAAE,CAAC;YACpC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,SAAS,IAAI,CAAC,CAAC;YAClC,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;QAEvC,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC;QAC3C,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAE1F,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED;;;;OAIG;IACH,SAAS,CAAC,CAAO;QACf,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,
CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO;QAEzD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC3B,CAAC;QAED,IAAI,IAAI,GAAG,IAAI,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;QACjD,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACzC,IAAI,IAAI,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;QACzB,CAAC;QACD,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,KAAa,EAAE,QAAgB;QAC1C,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,QAAQ,GAAG,UAAU,CAAC,MAAM;YAC5C,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC;YACtB,CAAC,CAAC,YAAY,QAAQ,EAAE,CAAC;QAE3B,IAAI,SAAS,KAAK,IAAI,CAAC,YAAY,EAAE,CAAC;YACpC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,SAAS,IAAI,CAAC,CAAC;YAClC,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7B,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,8BAA8B,CAAC,CAAC;QAEnD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED;;;OAGG;IACH,KAAK;QACH,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEtD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,MAAM,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,CAAC;QACD,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YACvB,QAAQ,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,uBAAuB,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,SAAS,aAAa,QAAQ,C
AAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9G,CAAC;IAED,wCAAwC;IACxC,IAAI,WAAW,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAEjD,sCAAsC;IACtC,IAAI,UAAU,KAAa,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;CACpD"}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -5,3 +5,4 @@ export { decode } from './decode.js';
|
|
|
5
5
|
export { Session, encodeWithSession } from './session.js';
|
|
6
6
|
export { encodeDelta } from './delta.js';
|
|
7
7
|
export { encodeGeneric } from './generic.js';
|
|
8
|
+
export { StreamEncoder, type StreamWriter, type StreamOptions } from './stream.js';
|
package/src/stream.ts
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { KIND_ABBREV } from './constants.js';
|
|
2
|
+
import type { Symbol, Edge } from './types.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Options for the streaming encoder. Every field is optional; each one that is
 * set to a truthy value adds one token to the `GCF tool=...` header line.
 */
export interface StreamOptions {
  /** Written as `budget=<n>`; omitted when unset or 0. */
  tokenBudget?: number;
  /** Written as `tokens=<n>`; omitted when unset or 0. */
  tokensUsed?: number;
  /** Written as `pack_root=<value>`; omitted when unset or empty. */
  packRoot?: string;
  /** Written as `session=true` when true; omitted otherwise. */
  session?: boolean;
}

/**
 * A writable sink for streaming output. Accepts string chunks.
 *
 * Any object with a `write(chunk)` method qualifies: a Node.js writable
 * stream, the writer returned by a web `WritableStream`'s `getWriter()`, or a
 * plain `{ write: callback }` wrapper around a function. The encoder ignores
 * whatever `write` returns, so backpressure signals and returned promises are
 * not observed.
 */
export interface StreamWriter {
  write(chunk: string): void;
}

/** Section names for distances 0, 1 and 2, in the order close() lists them. */
const GROUP_NAMES: readonly string[] = ['targets', 'related', 'extended'];

/** Value held by the section tracker while the edges section is the open one. */
const EDGES_SECTION = 'edges';

/**
 * Map a symbol distance to its section name.
 *
 * Distances 0, 1 and 2 map to `targets`, `related` and `extended`. Anything
 * else (larger, negative, fractional, NaN) maps to `distance_<value>`, so a
 * header is never rendered from a missing array slot.
 */
function groupNameFor(distance: number): string {
  const named: string | undefined = GROUP_NAMES[distance];
  return named ?? `distance_${distance}`;
}

/**
 * StreamEncoder writes GCF output incrementally as symbols and edges arrive.
 * The header line is written by the constructor, each symbol/edge row is
 * written immediately, and close() emits a `## _summary` trailer with the
 * final counts. The only retained state is the name-to-ID index and the
 * per-section counters.
 *
 * Symbols and edges may be interleaved: whenever the kind of row changes, the
 * matching `## <section>` header is written again so that every row sits
 * under a header of its own kind.
 * NOTE(review): confirm that decode() merges repeated section headers; the
 * all-symbols-then-all-edges order produces each header at most once.
 *
 * @example
 * ```ts
 * const chunks: string[] = [];
 * const enc = new StreamEncoder({ write: (s) => { chunks.push(s); } }, 'context_for_task', { tokenBudget: 5000 });
 * enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
 * enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 });
 * // Both endpoints were written above; an edge naming an unknown symbol is skipped.
 * enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });
 * enc.close();
 * ```
 */
export class StreamEncoder {
  private readonly w: StreamWriter;
  /** qualified name -> numeric ID used in `@<id>` references. */
  private readonly symIndex: Map<string, number> = new Map();
  /** Next ID to hand out; also the number of symbol rows written so far. */
  private nextID = 0;
  /** Name of the section whose header was written last ('' before any section). */
  private currentGroup = '';
  /** Rows written per symbol section, keyed by section name. */
  private readonly groupCounts: Map<string, number> = new Map();
  private edgeCount = 0;

  /**
   * Create the encoder and immediately write the header line to `w`.
   *
   * @param w - Sink that receives every chunk of output.
   * @param tool - Value written as `tool=<tool>` in the header.
   * @param opts - Optional header fields; see {@link StreamOptions}.
   */
  constructor(w: StreamWriter, tool: string, opts: StreamOptions = {}) {
    this.w = w;
    this.writeHeader(tool, opts);
  }

  /** Write the single `GCF tool=...` header line, appending each truthy option. */
  private writeHeader(tool: string, opts: StreamOptions): void {
    const parts = [`GCF tool=${tool}`];
    if (opts.tokenBudget) parts.push(`budget=${opts.tokenBudget}`);
    if (opts.tokensUsed) parts.push(`tokens=${opts.tokensUsed}`);
    if (opts.packRoot) parts.push(`pack_root=${opts.packRoot}`);
    if (opts.session) parts.push('session=true');
    this.w.write(parts.join(' ') + '\n');
  }

  /**
   * Shared path for every symbol row: open the section if it is not the
   * current one, allocate the next ID, index the name, write the rendered
   * row, then bump the section counter.
   */
  private emitSymbolRow(
    qualifiedName: string,
    distance: number,
    render: (id: number) => string,
  ): void {
    const group = groupNameFor(distance);
    if (group !== this.currentGroup) {
      this.w.write(`## ${group}\n`);
      this.currentGroup = group;
    }

    const id = this.nextID++;
    // Writing the same name again allocates a new ID and re-points the index at it.
    this.symIndex.set(qualifiedName, id);

    this.w.write(render(id));
    this.groupCounts.set(group, (this.groupCounts.get(group) || 0) + 1);
  }

  /**
   * Emit a symbol line immediately. A section header is emitted automatically
   * whenever the symbol's section differs from the one currently open.
   *
   * @param s - Symbol to write; its kind is abbreviated through `KIND_ABBREV`
   *   when an abbreviation exists, and its score is rendered with two decimals.
   */
  writeSymbol(s: Symbol): void {
    this.emitSymbolRow(s.qualifiedName, s.distance, (id) => {
      const kind = KIND_ABBREV[s.kind] || s.kind;
      return `@${id} ${kind} ${s.qualifiedName} ${s.score.toFixed(2)} ${s.provenance}\n`;
    });
  }

  /**
   * Emit an edge line immediately, as `@<target><@<source> <edgeType>` with
   * the status appended when it is set and not `unchanged`.
   *
   * The `## edges [?]` header (count deferred to the trailer) is emitted
   * whenever the edges section is not the one currently open.
   *
   * Source and target must reference previously-written symbols; an edge with
   * an unknown endpoint is skipped without writing or counting anything.
   */
  writeEdge(e: Edge): void {
    const srcIdx = this.symIndex.get(e.source);
    const tgtIdx = this.symIndex.get(e.target);
    if (srcIdx === undefined || tgtIdx === undefined) return;

    // Track edges as a section like any other so that a later symbol row
    // reopens its own header instead of being written under "## edges".
    if (this.currentGroup !== EDGES_SECTION) {
      this.w.write('## edges [?]\n');
      this.currentGroup = EDGES_SECTION;
    }

    let line = `@${tgtIdx}<@${srcIdx} ${e.edgeType}`;
    if (e.status && e.status !== 'unchanged') {
      line += ` ${e.status}`;
    }
    this.w.write(line + '\n');
    this.edgeCount++;
  }

  /**
   * Emit a bare reference for a previously-transmitted symbol (session mode).
   * The row carries only the new ID; the name is indexed so edges can use it.
   *
   * @param qname - Qualified name to register for edge lookups.
   * @param distance - Distance used to pick the section, as in writeSymbol.
   */
  writeBareRef(qname: string, distance: number): void {
    this.emitSymbolRow(qname, distance, (id) => `@${id} # previously transmitted\n`);
  }

  /**
   * Emit the ## _summary trailer with final counts. Must be called after all
   * symbols and edges have been written.
   *
   * Sections are listed as `name:count`: the three named groups first, then
   * any `distance_<n>` groups in first-seen order, then `edges`. Empty
   * sections are left out.
   */
  close(): void {
    const sections: string[] = [];

    for (const g of GROUP_NAMES) {
      const c = this.groupCounts.get(g);
      if (c !== undefined && c > 0) sections.push(`${g}:${c}`);
    }
    for (const [g, c] of this.groupCounts) {
      if (!GROUP_NAMES.includes(g) && c > 0) sections.push(`${g}:${c}`);
    }
    if (this.edgeCount > 0) {
      sections.push(`edges:${this.edgeCount}`);
    }

    this.w.write(`## _summary symbols=${this.nextID} edges=${this.edgeCount} sections=${sections.join(',')}\n`);
  }

  /** Number of symbols written so far. */
  get symbolCount(): number { return this.nextID; }

  /** Number of edges written so far. */
  get edgeCount_(): number { return this.edgeCount; }
}
|