@blackwell-systems/gcf 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  TypeScript implementation of [GCF (Graph Compact Format)](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
10
10
 
11
- **79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON fails at 66.7%.**
11
+ **79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON scores 76.9% and TOON scores 92.3%.**
12
12
 
13
13
  Docs: [gcformat.com](https://gcformat.com/) · [Playground](https://gcformat.com/playground.html) · [GCF vs TOON](https://gcformat.com/guide/vs-toon.html)
14
14
 
@@ -97,6 +97,40 @@ const out2 = encodeWithSession(payload2, sess); // reused symbols as "@N # prev
97
97
 
98
98
  By the 5th call in a session: 92.7% token savings vs JSON.
99
99
 
100
+ ## Streaming Encode
101
+
102
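+ Write GCF output incrementally as symbols and edges arrive. No output is buffered: each row is written to the sink as soon as it is produced, and the encoder retains only a name-to-ID index of the symbols written so far (needed to resolve edge endpoints). Ideal for MCP servers that walk large graphs or paginate results: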
103
+
104
+ ```typescript
105
+ import { StreamEncoder } from '@blackwell-systems/gcf';
106
+
107
+ const enc = new StreamEncoder(writer, 'context_for_task', { tokenBudget: 5000 });
108
+
109
+ // Symbols emit immediately as they're discovered.
110
+ enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
111
+ enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 });
112
+
113
+ // Edges emit immediately too. Both endpoints must already be written; otherwise the edge is skipped.
114
+ enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });
115
+
116
+ // Close emits the ## _summary trailer with final counts.
117
+ enc.close();
118
+ ```
119
+
120
+ Output:
121
+ ```
122
+ GCF tool=context_for_task budget=5000
123
+ ## targets
124
+ @0 fn pkg.Auth 0.95 lsp
125
+ ## related
126
+ @1 fn pkg.Server 0.60 lsp
127
+ ## edges [?]
128
+ @0<@1 calls
129
+ ## _summary symbols=2 edges=1 sections=targets:1,related:1,edges:1
130
+ ```
131
+
132
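+ The `writer` is any object with a `write(chunk: string)` method: a Node.js `Writable`, a web `WritableStreamDefaultWriter` (obtained from `stream.getWriter()`; a `WritableStream` itself has no `write()` method), or a callback wrapped as `{ write: cb }`. The encoder ignores the return value of `write()`, so it does not wait on backpressure. Standard `decode()` handles streaming output with no changes.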
133
+
100
134
  ## Delta Encoding
101
135
 
102
136
  When the consumer already has a prior context pack, send only what changed:
@@ -153,6 +187,7 @@ Works on objects, arrays, and primitives. Arrays of uniform objects get tabular
153
187
  | `encodeGeneric(data: unknown): string` | Encode any value to GCF tabular format |
154
188
  | `decode(input: string): Payload` | Parse GCF text back to a Payload |
155
189
  | `encodeWithSession(p: Payload, s: Session): string` | Encode with session deduplication |
190
+ | `new StreamEncoder(w, tool, opts)` | Create a streaming encoder (zero-buffering) |
156
191
  | `encodeDelta(d: DeltaPayload): string` | Encode a delta (added/removed only) |
157
192
  | `new Session()` | Create a new session tracker |
158
193
 
@@ -169,15 +204,17 @@ Works on objects, arrays, and primitives. Arrays of uniform objects get tabular
169
204
 
170
205
  ## Comprehension Eval
171
206
 
172
- A rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
207
+ Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. 13 structured extraction questions sent to an LLM with zero format instructions:
173
208
 
174
209
  | Format | Accuracy | Tokens | vs JSON |
175
210
  |--------|----------|--------|---------|
176
- | **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
177
- | TOON | 100% (6/6) | 16,378 | 69% fewer |
178
- | JSON | 66.7% (4/6) | 53,341 | baseline |
211
+ | **GCF** | **100%** (13/13) | **11,090** | **79% fewer** |
212
+ | TOON | 92.3% (12/13) | 16,378 | 69% fewer |
213
+ | JSON | 76.9% (10/13) | 53,341 | baseline |
214
+
215
+ GCF is the only format with perfect accuracy at scale, at 32% fewer tokens than TOON.
179
216
 
180
- JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens.
217
+ Reproduce: `git clone https://github.com/blackwell-systems/gcf-go && cd gcf-go/eval && GOWORK=off go test -run TestComprehension -v -timeout 0`
181
218
 
182
219
  ## Token Efficiency (TOON's Own Benchmark)
183
220
 
@@ -185,13 +222,13 @@ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tre
185
222
 
186
223
  | Track | GCF | TOON | Result |
187
224
  |-------|-----|------|--------|
188
- | Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 34% smaller** |
189
- | Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
190
- | Semi-uniform event logs | 107,269 | 154,032 | **GCF 44% smaller** |
225
+ | Mixed-structure (nested, semi-uniform) | 170,367 | 227,896 | **GCF 25% smaller** |
226
+ | Flat-only (tabular) | 66,029 | 67,837 | **GCF 3% smaller** |
227
+ | Semi-uniform event logs | 108,158 | 154,032 | **GCF 30% smaller** |
191
228
 
192
- GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 44% fewer tokens than TOON.
229
+ GCF wins all 6 datasets. On semi-uniform data (the most common real-world pattern), GCF uses 30% fewer tokens than TOON.
193
230
 
194
- Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
231
+ Reproduce: `git clone https://github.com/blackwell-systems/toon && cd toon && git checkout gcf-comparison && cd benchmarks && pnpm install && pnpm benchmark:tokens`
195
232
 
196
233
  ## Links
197
234
 
package/dist/index.d.ts CHANGED
@@ -5,4 +5,5 @@ export { decode } from './decode.js';
5
5
  export { Session, encodeWithSession } from './session.js';
6
6
  export { encodeDelta } from './delta.js';
7
7
  export { encodeGeneric } from './generic.js';
8
+ export { StreamEncoder, type StreamWriter, type StreamOptions } from './stream.js';
8
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAClF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAClF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,KAAK,YAAY,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC"}
package/dist/index.js CHANGED
@@ -4,4 +4,5 @@ export { decode } from './decode.js';
4
4
  export { Session, encodeWithSession } from './session.js';
5
5
  export { encodeDelta } from './delta.js';
6
6
  export { encodeGeneric } from './generic.js';
7
+ export { StreamEncoder } from './stream.js';
7
8
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAyC,MAAM,aAAa,CAAC"}
@@ -0,0 +1,67 @@
1
+ import type { Symbol, Edge } from './types.js';
2
+ /**
3
+ * Options for the streaming encoder.
4
+ */
5
+ export interface StreamOptions {
6
+ tokenBudget?: number;
7
+ tokensUsed?: number;
8
+ packRoot?: string;
9
+ session?: boolean;
10
+ }
11
+ /**
12
+ * A writable sink for streaming output. Accepts string chunks.
13
+ * Compatible with Node.js Writable streams, a web WritableStreamDefaultWriter (via getWriter()), or a callback wrapped as { write: cb }.
14
+ */
15
+ export interface StreamWriter {
16
+ write(chunk: string): void;
17
+ }
18
+ /**
19
+ * StreamEncoder writes GCF output incrementally as symbols and edges arrive.
20
+ * Zero buffering: each symbol/edge is written immediately. A trailer summary
21
+ * is emitted on close() with the final counts.
22
+ *
23
+ * @example
24
+ * ```ts
25
+ * const chunks: string[] = [];
26
+ * const enc = new StreamEncoder({ write: (s) => chunks.push(s) }, 'context_for_task', { tokenBudget: 5000 });
27
+ * enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
28
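+ * enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 }); enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });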
29
+ * enc.close();
30
+ * ```
31
+ */
32
+ export declare class StreamEncoder {
33
+ private w;
34
+ private symIndex;
35
+ private nextID;
36
+ private currentGroup;
37
+ private groupCounts;
38
+ private edgeCount;
39
+ private edgesStarted;
40
+ constructor(w: StreamWriter, tool: string, opts?: StreamOptions);
41
+ private writeHeader;
42
+ /**
43
+ * Emit a symbol line immediately. Group headers are emitted automatically
44
+ * when the distance changes.
45
+ */
46
+ writeSymbol(s: Symbol): void;
47
+ /**
48
+ * Emit an edge line immediately. The edges section header is emitted
49
+ * automatically on the first edge (with [?] deferred count).
50
+ * Source and target must reference previously-written symbols; an edge with an unknown endpoint is silently skipped and not counted.
51
+ */
52
+ writeEdge(e: Edge): void;
53
+ /**
54
+ * Emit a bare reference for a previously-transmitted symbol (session mode).
55
+ */
56
+ writeBareRef(qname: string, distance: number): void;
57
+ /**
58
+ * Emit the ## _summary trailer with final counts. Must be called after all
59
+ * symbols and edges have been written.
60
+ */
61
+ close(): void;
62
+ /** Number of symbols written so far. */
63
+ get symbolCount(): number;
64
+ /** Number of edges written so far. */
65
+ get edgeCount_(): number;
66
+ }
67
+ //# sourceMappingURL=stream.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream.d.ts","sourceRoot":"","sources":["../src/stream.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,CAAC,CAAe;IACxB,OAAO,CAAC,QAAQ,CAAkC;IAClD,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,YAAY,CAAM;IAC1B,OAAO,CAAC,WAAW,CAAkC;IACrD,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,YAAY,CAAS;gBAEjB,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB;IAKnE,OAAO,CAAC,WAAW;IASnB;;;OAGG;IACH,WAAW,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAoB5B;;;;OAIG;IACH,SAAS,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI;IAkBxB;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAkBnD;;;OAGG;IACH,KAAK,IAAI,IAAI;IAkBb,wCAAwC;IACxC,IAAI,WAAW,IAAI,MAAM,CAAwB;IAEjD,sCAAsC;IACtC,IAAI,UAAU,IAAI,MAAM,CAA2B;CACpD"}
package/dist/stream.js ADDED
@@ -0,0 +1,123 @@
1
+ import { KIND_ABBREV } from './constants.js';
2
+ /**
3
+ * StreamEncoder writes GCF output incrementally as symbols and edges arrive.
4
+ * Zero buffering: each symbol/edge is written immediately. A trailer summary
5
+ * is emitted on close() with the final counts.
6
+ *
7
+ * @example
8
+ * ```ts
9
+ * const chunks: string[] = [];
10
+ * const enc = new StreamEncoder({ write: (s) => chunks.push(s) }, 'context_for_task', { tokenBudget: 5000 });
11
+ * enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
12
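+ * enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 }); enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });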
13
+ * enc.close();
14
+ * ```
15
+ */
16
+ export class StreamEncoder {
17
+ w;
18
+ symIndex = new Map();
19
+ nextID = 0;
20
+ currentGroup = '';
21
+ groupCounts = new Map();
22
+ edgeCount = 0;
23
+ edgesStarted = false;
24
+ constructor(w, tool, opts = {}) {
25
+ this.w = w;
26
+ this.writeHeader(tool, opts);
27
+ }
28
+ writeHeader(tool, opts) {
29
+ const parts = [`GCF tool=${tool}`];
30
+ if (opts.tokenBudget)
31
+ parts.push(`budget=${opts.tokenBudget}`);
32
+ if (opts.tokensUsed)
33
+ parts.push(`tokens=${opts.tokensUsed}`);
34
+ if (opts.packRoot)
35
+ parts.push(`pack_root=${opts.packRoot}`);
36
+ if (opts.session)
37
+ parts.push('session=true');
38
+ this.w.write(parts.join(' ') + '\n');
39
+ }
40
+ /**
41
+ * Emit a symbol line immediately. Group headers are emitted automatically
42
+ * when the distance changes.
43
+ */
44
+ writeSymbol(s) {
45
+ const groupNames = ['targets', 'related', 'extended'];
46
+ const groupName = s.distance < groupNames.length
47
+ ? groupNames[s.distance]
48
+ : `distance_${s.distance}`;
49
+ if (groupName !== this.currentGroup) {
50
+ this.w.write(`## ${groupName}\n`);
51
+ this.currentGroup = groupName;
52
+ }
53
+ const id = this.nextID++;
54
+ this.symIndex.set(s.qualifiedName, id);
55
+ const kind = KIND_ABBREV[s.kind] || s.kind;
56
+ this.w.write(`@${id} ${kind} ${s.qualifiedName} ${s.score.toFixed(2)} ${s.provenance}\n`);
57
+ this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
58
+ }
59
+ /**
60
+ * Emit an edge line immediately. The edges section header is emitted
61
+ * automatically on the first edge (with [?] deferred count).
62
+ * Source and target must reference previously-written symbols; an edge with an unknown endpoint is silently skipped and not counted.
63
+ */
64
+ writeEdge(e) {
65
+ const srcIdx = this.symIndex.get(e.source);
66
+ const tgtIdx = this.symIndex.get(e.target);
67
+ if (srcIdx === undefined || tgtIdx === undefined)
68
+ return;
69
+ if (!this.edgesStarted) {
70
+ this.w.write('## edges [?]\n');
71
+ this.edgesStarted = true;
72
+ }
73
+ let line = `@${tgtIdx}<@${srcIdx} ${e.edgeType}`;
74
+ if (e.status && e.status !== 'unchanged') {
75
+ line += ` ${e.status}`;
76
+ }
77
+ this.w.write(line + '\n');
78
+ this.edgeCount++;
79
+ }
80
+ /**
81
+ * Emit a bare reference for a previously-transmitted symbol (session mode).
82
+ */
83
+ writeBareRef(qname, distance) {
84
+ const groupNames = ['targets', 'related', 'extended'];
85
+ const groupName = distance < groupNames.length
86
+ ? groupNames[distance]
87
+ : `distance_${distance}`;
88
+ if (groupName !== this.currentGroup) {
89
+ this.w.write(`## ${groupName}\n`);
90
+ this.currentGroup = groupName;
91
+ }
92
+ const id = this.nextID++;
93
+ this.symIndex.set(qname, id);
94
+ this.w.write(`@${id} # previously transmitted\n`);
95
+ this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
96
+ }
97
+ /**
98
+ * Emit the ## _summary trailer with final counts. Must be called after all
99
+ * symbols and edges have been written.
100
+ */
101
+ close() {
102
+ const sections = [];
103
+ const groupOrder = ['targets', 'related', 'extended'];
104
+ for (const g of groupOrder) {
105
+ const c = this.groupCounts.get(g);
106
+ if (c && c > 0)
107
+ sections.push(`${g}:${c}`);
108
+ }
109
+ for (const [g, c] of this.groupCounts) {
110
+ if (!groupOrder.includes(g) && c > 0)
111
+ sections.push(`${g}:${c}`);
112
+ }
113
+ if (this.edgeCount > 0) {
114
+ sections.push(`edges:${this.edgeCount}`);
115
+ }
116
+ this.w.write(`## _summary symbols=${this.nextID} edges=${this.edgeCount} sections=${sections.join(',')}\n`);
117
+ }
118
+ /** Number of symbols written so far. */
119
+ get symbolCount() { return this.nextID; }
120
+ /** Number of edges written so far. */
121
+ get edgeCount_() { return this.edgeCount; }
122
+ }
123
+ //# sourceMappingURL=stream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream.js","sourceRoot":"","sources":["../src/stream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAqB7C;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,aAAa;IAChB,CAAC,CAAe;IAChB,QAAQ,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC1C,MAAM,GAAG,CAAC,CAAC;IACX,YAAY,GAAG,EAAE,CAAC;IAClB,WAAW,GAAwB,IAAI,GAAG,EAAE,CAAC;IAC7C,SAAS,GAAG,CAAC,CAAC;IACd,YAAY,GAAG,KAAK,CAAC;IAE7B,YAAY,CAAe,EAAE,IAAY,EAAE,OAAsB,EAAE;QACjE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACX,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC/B,CAAC;IAEO,WAAW,CAAC,IAAY,EAAE,IAAmB;QACnD,MAAM,KAAK,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;QAC/D,IAAI,IAAI,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC7D,IAAI,IAAI,CAAC,QAAQ;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC5D,IAAI,IAAI,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7C,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvC,CAAC;IAED;;;OAGG;IACH,WAAW,CAAC,CAAS;QACnB,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,CAAC,CAAC,QAAQ,GAAG,UAAU,CAAC,MAAM;YAC9C,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;YACxB,CAAC,CAAC,YAAY,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE7B,IAAI,SAAS,KAAK,IAAI,CAAC,YAAY,EAAE,CAAC;YACpC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,SAAS,IAAI,CAAC,CAAC;YAClC,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;QAEvC,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC;QAC3C,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAE1F,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED;;;;OAIG;IACH,SAAS,CAAC,CAAO;QACf,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAA
M,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO;QAEzD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC3B,CAAC;QAED,IAAI,IAAI,GAAG,IAAI,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;QACjD,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACzC,IAAI,IAAI,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;QACzB,CAAC;QACD,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,KAAa,EAAE,QAAgB;QAC1C,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,QAAQ,GAAG,UAAU,CAAC,MAAM;YAC5C,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC;YACtB,CAAC,CAAC,YAAY,QAAQ,EAAE,CAAC;QAE3B,IAAI,SAAS,KAAK,IAAI,CAAC,YAAY,EAAE,CAAC;YACpC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,SAAS,IAAI,CAAC,CAAC;YAClC,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAChC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7B,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,8BAA8B,CAAC,CAAC;QAEnD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED;;;OAGG;IACH,KAAK;QACH,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEtD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,MAAM,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,CAAC;QACD,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YACvB,QAAQ,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,uBAAuB,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,SAAS,aAAa,QAAQ
,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9G,CAAC;IAED,wCAAwC;IACxC,IAAI,WAAW,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAEjD,sCAAsC;IACtC,IAAI,UAAU,KAAa,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;CACpD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blackwell-systems/gcf",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "TypeScript implementation of GCF (Graph Compact Format) - token-optimized wire format for LLM tool responses",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
package/src/index.ts CHANGED
@@ -5,3 +5,4 @@ export { decode } from './decode.js';
5
5
  export { Session, encodeWithSession } from './session.js';
6
6
  export { encodeDelta } from './delta.js';
7
7
  export { encodeGeneric } from './generic.js';
8
+ export { StreamEncoder, type StreamWriter, type StreamOptions } from './stream.js';
package/src/stream.ts ADDED
@@ -0,0 +1,154 @@
1
+ import { KIND_ABBREV } from './constants.js';
2
+ import type { Symbol, Edge } from './types.js';
3
+
4
+ /**
5
+ * Options for the streaming encoder.
6
+ */
7
+ export interface StreamOptions {
8
+ tokenBudget?: number;
9
+ tokensUsed?: number;
10
+ packRoot?: string;
11
+ session?: boolean;
12
+ }
13
+
14
+ /**
15
+ * A writable sink for streaming output. Accepts string chunks.
16
+ * Compatible with Node.js Writable streams, a web WritableStreamDefaultWriter (via getWriter()), or a callback wrapped as { write: cb }.
17
+ */
18
+ export interface StreamWriter {
19
+ write(chunk: string): void;
20
+ }
21
+
22
+ /**
23
+ * StreamEncoder writes GCF output incrementally as symbols and edges arrive.
24
+ * Zero buffering: each symbol/edge is written immediately. A trailer summary
25
+ * is emitted on close() with the final counts.
26
+ *
27
+ * @example
28
+ * ```ts
29
+ * const chunks: string[] = [];
30
+ * const enc = new StreamEncoder({ write: (s) => chunks.push(s) }, 'context_for_task', { tokenBudget: 5000 });
31
+ * enc.writeSymbol({ qualifiedName: 'pkg.Auth', kind: 'function', score: 0.95, provenance: 'lsp', distance: 0 });
32
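+ * enc.writeSymbol({ qualifiedName: 'pkg.Server', kind: 'function', score: 0.60, provenance: 'lsp', distance: 1 }); enc.writeEdge({ source: 'pkg.Server', target: 'pkg.Auth', edgeType: 'calls' });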
33
+ * enc.close();
34
+ * ```
35
+ */
36
+ export class StreamEncoder {
37
+ private w: StreamWriter;
38
+ private symIndex: Map<string, number> = new Map();
39
+ private nextID = 0;
40
+ private currentGroup = '';
41
+ private groupCounts: Map<string, number> = new Map();
42
+ private edgeCount = 0;
43
+ private edgesStarted = false;
44
+
45
+ constructor(w: StreamWriter, tool: string, opts: StreamOptions = {}) {
46
+ this.w = w;
47
+ this.writeHeader(tool, opts);
48
+ }
49
+
50
+ private writeHeader(tool: string, opts: StreamOptions): void {
51
+ const parts = [`GCF tool=${tool}`];
52
+ if (opts.tokenBudget) parts.push(`budget=${opts.tokenBudget}`);
53
+ if (opts.tokensUsed) parts.push(`tokens=${opts.tokensUsed}`);
54
+ if (opts.packRoot) parts.push(`pack_root=${opts.packRoot}`);
55
+ if (opts.session) parts.push('session=true');
56
+ this.w.write(parts.join(' ') + '\n');
57
+ }
58
+
59
+ /**
60
+ * Emit a symbol line immediately. Group headers are emitted automatically
61
+ * when the distance changes.
62
+ */
63
+ writeSymbol(s: Symbol): void {
64
+ const groupNames = ['targets', 'related', 'extended'];
65
+ const groupName = s.distance < groupNames.length
66
+ ? groupNames[s.distance]
67
+ : `distance_${s.distance}`;
68
+
69
+ if (groupName !== this.currentGroup) {
70
+ this.w.write(`## ${groupName}\n`);
71
+ this.currentGroup = groupName;
72
+ }
73
+
74
+ const id = this.nextID++;
75
+ this.symIndex.set(s.qualifiedName, id);
76
+
77
+ const kind = KIND_ABBREV[s.kind] || s.kind;
78
+ this.w.write(`@${id} ${kind} ${s.qualifiedName} ${s.score.toFixed(2)} ${s.provenance}\n`);
79
+
80
+ this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
81
+ }
82
+
83
+ /**
84
+ * Emit an edge line immediately. The edges section header is emitted
85
+ * automatically on the first edge (with [?] deferred count).
86
+ * Source and target must reference previously-written symbols; an edge with an unknown endpoint is silently skipped and not counted.
87
+ */
88
+ writeEdge(e: Edge): void {
89
+ const srcIdx = this.symIndex.get(e.source);
90
+ const tgtIdx = this.symIndex.get(e.target);
91
+ if (srcIdx === undefined || tgtIdx === undefined) return;
92
+
93
+ if (!this.edgesStarted) {
94
+ this.w.write('## edges [?]\n');
95
+ this.edgesStarted = true;
96
+ }
97
+
98
+ let line = `@${tgtIdx}<@${srcIdx} ${e.edgeType}`;
99
+ if (e.status && e.status !== 'unchanged') {
100
+ line += ` ${e.status}`;
101
+ }
102
+ this.w.write(line + '\n');
103
+ this.edgeCount++;
104
+ }
105
+
106
+ /**
107
+ * Emit a bare reference for a previously-transmitted symbol (session mode).
108
+ */
109
+ writeBareRef(qname: string, distance: number): void {
110
+ const groupNames = ['targets', 'related', 'extended'];
111
+ const groupName = distance < groupNames.length
112
+ ? groupNames[distance]
113
+ : `distance_${distance}`;
114
+
115
+ if (groupName !== this.currentGroup) {
116
+ this.w.write(`## ${groupName}\n`);
117
+ this.currentGroup = groupName;
118
+ }
119
+
120
+ const id = this.nextID++;
121
+ this.symIndex.set(qname, id);
122
+ this.w.write(`@${id} # previously transmitted\n`);
123
+
124
+ this.groupCounts.set(groupName, (this.groupCounts.get(groupName) || 0) + 1);
125
+ }
126
+
127
+ /**
128
+ * Emit the ## _summary trailer with final counts. Must be called after all
129
+ * symbols and edges have been written.
130
+ */
131
+ close(): void {
132
+ const sections: string[] = [];
133
+ const groupOrder = ['targets', 'related', 'extended'];
134
+
135
+ for (const g of groupOrder) {
136
+ const c = this.groupCounts.get(g);
137
+ if (c && c > 0) sections.push(`${g}:${c}`);
138
+ }
139
+ for (const [g, c] of this.groupCounts) {
140
+ if (!groupOrder.includes(g) && c > 0) sections.push(`${g}:${c}`);
141
+ }
142
+ if (this.edgeCount > 0) {
143
+ sections.push(`edges:${this.edgeCount}`);
144
+ }
145
+
146
+ this.w.write(`## _summary symbols=${this.nextID} edges=${this.edgeCount} sections=${sections.join(',')}\n`);
147
+ }
148
+
149
+ /** Number of symbols written so far. */
150
+ get symbolCount(): number { return this.nextID; }
151
+
152
+ /** Number of edges written so far. */
153
+ get edgeCount_(): number { return this.edgeCount; }
154
+ }