trellis 2.0.13 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1 -1
- package/dist/embeddings/index.js +1 -1
- package/dist/{index-7gvjxt27.js → index-2917tjd8.js} +1 -1
- package/package.json +2 -10
- package/dist/transformers.node-bx3q9d7k.js +0 -33130
- package/src/cli/index.ts +0 -3356
- package/src/core/agents/harness.ts +0 -380
- package/src/core/agents/index.ts +0 -18
- package/src/core/agents/types.ts +0 -90
- package/src/core/index.ts +0 -118
- package/src/core/kernel/middleware.ts +0 -44
- package/src/core/kernel/trellis-kernel.ts +0 -593
- package/src/core/ontology/builtins.ts +0 -248
- package/src/core/ontology/index.ts +0 -34
- package/src/core/ontology/registry.ts +0 -209
- package/src/core/ontology/types.ts +0 -124
- package/src/core/ontology/validator.ts +0 -382
- package/src/core/persist/backend.ts +0 -74
- package/src/core/persist/sqlite-backend.ts +0 -298
- package/src/core/plugins/index.ts +0 -17
- package/src/core/plugins/registry.ts +0 -322
- package/src/core/plugins/types.ts +0 -126
- package/src/core/query/datalog.ts +0 -188
- package/src/core/query/engine.ts +0 -370
- package/src/core/query/index.ts +0 -34
- package/src/core/query/parser.ts +0 -481
- package/src/core/query/types.ts +0 -200
- package/src/core/store/eav-store.ts +0 -467
- package/src/decisions/auto-capture.ts +0 -136
- package/src/decisions/hooks.ts +0 -163
- package/src/decisions/index.ts +0 -261
- package/src/decisions/types.ts +0 -103
- package/src/embeddings/auto-embed.ts +0 -248
- package/src/embeddings/chunker.ts +0 -327
- package/src/embeddings/index.ts +0 -48
- package/src/embeddings/model.ts +0 -112
- package/src/embeddings/search.ts +0 -305
- package/src/embeddings/store.ts +0 -313
- package/src/embeddings/types.ts +0 -92
- package/src/engine.ts +0 -1125
- package/src/garden/cluster.ts +0 -330
- package/src/garden/garden.ts +0 -306
- package/src/garden/index.ts +0 -29
- package/src/git/git-exporter.ts +0 -286
- package/src/git/git-importer.ts +0 -329
- package/src/git/git-reader.ts +0 -189
- package/src/git/index.ts +0 -22
- package/src/identity/governance.ts +0 -211
- package/src/identity/identity.ts +0 -224
- package/src/identity/index.ts +0 -30
- package/src/identity/signing-middleware.ts +0 -97
- package/src/index.ts +0 -29
- package/src/links/index.ts +0 -49
- package/src/links/lifecycle.ts +0 -400
- package/src/links/parser.ts +0 -484
- package/src/links/ref-index.ts +0 -186
- package/src/links/resolver.ts +0 -314
- package/src/links/types.ts +0 -108
- package/src/mcp/index.ts +0 -22
- package/src/mcp/server.ts +0 -1278
- package/src/semantic/csharp-parser.ts +0 -493
- package/src/semantic/go-parser.ts +0 -585
- package/src/semantic/index.ts +0 -34
- package/src/semantic/java-parser.ts +0 -456
- package/src/semantic/python-parser.ts +0 -659
- package/src/semantic/ruby-parser.ts +0 -446
- package/src/semantic/rust-parser.ts +0 -784
- package/src/semantic/semantic-merge.ts +0 -210
- package/src/semantic/ts-parser.ts +0 -681
- package/src/semantic/types.ts +0 -175
- package/src/sync/http-transport.ts +0 -144
- package/src/sync/index.ts +0 -43
- package/src/sync/memory-transport.ts +0 -66
- package/src/sync/multi-repo.ts +0 -200
- package/src/sync/reconciler.ts +0 -237
- package/src/sync/sync-engine.ts +0 -258
- package/src/sync/types.ts +0 -104
- package/src/sync/ws-transport.ts +0 -145
- package/src/ui/client.html +0 -695
- package/src/ui/server.ts +0 -419
- package/src/vcs/blob-store.ts +0 -124
- package/src/vcs/branch.ts +0 -150
- package/src/vcs/checkpoint.ts +0 -64
- package/src/vcs/decompose.ts +0 -469
- package/src/vcs/diff.ts +0 -409
- package/src/vcs/engine-context.ts +0 -26
- package/src/vcs/index.ts +0 -23
- package/src/vcs/issue.ts +0 -800
- package/src/vcs/merge.ts +0 -425
- package/src/vcs/milestone.ts +0 -124
- package/src/vcs/ops.ts +0 -59
- package/src/vcs/types.ts +0 -213
- package/src/vcs/vcs-middleware.ts +0 -81
- package/src/watcher/fs-watcher.ts +0 -255
- package/src/watcher/index.ts +0 -9
- package/src/watcher/ingestion.ts +0 -116
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Auto-Embedding Middleware
|
|
3
|
-
*
|
|
4
|
-
* Kernel middleware that automatically embeds entity facts and links
|
|
5
|
-
* on graph mutations. Runs after successful ops to index new/changed
|
|
6
|
-
* content into the vector store.
|
|
7
|
-
*
|
|
8
|
-
* @module trellis/embeddings
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import type { KernelOp } from '../core/persist/backend.js';
|
|
12
|
-
import type { KernelMiddleware, MiddlewareContext, OpMiddlewareNext } from '../core/kernel/middleware.js';
|
|
13
|
-
import type { Fact, Link } from '../core/store/eav-store.js';
|
|
14
|
-
import type { ChunkMeta, EmbeddingRecord } from './types.js';
|
|
15
|
-
import type { Embedder } from './search.js';
|
|
16
|
-
import { VectorStore } from './store.js';
|
|
17
|
-
import { embed } from './model.js';
|
|
18
|
-
|
|
19
|
-
// ---------------------------------------------------------------------------
|
|
20
|
-
// Entity text builder — converts facts/links into embeddable text
|
|
21
|
-
// ---------------------------------------------------------------------------
|
|
22
|
-
|
|
23
|
-
function factsToText(facts: Fact[]): string {
|
|
24
|
-
return facts
|
|
25
|
-
.filter((f) => f.a !== 'createdAt' && f.a !== 'updatedAt')
|
|
26
|
-
.map((f) => `${f.a}: ${f.v}`)
|
|
27
|
-
.join('\n');
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
function linksToText(links: Link[]): string {
|
|
31
|
-
return links.map((l) => `${l.e1} —[${l.a}]→ ${l.e2}`).join('\n');
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
function entitySummaryText(entityId: string, facts: Fact[], links: Link[]): string {
|
|
35
|
-
const type = facts.find((f) => f.a === 'type')?.v ?? 'Entity';
|
|
36
|
-
const name = facts.find((f) => f.a === 'name' || f.a === 'title')?.v ?? entityId;
|
|
37
|
-
const parts = [`${type}: ${name} (${entityId})`];
|
|
38
|
-
|
|
39
|
-
const attrs = facts.filter((f) => !['type', 'name', 'title', 'createdAt', 'updatedAt'].includes(f.a));
|
|
40
|
-
if (attrs.length > 0) {
|
|
41
|
-
parts.push(attrs.map((f) => ` ${f.a} = ${f.v}`).join('\n'));
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
if (links.length > 0) {
|
|
45
|
-
parts.push('Relations:');
|
|
46
|
-
parts.push(links.map((l) => ` ${l.a} → ${l.e2}`).join('\n'));
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return parts.join('\n');
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
// ---------------------------------------------------------------------------
|
|
53
|
-
// Middleware factory
|
|
54
|
-
// ---------------------------------------------------------------------------
|
|
55
|
-
|
|
56
|
-
/** Configuration for the auto-embed kernel middleware. */
export interface AutoEmbedOptions {
  /** Path to the vector store SQLite database. */
  dbPath: string;
  /** Custom embedder function (default: transformers.js embed). */
  embedFn?: Embedder;
  /** Whether to embed facts individually (default: false — only entity summaries). */
  embedIndividualFacts?: boolean;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Creates a kernel middleware that auto-embeds entities on mutation.
 *
 * On addFacts/addLinks: embeds entity summaries into the vector store.
 * On deleteFacts/deleteLinks: removes stale embeddings.
 *
 * The returned object additionally exposes `close()`, which releases the
 * underlying vector store; callers own that lifecycle.
 */
export function createAutoEmbedMiddleware(options: AutoEmbedOptions): KernelMiddleware & { close: () => void } {
  const store = new VectorStore(options.dbPath);
  const embedFn = options.embedFn ?? embed;
  const embedIndividual = options.embedIndividualFacts ?? false;

  return {
    name: 'auto-embed',

    handleOp: async (op: KernelOp, ctx: MiddlewareContext, next: OpMiddlewareNext) => {
      // Let the op proceed first
      await next(op, ctx);

      // Embed the affected entities after the op completes.
      // NOTE(review): this is awaited, so embedding latency sits on the
      // mutation path even though it runs "after" the op — confirm whether a
      // fire-and-forget call was intended here.
      try {
        await _processOp(op, store, embedFn, embedIndividual);
      } catch {
        // Embedding failures are non-fatal: the mutation already succeeded,
        // so a missing vector only degrades search, not correctness.
      }
    },

    close: () => {
      store.close();
    },
  };
}
|
|
96
|
-
|
|
97
|
-
/**
 * Process one kernel op: drop stale embeddings for entities touched by
 * deletions, then (re)embed entity summaries — and optionally individual
 * facts — for entities touched by additions.
 *
 * @param op              The kernel op whose facts/links drive (re)indexing.
 * @param store           Vector store receiving upserts/deletes.
 * @param embedFn         Text → vector embedder.
 * @param embedIndividual When true, also embed each non-bookkeeping fact.
 */
async function _processOp(
  op: KernelOp,
  store: VectorStore,
  embedFn: Embedder,
  embedIndividual: boolean,
): Promise<void> {
  // Single timestamp for every record produced by this op.
  const now = new Date().toISOString();

  // Collect affected entity IDs
  const entityIds = new Set<string>();
  if (op.facts) for (const f of op.facts) entityIds.add(f.e);
  if (op.links) for (const l of op.links) { entityIds.add(l.e1); entityIds.add(l.e2); }
  if (op.deleteFacts) for (const f of op.deleteFacts) entityIds.add(f.e);
  if (op.deleteLinks) for (const l of op.deleteLinks) { entityIds.add(l.e1); entityIds.add(l.e2); }

  // Handle deletions — remove old embeddings for deleted entities.
  // NOTE(review): this wipes ALL embeddings for every entity the op touches
  // (including entities that only gained facts) whenever the op has any
  // deletes; re-embedding below only covers entities present in op.facts —
  // confirm that ops always pair deletes with the surviving facts.
  if (op.deleteFacts || op.deleteLinks) {
    for (const eid of entityIds) {
      store.deleteByEntity(eid);
    }
  }

  // Handle additions — embed entity summaries
  if (op.facts && op.facts.length > 0) {
    // Group facts by entity
    const factsByEntity = new Map<string, Fact[]>();
    for (const f of op.facts) {
      const existing = factsByEntity.get(f.e) ?? [];
      existing.push(f);
      factsByEntity.set(f.e, existing);
    }

    // Only outgoing links (keyed by e1) contribute to an entity's summary.
    const linksByEntity = new Map<string, Link[]>();
    if (op.links) {
      for (const l of op.links) {
        const existing = linksByEntity.get(l.e1) ?? [];
        existing.push(l);
        linksByEntity.set(l.e1, existing);
      }
    }

    const records: EmbeddingRecord[] = [];

    for (const [eid, facts] of factsByEntity) {
      const links = linksByEntity.get(eid) ?? [];

      // Entity summary embedding
      const summaryText = entitySummaryText(eid, facts, links);
      if (summaryText.trim()) {
        try {
          const vector = await embedFn(summaryText);
          records.push({
            id: `entity:${eid}:summary`,
            entityId: eid,
            content: summaryText,
            // NOTE(review): cast via `as any` — presumably EmbeddingRecord's
            // chunkType union doesn't include this value; confirm against
            // ./types.ts rather than widening here.
            chunkType: 'summary_md' as any,
            updatedAt: now,
            embedding: vector,
          });
        } catch {}  // per-entity embed failure is tolerated; others still index
      }

      // Individual fact embeddings (optional)
      if (embedIndividual) {
        for (const fact of facts) {
          // Skip bookkeeping/identity attributes.
          if (['type', 'createdAt', 'updatedAt'].includes(fact.a)) continue;
          const text = `${fact.a}: ${fact.v}`;
          try {
            const vector = await embedFn(text);
            records.push({
              id: `entity:${eid}:fact:${fact.a}`,
              entityId: eid,
              content: text,
              chunkType: 'doc_comment' as any,
              updatedAt: now,
              embedding: vector,
            });
          } catch {}  // same best-effort policy as the summary embedding
        }
      }
    }

    if (records.length > 0) {
      store.upsertBatch(records);
    }
  }
}
|
|
184
|
-
|
|
185
|
-
// ---------------------------------------------------------------------------
|
|
186
|
-
// RAG Context Builder
|
|
187
|
-
// ---------------------------------------------------------------------------
|
|
188
|
-
|
|
189
|
-
/** Ranked retrieval context assembled for a query by buildRAGContext. */
export interface RAGContext {
  /** The original query. */
  query: string;
  /** Retrieved chunks ranked by relevance. */
  chunks: Array<{
    content: string;
    entityId: string;
    score: number;
    chunkType: string;
  }>;
  /** Total token estimate (rough: 1 token ≈ 4 chars). */
  estimatedTokens: number;
}
|
|
202
|
-
|
|
203
|
-
/**
|
|
204
|
-
* Build a RAG context from a natural language query.
|
|
205
|
-
* Searches the vector store and assembles ranked context chunks.
|
|
206
|
-
*/
|
|
207
|
-
export async function buildRAGContext(
|
|
208
|
-
query: string,
|
|
209
|
-
vectorStore: VectorStore,
|
|
210
|
-
embedFn: Embedder = embed,
|
|
211
|
-
options?: {
|
|
212
|
-
maxChunks?: number;
|
|
213
|
-
maxTokens?: number;
|
|
214
|
-
minScore?: number;
|
|
215
|
-
},
|
|
216
|
-
): Promise<RAGContext> {
|
|
217
|
-
const maxChunks = options?.maxChunks ?? 10;
|
|
218
|
-
const maxTokens = options?.maxTokens ?? 4000;
|
|
219
|
-
const minScore = options?.minScore ?? 0.1;
|
|
220
|
-
|
|
221
|
-
const queryVector = await embedFn(query);
|
|
222
|
-
const results = vectorStore.search(queryVector, {
|
|
223
|
-
limit: maxChunks * 2,
|
|
224
|
-
minScore,
|
|
225
|
-
});
|
|
226
|
-
|
|
227
|
-
const chunks: RAGContext['chunks'] = [];
|
|
228
|
-
let totalChars = 0;
|
|
229
|
-
|
|
230
|
-
for (const r of results) {
|
|
231
|
-
if (chunks.length >= maxChunks) break;
|
|
232
|
-
if (totalChars + r.chunk.content.length > maxTokens * 4) break;
|
|
233
|
-
|
|
234
|
-
chunks.push({
|
|
235
|
-
content: r.chunk.content,
|
|
236
|
-
entityId: r.chunk.entityId,
|
|
237
|
-
score: r.score,
|
|
238
|
-
chunkType: r.chunk.chunkType,
|
|
239
|
-
});
|
|
240
|
-
totalChars += r.chunk.content.length;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
return {
|
|
244
|
-
query,
|
|
245
|
-
chunks,
|
|
246
|
-
estimatedTokens: Math.ceil(totalChars / 4),
|
|
247
|
-
};
|
|
248
|
-
}
|
|
@@ -1,327 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Text Chunker
|
|
3
|
-
*
|
|
4
|
-
* Strategies for splitting different content types into embeddable chunks.
|
|
5
|
-
* Short text is embedded as-is; markdown is split by headings; code entities
|
|
6
|
-
* are chunked by declaration; large text uses sliding window.
|
|
7
|
-
*
|
|
8
|
-
* @see TRL-19
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import type { ChunkMeta, ChunkType } from './types.js';
|
|
12
|
-
|
|
13
|
-
// ---------------------------------------------------------------------------
|
|
14
|
-
// Chunk configuration
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
|
|
17
|
-
/** Max characters per chunk before splitting (roughly ~128 tokens) */
const MAX_CHUNK_CHARS = 512;
/** Overlap characters carried between adjacent sliding-window chunks */
const OVERLAP_CHARS = 64;
|
|
21
|
-
|
|
22
|
-
// ---------------------------------------------------------------------------
|
|
23
|
-
// Public API
|
|
24
|
-
// ---------------------------------------------------------------------------
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Chunk an issue into embeddable pieces.
|
|
28
|
-
*/
|
|
29
|
-
export function chunkIssue(issue: {
|
|
30
|
-
id: string;
|
|
31
|
-
title?: string;
|
|
32
|
-
description?: string;
|
|
33
|
-
}): ChunkMeta[] {
|
|
34
|
-
const now = new Date().toISOString();
|
|
35
|
-
const chunks: ChunkMeta[] = [];
|
|
36
|
-
|
|
37
|
-
if (issue.title) {
|
|
38
|
-
chunks.push({
|
|
39
|
-
id: `issue:${issue.id}:title`,
|
|
40
|
-
entityId: `issue:${issue.id}`,
|
|
41
|
-
content: issue.title,
|
|
42
|
-
chunkType: 'issue_title',
|
|
43
|
-
updatedAt: now,
|
|
44
|
-
});
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
if (issue.description) {
|
|
48
|
-
chunks.push({
|
|
49
|
-
id: `issue:${issue.id}:desc`,
|
|
50
|
-
entityId: `issue:${issue.id}`,
|
|
51
|
-
content: issue.description,
|
|
52
|
-
chunkType: 'issue_desc',
|
|
53
|
-
updatedAt: now,
|
|
54
|
-
});
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
return chunks;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
/**
|
|
61
|
-
* Chunk a decision trace into embeddable pieces.
|
|
62
|
-
* Combines tool name, rationale, context, and output summary.
|
|
63
|
-
*/
|
|
64
|
-
export function chunkDecision(decision: {
|
|
65
|
-
id: string;
|
|
66
|
-
toolName: string;
|
|
67
|
-
rationale?: string;
|
|
68
|
-
context?: string;
|
|
69
|
-
outputSummary?: string;
|
|
70
|
-
}): ChunkMeta[] {
|
|
71
|
-
const parts: string[] = [];
|
|
72
|
-
parts.push(`Decision ${decision.id}: ${decision.toolName}`);
|
|
73
|
-
if (decision.rationale) parts.push(`Rationale: ${decision.rationale}`);
|
|
74
|
-
if (decision.context) parts.push(`Context: ${decision.context}`);
|
|
75
|
-
if (decision.outputSummary) parts.push(`Output: ${decision.outputSummary}`);
|
|
76
|
-
|
|
77
|
-
const content = parts.join('\n');
|
|
78
|
-
if (!content.trim()) return [];
|
|
79
|
-
|
|
80
|
-
return [
|
|
81
|
-
{
|
|
82
|
-
id: `decision:${decision.id}:rationale`,
|
|
83
|
-
entityId: `decision:${decision.id}`,
|
|
84
|
-
content,
|
|
85
|
-
chunkType: 'decision_rationale',
|
|
86
|
-
updatedAt: new Date().toISOString(),
|
|
87
|
-
},
|
|
88
|
-
];
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
/**
|
|
92
|
-
* Chunk a milestone message.
|
|
93
|
-
*/
|
|
94
|
-
export function chunkMilestone(milestone: {
|
|
95
|
-
id: string;
|
|
96
|
-
message?: string;
|
|
97
|
-
}): ChunkMeta[] {
|
|
98
|
-
if (!milestone.message) return [];
|
|
99
|
-
|
|
100
|
-
return [
|
|
101
|
-
{
|
|
102
|
-
id: `milestone:${milestone.id}:msg`,
|
|
103
|
-
entityId: `milestone:${milestone.id}`,
|
|
104
|
-
content: milestone.message,
|
|
105
|
-
chunkType: 'milestone_msg',
|
|
106
|
-
updatedAt: new Date().toISOString(),
|
|
107
|
-
},
|
|
108
|
-
];
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Chunk a markdown file by heading sections.
|
|
113
|
-
* Each H1/H2/H3 section becomes a separate chunk.
|
|
114
|
-
* Sections that exceed MAX_CHUNK_CHARS get sliding-window split.
|
|
115
|
-
*/
|
|
116
|
-
export function chunkMarkdown(filePath: string, content: string): ChunkMeta[] {
|
|
117
|
-
if (!content.trim()) return [];
|
|
118
|
-
|
|
119
|
-
const entityId = `file:${filePath}`;
|
|
120
|
-
const now = new Date().toISOString();
|
|
121
|
-
const sections = splitByHeadings(content);
|
|
122
|
-
const chunks: ChunkMeta[] = [];
|
|
123
|
-
|
|
124
|
-
for (let i = 0; i < sections.length; i++) {
|
|
125
|
-
const section = sections[i];
|
|
126
|
-
if (!section.text.trim()) continue;
|
|
127
|
-
|
|
128
|
-
if (section.text.length <= MAX_CHUNK_CHARS) {
|
|
129
|
-
chunks.push({
|
|
130
|
-
id: `${entityId}:section:${i}`,
|
|
131
|
-
entityId,
|
|
132
|
-
content: section.text,
|
|
133
|
-
chunkType: 'markdown',
|
|
134
|
-
filePath,
|
|
135
|
-
updatedAt: now,
|
|
136
|
-
});
|
|
137
|
-
} else {
|
|
138
|
-
// Split long sections with sliding window
|
|
139
|
-
const windows = slidingWindow(section.text);
|
|
140
|
-
for (let w = 0; w < windows.length; w++) {
|
|
141
|
-
chunks.push({
|
|
142
|
-
id: `${entityId}:section:${i}:w${w}`,
|
|
143
|
-
entityId,
|
|
144
|
-
content: windows[w],
|
|
145
|
-
chunkType: 'markdown',
|
|
146
|
-
filePath,
|
|
147
|
-
updatedAt: now,
|
|
148
|
-
});
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
return chunks;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
/**
|
|
157
|
-
* Chunk code entities (functions, classes, interfaces) from a parsed file.
|
|
158
|
-
* Each declaration's signature + doc-comment becomes a chunk.
|
|
159
|
-
*/
|
|
160
|
-
export function chunkCodeEntities(
|
|
161
|
-
filePath: string,
|
|
162
|
-
declarations: Array<{
|
|
163
|
-
id: string;
|
|
164
|
-
name: string;
|
|
165
|
-
kind: string;
|
|
166
|
-
signature: string;
|
|
167
|
-
docComment?: string;
|
|
168
|
-
}>,
|
|
169
|
-
): ChunkMeta[] {
|
|
170
|
-
const now = new Date().toISOString();
|
|
171
|
-
const chunks: ChunkMeta[] = [];
|
|
172
|
-
|
|
173
|
-
for (const decl of declarations) {
|
|
174
|
-
const parts: string[] = [];
|
|
175
|
-
if (decl.docComment) parts.push(decl.docComment);
|
|
176
|
-
parts.push(`${decl.kind} ${decl.name}`);
|
|
177
|
-
parts.push(decl.signature);
|
|
178
|
-
|
|
179
|
-
const content = parts.join('\n').slice(0, MAX_CHUNK_CHARS);
|
|
180
|
-
|
|
181
|
-
chunks.push({
|
|
182
|
-
id: `symbol:${filePath}#${decl.name}`,
|
|
183
|
-
entityId: `symbol:${filePath}#${decl.name}`,
|
|
184
|
-
content,
|
|
185
|
-
chunkType: 'code_entity',
|
|
186
|
-
filePath,
|
|
187
|
-
updatedAt: now,
|
|
188
|
-
});
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
return chunks;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
/**
|
|
195
|
-
* Chunk doc-comments extracted from source files.
|
|
196
|
-
*/
|
|
197
|
-
export function chunkDocComments(
|
|
198
|
-
filePath: string,
|
|
199
|
-
comments: Array<{ line: number; text: string }>,
|
|
200
|
-
): ChunkMeta[] {
|
|
201
|
-
if (comments.length === 0) return [];
|
|
202
|
-
|
|
203
|
-
const entityId = `file:${filePath}`;
|
|
204
|
-
const now = new Date().toISOString();
|
|
205
|
-
const chunks: ChunkMeta[] = [];
|
|
206
|
-
|
|
207
|
-
for (let i = 0; i < comments.length; i++) {
|
|
208
|
-
const comment = comments[i];
|
|
209
|
-
if (!comment.text.trim()) continue;
|
|
210
|
-
|
|
211
|
-
chunks.push({
|
|
212
|
-
id: `${entityId}:doc:${i}`,
|
|
213
|
-
entityId,
|
|
214
|
-
content: comment.text.slice(0, MAX_CHUNK_CHARS),
|
|
215
|
-
chunkType: 'doc_comment',
|
|
216
|
-
filePath,
|
|
217
|
-
updatedAt: now,
|
|
218
|
-
});
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
return chunks;
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
/**
|
|
225
|
-
* Chunk a summary.md or similar short-to-medium text file.
|
|
226
|
-
*/
|
|
227
|
-
export function chunkSummary(filePath: string, content: string): ChunkMeta[] {
|
|
228
|
-
if (!content.trim()) return [];
|
|
229
|
-
|
|
230
|
-
const entityId = `file:${filePath}`;
|
|
231
|
-
const now = new Date().toISOString();
|
|
232
|
-
|
|
233
|
-
if (content.length <= MAX_CHUNK_CHARS) {
|
|
234
|
-
return [
|
|
235
|
-
{
|
|
236
|
-
id: `${entityId}:summary`,
|
|
237
|
-
entityId,
|
|
238
|
-
content,
|
|
239
|
-
chunkType: 'summary_md',
|
|
240
|
-
filePath,
|
|
241
|
-
updatedAt: now,
|
|
242
|
-
},
|
|
243
|
-
];
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
// Split by headings for longer summaries
|
|
247
|
-
return chunkMarkdown(filePath, content).map((c) => ({
|
|
248
|
-
...c,
|
|
249
|
-
chunkType: 'summary_md' as ChunkType,
|
|
250
|
-
}));
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
/**
|
|
254
|
-
* Auto-detect file type and chunk accordingly.
|
|
255
|
-
*/
|
|
256
|
-
export function chunkFile(filePath: string, content: string): ChunkMeta[] {
|
|
257
|
-
if (!content.trim()) return [];
|
|
258
|
-
|
|
259
|
-
const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
|
|
260
|
-
|
|
261
|
-
// summary.md files
|
|
262
|
-
if (filePath.endsWith('summary.md')) {
|
|
263
|
-
return chunkSummary(filePath, content);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
// Markdown files
|
|
267
|
-
if (ext === 'md') {
|
|
268
|
-
return chunkMarkdown(filePath, content);
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
// Source files — we only chunk doc-comments (code entities are handled separately)
|
|
272
|
-
// Return empty — code entities are handled via chunkCodeEntities from parsed AST
|
|
273
|
-
return [];
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
// ---------------------------------------------------------------------------
|
|
277
|
-
// Internal helpers
|
|
278
|
-
// ---------------------------------------------------------------------------
|
|
279
|
-
|
|
280
|
-
/** A heading-delimited slice of a markdown document. */
interface Section {
  /** The heading line that opened this section (absent for preamble before the first heading). */
  heading?: string;
  /** Full section text, including the heading line itself. */
  text: string;
}
|
|
284
|
-
|
|
285
|
-
/**
|
|
286
|
-
* Split markdown content by heading boundaries (# H1, ## H2, ### H3).
|
|
287
|
-
*/
|
|
288
|
-
function splitByHeadings(content: string): Section[] {
|
|
289
|
-
const lines = content.split('\n');
|
|
290
|
-
const sections: Section[] = [];
|
|
291
|
-
let currentSection: Section = { text: '' };
|
|
292
|
-
|
|
293
|
-
for (const line of lines) {
|
|
294
|
-
if (/^#{1,3}\s/.test(line)) {
|
|
295
|
-
// New heading — start a new section
|
|
296
|
-
if (currentSection.text.trim()) {
|
|
297
|
-
sections.push(currentSection);
|
|
298
|
-
}
|
|
299
|
-
currentSection = { heading: line, text: line + '\n' };
|
|
300
|
-
} else {
|
|
301
|
-
currentSection.text += line + '\n';
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
if (currentSection.text.trim()) {
|
|
306
|
-
sections.push(currentSection);
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
return sections;
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
/**
|
|
313
|
-
* Split long text into overlapping windows.
|
|
314
|
-
*/
|
|
315
|
-
export function slidingWindow(text: string): string[] {
|
|
316
|
-
const windows: string[] = [];
|
|
317
|
-
let start = 0;
|
|
318
|
-
|
|
319
|
-
while (start < text.length) {
|
|
320
|
-
const end = Math.min(start + MAX_CHUNK_CHARS, text.length);
|
|
321
|
-
windows.push(text.slice(start, end));
|
|
322
|
-
if (end >= text.length) break;
|
|
323
|
-
start += MAX_CHUNK_CHARS - OVERLAP_CHARS;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
return windows;
|
|
327
|
-
}
|
package/src/embeddings/index.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
/**
 * Embeddings Module — Public API
 *
 * Provides semantic embedding and vector search for TrellisVCS entities.
 * Pure re-export barrel: no runtime logic lives here.
 *
 * @see TRL-18
 */

// Types
export type {
  ChunkType,
  ChunkMeta,
  EmbeddingRecord,
  SearchResult,
  SearchOptions,
  EmbeddingModelConfig,
} from './types.js';
export { DEFAULT_MODEL_CONFIG } from './types.js';

// Model
export { embed, embedBatch, loadModel, resetModel } from './model.js';

// Store
export { VectorStore, cosineSimilarity } from './store.js';

// Search
export { EmbeddingManager } from './search.js';
export type { SearchableEngine, Embedder } from './search.js';

// Auto-embedding middleware + RAG
export { createAutoEmbedMiddleware, buildRAGContext } from './auto-embed.js';
export type { AutoEmbedOptions, RAGContext } from './auto-embed.js';

// New graph chunk types
export type { GraphChunkType } from './types.js';

// Chunker
export {
  chunkIssue,
  chunkMilestone,
  chunkDecision,
  chunkMarkdown,
  chunkCodeEntities,
  chunkDocComments,
  chunkSummary,
  chunkFile,
  slidingWindow,
} from './chunker.js';
|