baselineos 0.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/LICENSE +17 -0
  2. package/README.md +198 -0
  3. package/dist/__evals__/runner.d.ts +2 -0
  4. package/dist/__evals__/runner.js +14687 -0
  5. package/dist/__evals__/runner.js.map +1 -0
  6. package/dist/api/server.d.ts +21 -0
  7. package/dist/api/server.js +1007 -0
  8. package/dist/api/server.js.map +1 -0
  9. package/dist/cli/bin.d.ts +1 -0
  10. package/dist/cli/bin.js +8427 -0
  11. package/dist/cli/bin.js.map +1 -0
  12. package/dist/core/agent-bus.d.ts +110 -0
  13. package/dist/core/agent-bus.js +242 -0
  14. package/dist/core/agent-bus.js.map +1 -0
  15. package/dist/core/cache.d.ts +66 -0
  16. package/dist/core/cache.js +160 -0
  17. package/dist/core/cache.js.map +1 -0
  18. package/dist/core/config.d.ts +1002 -0
  19. package/dist/core/config.js +429 -0
  20. package/dist/core/config.js.map +1 -0
  21. package/dist/core/indexer.d.ts +152 -0
  22. package/dist/core/indexer.js +481 -0
  23. package/dist/core/indexer.js.map +1 -0
  24. package/dist/core/llm-tracer.d.ts +2 -0
  25. package/dist/core/llm-tracer.js +241 -0
  26. package/dist/core/llm-tracer.js.map +1 -0
  27. package/dist/core/memory.d.ts +86 -0
  28. package/dist/core/memory.js +346 -0
  29. package/dist/core/memory.js.map +1 -0
  30. package/dist/core/opa-client.d.ts +51 -0
  31. package/dist/core/opa-client.js +157 -0
  32. package/dist/core/opa-client.js.map +1 -0
  33. package/dist/core/opa-policy-gate.d.ts +133 -0
  34. package/dist/core/opa-policy-gate.js +454 -0
  35. package/dist/core/opa-policy-gate.js.map +1 -0
  36. package/dist/core/orchestrator.d.ts +14 -0
  37. package/dist/core/orchestrator.js +1297 -0
  38. package/dist/core/orchestrator.js.map +1 -0
  39. package/dist/core/pii-detector.d.ts +82 -0
  40. package/dist/core/pii-detector.js +126 -0
  41. package/dist/core/pii-detector.js.map +1 -0
  42. package/dist/core/rag-engine.d.ts +121 -0
  43. package/dist/core/rag-engine.js +504 -0
  44. package/dist/core/rag-engine.js.map +1 -0
  45. package/dist/core/task-queue.d.ts +69 -0
  46. package/dist/core/task-queue.js +124 -0
  47. package/dist/core/task-queue.js.map +1 -0
  48. package/dist/core/telemetry.d.ts +56 -0
  49. package/dist/core/telemetry.js +94 -0
  50. package/dist/core/telemetry.js.map +1 -0
  51. package/dist/core/types.d.ts +328 -0
  52. package/dist/core/types.js +24 -0
  53. package/dist/core/types.js.map +1 -0
  54. package/dist/index.d.ts +21 -0
  55. package/dist/index.js +12444 -0
  56. package/dist/index.js.map +1 -0
  57. package/dist/llm-tracer-CIIujuO-.d.ts +493 -0
  58. package/dist/mcp/server.d.ts +2651 -0
  59. package/dist/mcp/server.js +676 -0
  60. package/dist/mcp/server.js.map +1 -0
  61. package/dist/orchestrator-DF89k_AK.d.ts +506 -0
  62. package/package.json +157 -0
  63. package/templates/README.md +7 -0
  64. package/templates/baseline.config.ts +207 -0
@@ -0,0 +1,121 @@
1
+ import { KnowledgeIndexer } from './indexer.js';
2
+
3
+ /**
4
+ * BaselineOS RAG Engine — Hybrid Retrieval (BM25 + Vector)
5
+ *
6
+ * Combines keyword search (MiniSearch/BM25) with optional vector search
7
+ * (ChromaDB + OpenAI embeddings) for high-quality context retrieval.
8
+ *
9
+ * 6 Collections:
10
+ * codebase — TypeScript/Python source files
11
+ * protocols — Protocol specs, READMEs, architecture docs
12
+ * decisions — ADRs, design decisions
13
+ * errors — Error patterns, debugging context
14
+ * compliance — Regulatory frameworks, audit evidence
15
+ * domain — GTCX domain knowledge, CLAUDE.md files
16
+ *
17
+ * @license Apache-2.0
18
+ */
19
+
20
+ type CollectionName = 'codebase' | 'protocols' | 'decisions' | 'errors' | 'compliance' | 'domain';
21
+ interface RAGConfig {
22
+ projectRoot: string;
23
+ /** OpenAI API key for embeddings (optional — BM25 works without it) */
24
+ openaiApiKey?: string;
25
+ /** ChromaDB host (optional — BM25 works without it) */
26
+ chromaHost?: string;
27
+ chromaPort?: number;
28
+ /** Max chunks per collection */
29
+ maxChunksPerCollection?: number;
30
+ /** Chunk size in characters */
31
+ chunkSize?: number;
32
+ /** Chunk overlap in characters */
33
+ chunkOverlap?: number;
34
+ /** BM25 result limit */
35
+ bm25Limit?: number;
36
+ /** Vector result limit */
37
+ vectorLimit?: number;
38
+ /** Minimum relevance score (0-1) */
39
+ minRelevance?: number;
40
+ }
41
+ interface RAGChunk {
42
+ id: string;
43
+ content: string;
44
+ collection: CollectionName;
45
+ source: string;
46
+ repo: string;
47
+ /** Section within the file */
48
+ section?: string;
49
+ /** Content hash for dedup */
50
+ hash: string;
51
+ /** Token estimate (content.length / 4) */
52
+ tokenEstimate: number;
53
+ metadata: Record<string, string>;
54
+ }
55
+ interface RAGResult {
56
+ chunks: Array<{
57
+ chunk: RAGChunk;
58
+ score: number;
59
+ matchType: 'bm25' | 'vector' | 'hybrid';
60
+ }>;
61
+ query: string;
62
+ totalChunks: number;
63
+ bm25Hits: number;
64
+ vectorHits: number;
65
+ duration: number;
66
+ }
67
+ interface IngestionResult {
68
+ totalFiles: number;
69
+ totalChunks: number;
70
+ collections: Record<CollectionName, number>;
71
+ duration: number;
72
+ errors: string[];
73
+ }
74
+ declare class RAGEngine {
75
+ private config;
76
+ private chunks;
77
+ private searchIndex;
78
+ private initialized;
79
+ private chromaCollections;
80
+ private chromaAvailable;
81
+ constructor(config: RAGConfig);
82
+ ingest(knowledgePaths?: string[]): Promise<IngestionResult>;
83
+ search(query: string, options?: {
84
+ collections?: CollectionName[];
85
+ repo?: string;
86
+ limit?: number;
87
+ minScore?: number;
88
+ }): Promise<RAGResult>;
89
+ /**
90
+ * Get context for a subject using RAG + knowledge indexer.
91
+ * This is the primary retrieval method — combines structured
92
+ * context from the indexer with chunk-level RAG results.
93
+ */
94
+ getContext(query: string, indexer?: KnowledgeIndexer, options?: {
95
+ collections?: CollectionName[];
96
+ repo?: string;
97
+ maxTokens?: number;
98
+ }): Promise<{
99
+ content: string;
100
+ sources: string[];
101
+ tokenEstimate: number;
102
+ }>;
103
+ private initChroma;
104
+ private vectorSearch;
105
+ private bm25Search;
106
+ private rebuildSearchIndex;
107
+ private resolveFiles;
108
+ private walkGlob;
109
+ private walkDir;
110
+ private matchesFilePattern;
111
+ private extractRepo;
112
+ private extractSection;
113
+ getStats(): {
114
+ totalChunks: number;
115
+ collections: Record<CollectionName, number>;
116
+ initialized: boolean;
117
+ vectorStoreAvailable: boolean;
118
+ };
119
+ }
120
+
121
+ export { type CollectionName, type IngestionResult, type RAGChunk, type RAGConfig, RAGEngine, type RAGResult };
@@ -0,0 +1,504 @@
1
+ import { readFileSync, existsSync, readdirSync, statSync } from 'fs';
2
+ import { relative, extname, basename, join } from 'path';
3
+ import { createHash } from 'crypto';
4
+ import MiniSearch from 'minisearch';
5
+
6
+ // src/core/rag-engine.ts
7
+ var COLLECTION_RULES = [
8
+ {
9
+ collection: "decisions",
10
+ pathPatterns: [/decisions\//, /ADR-/, /adr-/],
11
+ extensions: [".md"]
12
+ },
13
+ {
14
+ collection: "protocols",
15
+ pathPatterns: [/3-protocols\//, /SPEC\.md$/, /protocol/i],
16
+ extensions: [".md"]
17
+ },
18
+ {
19
+ collection: "compliance",
20
+ pathPatterns: [/compliance/, /regulatory/, /fatf/i, /esg/i, /10-compliance/],
21
+ extensions: [".md", ".json"]
22
+ },
23
+ {
24
+ collection: "errors",
25
+ pathPatterns: [/error/, /debug/, /troubleshoot/],
26
+ extensions: [".md", ".ts", ".json"]
27
+ },
28
+ {
29
+ collection: "codebase",
30
+ pathPatterns: [/src\//, /lib\//, /packages\//],
31
+ extensions: [".ts", ".tsx", ".js", ".py"]
32
+ },
33
+ {
34
+ collection: "domain",
35
+ pathPatterns: [/CLAUDE\.md$/, /_cannon\//, /_sop\//, /README\.md$/],
36
+ extensions: [".md"]
37
+ }
38
+ ];
39
+ function classifyFile(filePath) {
40
+ for (const rule of COLLECTION_RULES) {
41
+ const ext2 = extname(filePath);
42
+ if (!rule.extensions.includes(ext2)) continue;
43
+ for (const pattern of rule.pathPatterns) {
44
+ if (pattern.test(filePath)) return rule.collection;
45
+ }
46
+ }
47
+ const ext = extname(filePath);
48
+ if ([".ts", ".tsx", ".js", ".py"].includes(ext)) return "codebase";
49
+ return "domain";
50
+ }
51
+ function chunkContent(content, chunkSize, overlap) {
52
+ const chunks = [];
53
+ const sections = content.split(/(?=^##\s)/m);
54
+ if (sections.length > 1) {
55
+ let current = "";
56
+ for (const section of sections) {
57
+ if (current.length + section.length > chunkSize && current.length > 0) {
58
+ chunks.push(current.trim());
59
+ current = current.slice(-overlap) + section;
60
+ } else {
61
+ current += section;
62
+ }
63
+ }
64
+ if (current.trim()) chunks.push(current.trim());
65
+ } else {
66
+ for (let i = 0; i < content.length; i += chunkSize - overlap) {
67
+ const chunk = content.slice(i, i + chunkSize);
68
+ if (chunk.trim()) chunks.push(chunk.trim());
69
+ }
70
+ }
71
+ return chunks.length > 0 ? chunks : [content.trim()];
72
+ }
73
+ var RAGEngine = class {
74
+ config;
75
+ chunks = /* @__PURE__ */ new Map();
76
+ searchIndex;
77
+ initialized = false;
78
+ chromaCollections = /* @__PURE__ */ new Map();
79
+ chromaAvailable = false;
80
+ constructor(config) {
81
+ this.config = {
82
+ projectRoot: config.projectRoot,
83
+ openaiApiKey: config.openaiApiKey ?? process.env.OPENAI_API_KEY ?? "",
84
+ chromaHost: config.chromaHost ?? "localhost",
85
+ chromaPort: config.chromaPort ?? 8e3,
86
+ maxChunksPerCollection: config.maxChunksPerCollection ?? 1e3,
87
+ chunkSize: config.chunkSize ?? 1500,
88
+ chunkOverlap: config.chunkOverlap ?? 200,
89
+ bm25Limit: config.bm25Limit ?? 10,
90
+ vectorLimit: config.vectorLimit ?? 5,
91
+ minRelevance: config.minRelevance ?? 0.3
92
+ };
93
+ this.searchIndex = new MiniSearch({
94
+ fields: ["content", "source", "section", "repo"],
95
+ storeFields: ["id", "collection", "source", "repo", "section"],
96
+ searchOptions: {
97
+ boost: { content: 2, section: 1.5, source: 1 },
98
+ fuzzy: 0.2,
99
+ prefix: true
100
+ }
101
+ });
102
+ }
103
+ // ─── Ingestion ────────────────────────────────────────────────────────────
104
+ async ingest(knowledgePaths) {
105
+ const start = Date.now();
106
+ const errors = [];
107
+ const collectionCounts = {
108
+ codebase: 0,
109
+ protocols: 0,
110
+ decisions: 0,
111
+ errors: 0,
112
+ compliance: 0,
113
+ domain: 0
114
+ };
115
+ const paths = knowledgePaths ?? [
116
+ "**/_sop/.gtcx/decisions/*.md",
117
+ "**/CLAUDE.md",
118
+ "**/_cannon/*.md",
119
+ "3-protocols/**/SPEC.md",
120
+ "3-protocols/**/README.md",
121
+ "6-platforms/**/SPEC.md",
122
+ "6-platforms/**/README.md",
123
+ "5-intelligence/**/README.md",
124
+ "2-core/packages/*/src/**/*.ts",
125
+ "compliance-os/**/*.md",
126
+ "ai-1-agile/_sop/**/*.md"
127
+ ];
128
+ const files = this.resolveFiles(paths);
129
+ let totalChunks = 0;
130
+ for (const filePath of files) {
131
+ try {
132
+ const content = readFileSync(filePath, "utf-8");
133
+ if (!content.trim()) continue;
134
+ const relPath = relative(this.config.projectRoot, filePath);
135
+ const collection = classifyFile(relPath);
136
+ const repo = this.extractRepo(relPath);
137
+ if (collectionCounts[collection] >= this.config.maxChunksPerCollection) continue;
138
+ const fileChunks = chunkContent(
139
+ content,
140
+ this.config.chunkSize,
141
+ this.config.chunkOverlap
142
+ );
143
+ for (let i = 0; i < fileChunks.length; i++) {
144
+ const chunkContent2 = fileChunks[i];
145
+ const hash = createHash("sha256").update(chunkContent2).digest("hex").slice(0, 16);
146
+ const id = `${collection}:${hash}`;
147
+ if (this.chunks.has(id)) continue;
148
+ const chunk = {
149
+ id,
150
+ content: chunkContent2,
151
+ collection,
152
+ source: relPath,
153
+ repo,
154
+ section: this.extractSection(chunkContent2),
155
+ hash,
156
+ tokenEstimate: Math.ceil(chunkContent2.length / 4),
157
+ metadata: {
158
+ file: basename(filePath),
159
+ ext: extname(filePath),
160
+ chunkIndex: String(i),
161
+ totalChunks: String(fileChunks.length)
162
+ }
163
+ };
164
+ this.chunks.set(id, chunk);
165
+ collectionCounts[collection]++;
166
+ totalChunks++;
167
+ }
168
+ } catch (err) {
169
+ const msg = err instanceof Error ? err.message : String(err);
170
+ errors.push(`${filePath}: ${msg}`);
171
+ }
172
+ }
173
+ this.rebuildSearchIndex();
174
+ await this.initChroma(errors);
175
+ this.initialized = true;
176
+ return {
177
+ totalFiles: files.length,
178
+ totalChunks,
179
+ collections: collectionCounts,
180
+ duration: Date.now() - start,
181
+ errors
182
+ };
183
+ }
184
+ // ─── Hybrid Search ────────────────────────────────────────────────────────
185
+ async search(query, options) {
186
+ const start = Date.now();
187
+ const limit = options?.limit ?? this.config.bm25Limit;
188
+ const minScore = options?.minScore ?? this.config.minRelevance;
189
+ const bm25Results = this.bm25Search(query, {
190
+ collections: options?.collections,
191
+ repo: options?.repo,
192
+ limit
193
+ });
194
+ const vectorResults = await this.vectorSearch(query, {
195
+ collections: options?.collections,
196
+ repo: options?.repo,
197
+ limit: this.config.vectorLimit
198
+ });
199
+ const merged = /* @__PURE__ */ new Map();
200
+ for (let i = 0; i < bm25Results.length; i++) {
201
+ const r = bm25Results[i];
202
+ if (r.score < minScore) continue;
203
+ const rrfScore = 1 / (60 + i);
204
+ merged.set(r.chunk.id, { chunk: r.chunk, score: rrfScore, matchType: "bm25" });
205
+ }
206
+ for (let i = 0; i < vectorResults.length; i++) {
207
+ const r = vectorResults[i];
208
+ const rrfScore = 1 / (60 + i);
209
+ const existing = merged.get(r.chunk.id);
210
+ if (existing) {
211
+ existing.score += rrfScore;
212
+ existing.matchType = "hybrid";
213
+ } else {
214
+ merged.set(r.chunk.id, { chunk: r.chunk, score: rrfScore, matchType: "vector" });
215
+ }
216
+ }
217
+ const sorted = Array.from(merged.values()).sort((a, b) => b.score - a.score);
218
+ return {
219
+ chunks: sorted.slice(0, limit),
220
+ query,
221
+ totalChunks: this.chunks.size,
222
+ bm25Hits: bm25Results.length,
223
+ vectorHits: vectorResults.length,
224
+ duration: Date.now() - start
225
+ };
226
+ }
227
+ /**
228
+ * Get context for a subject using RAG + knowledge indexer.
229
+ * This is the primary retrieval method — combines structured
230
+ * context from the indexer with chunk-level RAG results.
231
+ */
232
+ async getContext(query, indexer, options) {
233
+ const maxTokens = options?.maxTokens ?? 4e3;
234
+ const ragResult = await this.search(query, {
235
+ collections: options?.collections,
236
+ repo: options?.repo,
237
+ limit: 15
238
+ });
239
+ const parts = [];
240
+ const sources = [];
241
+ let tokenCount = 0;
242
+ for (const { chunk, score } of ragResult.chunks) {
243
+ if (tokenCount + chunk.tokenEstimate > maxTokens) break;
244
+ parts.push(`<!-- source: ${chunk.source} (${chunk.collection}, score: ${score.toFixed(2)}) -->`);
245
+ parts.push(chunk.content);
246
+ parts.push("");
247
+ if (!sources.includes(chunk.source)) {
248
+ sources.push(chunk.source);
249
+ }
250
+ tokenCount += chunk.tokenEstimate;
251
+ }
252
+ return {
253
+ content: parts.join("\n"),
254
+ sources,
255
+ tokenEstimate: tokenCount
256
+ };
257
+ }
258
+ // ─── ChromaDB Initialization ─────────────────────────────────────────────
259
+ async initChroma(errors) {
260
+ if (!this.config.openaiApiKey) return;
261
+ let chromadb;
262
+ try {
263
+ chromadb = await import('chromadb');
264
+ } catch {
265
+ return;
266
+ }
267
+ try {
268
+ const ChromaClientCtor = chromadb.ChromaClient;
269
+ const client = new ChromaClientCtor({
270
+ path: `http://${this.config.chromaHost}:${this.config.chromaPort}`
271
+ });
272
+ const collectionNames = [
273
+ "codebase",
274
+ "protocols",
275
+ "decisions",
276
+ "errors",
277
+ "compliance",
278
+ "domain"
279
+ ];
280
+ for (const name of collectionNames) {
281
+ const collection = await client.getOrCreateCollection({
282
+ name: `baseline-${name}`,
283
+ metadata: { source: "baselineos-rag", collection: name }
284
+ });
285
+ this.chromaCollections.set(name, collection);
286
+ }
287
+ for (const name of collectionNames) {
288
+ const collection = this.chromaCollections.get(name);
289
+ if (!collection) continue;
290
+ const collectionChunks = Array.from(this.chunks.values()).filter(
291
+ (c) => c.collection === name
292
+ );
293
+ if (collectionChunks.length === 0) continue;
294
+ const existingCount = await collection.count();
295
+ if (existingCount >= collectionChunks.length) continue;
296
+ const batchSize = 100;
297
+ for (let i = 0; i < collectionChunks.length; i += batchSize) {
298
+ const batch = collectionChunks.slice(i, i + batchSize);
299
+ await collection.add({
300
+ ids: batch.map((c) => c.id),
301
+ documents: batch.map((c) => c.content),
302
+ metadatas: batch.map((c) => ({
303
+ source: c.source,
304
+ repo: c.repo,
305
+ section: c.section ?? "",
306
+ hash: c.hash,
307
+ collection: c.collection
308
+ }))
309
+ });
310
+ }
311
+ }
312
+ this.chromaAvailable = true;
313
+ } catch (err) {
314
+ const msg = err instanceof Error ? err.message : String(err);
315
+ errors.push(`ChromaDB init failed (BM25-only mode): ${msg}`);
316
+ }
317
+ }
318
+ // ─── Vector Search ──────────────────────────────────────────────────────
319
+ async vectorSearch(query, options) {
320
+ if (!this.chromaAvailable || !this.config.openaiApiKey) return [];
321
+ const limit = options?.limit ?? this.config.vectorLimit;
322
+ const targetCollections = options?.collections ?? [
323
+ "codebase",
324
+ "protocols",
325
+ "decisions",
326
+ "errors",
327
+ "compliance",
328
+ "domain"
329
+ ];
330
+ const results = [];
331
+ for (const collName of targetCollections) {
332
+ const collection = this.chromaCollections.get(collName);
333
+ if (!collection) continue;
334
+ try {
335
+ const where = options?.repo ? { repo: options.repo } : void 0;
336
+ const queryResult = await collection.query({
337
+ queryTexts: [query],
338
+ nResults: limit,
339
+ where,
340
+ include: ["documents", "metadatas", "distances"]
341
+ });
342
+ const ids = queryResult.ids[0] ?? [];
343
+ const distances = queryResult.distances[0] ?? [];
344
+ for (let i = 0; i < ids.length; i++) {
345
+ const chunkId = ids[i];
346
+ const chunk = this.chunks.get(chunkId);
347
+ if (!chunk) continue;
348
+ const distance = distances[i] ?? 1;
349
+ const score = 1 / (1 + distance);
350
+ results.push({ chunk, score, matchType: "vector" });
351
+ }
352
+ } catch (err) {
353
+ process.stderr.write(`[baseline:rag] Vector query failed: ${err.message}
354
+ `);
355
+ }
356
+ }
357
+ return results.sort((a, b) => b.score - a.score).slice(0, limit);
358
+ }
359
+ // ─── BM25 Search ─────────────────────────────────────────────────────────
360
+ bm25Search(query, options) {
361
+ if (!this.initialized || this.chunks.size === 0) return [];
362
+ const limit = options?.limit ?? this.config.bm25Limit;
363
+ let results = this.searchIndex.search(query).slice(0, limit * 3);
364
+ if (options?.collections?.length) {
365
+ results = results.filter(
366
+ (r) => options.collections.includes(r.collection)
367
+ );
368
+ }
369
+ if (options?.repo) {
370
+ results = results.filter((r) => r.repo === options.repo);
371
+ }
372
+ return results.slice(0, limit).map((r) => ({
373
+ chunk: this.chunks.get(r.id),
374
+ score: r.score / (results[0]?.score ?? 1),
375
+ // Normalize to 0-1
376
+ matchType: "bm25"
377
+ })).filter((r) => r.chunk != null);
378
+ }
379
+ // ─── Index Management ─────────────────────────────────────────────────────
380
+ rebuildSearchIndex() {
381
+ this.searchIndex = new MiniSearch({
382
+ fields: ["content", "source", "section", "repo"],
383
+ storeFields: ["id", "collection", "source", "repo", "section"],
384
+ searchOptions: {
385
+ boost: { content: 2, section: 1.5, source: 1 },
386
+ fuzzy: 0.2,
387
+ prefix: true
388
+ }
389
+ });
390
+ const docs = Array.from(this.chunks.values()).map((chunk) => ({
391
+ id: chunk.id,
392
+ content: chunk.content,
393
+ source: chunk.source,
394
+ section: chunk.section ?? "",
395
+ repo: chunk.repo,
396
+ collection: chunk.collection
397
+ }));
398
+ this.searchIndex.addAll(docs);
399
+ }
400
+ // ─── File Resolution ──────────────────────────────────────────────────────
401
+ resolveFiles(patterns) {
402
+ const files = /* @__PURE__ */ new Set();
403
+ for (const pattern of patterns) {
404
+ if (pattern.includes("**")) {
405
+ this.walkGlob(this.config.projectRoot, pattern, files);
406
+ } else {
407
+ const fullPath = join(this.config.projectRoot, pattern);
408
+ if (existsSync(fullPath)) {
409
+ files.add(fullPath);
410
+ }
411
+ }
412
+ }
413
+ return Array.from(files);
414
+ }
415
+ walkGlob(root, pattern, results) {
416
+ const parts = pattern.split("**/");
417
+ const prefix = parts[0] ?? "";
418
+ const suffix = parts.slice(1).join("**/");
419
+ const startDir = prefix ? join(root, prefix) : root;
420
+ if (!existsSync(startDir)) return;
421
+ this.walkDir(startDir, suffix, results);
422
+ }
423
+ walkDir(dir, filePattern, results) {
424
+ try {
425
+ const entries = readdirSync(dir);
426
+ for (const entry of entries) {
427
+ if (entry.startsWith(".") || entry === "node_modules" || entry === "dist" || entry === "coverage") {
428
+ continue;
429
+ }
430
+ const fullPath = join(dir, entry);
431
+ try {
432
+ const stat = statSync(fullPath);
433
+ if (stat.isDirectory()) {
434
+ this.walkDir(fullPath, filePattern, results);
435
+ } else if (stat.isFile() && this.matchesFilePattern(entry, fullPath, filePattern)) {
436
+ results.add(fullPath);
437
+ }
438
+ } catch {
439
+ }
440
+ }
441
+ } catch {
442
+ }
443
+ }
444
+ matchesFilePattern(filename, fullPath, pattern) {
445
+ if (!pattern) return true;
446
+ if (pattern.startsWith("*")) {
447
+ const ext = pattern.slice(1);
448
+ return filename.endsWith(ext);
449
+ }
450
+ if (!pattern.includes("/") && !pattern.includes("*")) {
451
+ return filename === pattern;
452
+ }
453
+ return fullPath.endsWith(pattern.replace(/\*/g, ""));
454
+ }
455
+ // ─── Helpers ──────────────────────────────────────────────────────────────
456
+ extractRepo(relPath) {
457
+ const parts = relPath.split("/");
458
+ return parts[0] ?? "unknown";
459
+ }
460
+ extractSection(content) {
461
+ const match = content.match(/^#+\s+(.+)/m);
462
+ return match?.[1]?.trim();
463
+ }
464
+ // ─── Stats ────────────────────────────────────────────────────────────────
465
+ getStats() {
466
+ const collections = {
467
+ codebase: 0,
468
+ protocols: 0,
469
+ decisions: 0,
470
+ errors: 0,
471
+ compliance: 0,
472
+ domain: 0
473
+ };
474
+ for (const chunk of this.chunks.values()) {
475
+ collections[chunk.collection]++;
476
+ }
477
+ return {
478
+ totalChunks: this.chunks.size,
479
+ collections,
480
+ initialized: this.initialized,
481
+ vectorStoreAvailable: this.chromaAvailable
482
+ };
483
+ }
484
+ };
485
+ /**
486
+ * BaselineOS RAG Engine — Hybrid Retrieval (BM25 + Vector)
487
+ *
488
+ * Combines keyword search (MiniSearch/BM25) with optional vector search
489
+ * (ChromaDB + OpenAI embeddings) for high-quality context retrieval.
490
+ *
491
+ * 6 Collections:
492
+ * codebase — TypeScript/Python source files
493
+ * protocols — Protocol specs, READMEs, architecture docs
494
+ * decisions — ADRs, design decisions
495
+ * errors — Error patterns, debugging context
496
+ * compliance — Regulatory frameworks, audit evidence
497
+ * domain — GTCX domain knowledge, CLAUDE.md files
498
+ *
499
+ * @license Apache-2.0
500
+ */
501
+
502
+ export { RAGEngine };
//# sourceMappingURL=rag-engine.js.map