@itkoren/sqmd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +1052 -0
- package/dist/api/app.d.ts +14 -0
- package/dist/api/app.d.ts.map +1 -0
- package/dist/api/app.js +32 -0
- package/dist/api/app.js.map +1 -0
- package/dist/api/middleware.d.ts +5 -0
- package/dist/api/middleware.d.ts.map +1 -0
- package/dist/api/middleware.js +37 -0
- package/dist/api/middleware.js.map +1 -0
- package/dist/api/models.d.ts +178 -0
- package/dist/api/models.d.ts.map +1 -0
- package/dist/api/models.js +39 -0
- package/dist/api/models.js.map +1 -0
- package/dist/api/routes/documents.d.ts +4 -0
- package/dist/api/routes/documents.d.ts.map +1 -0
- package/dist/api/routes/documents.js +92 -0
- package/dist/api/routes/documents.js.map +1 -0
- package/dist/api/routes/health.d.ts +6 -0
- package/dist/api/routes/health.d.ts.map +1 -0
- package/dist/api/routes/health.js +38 -0
- package/dist/api/routes/health.js.map +1 -0
- package/dist/api/routes/index.d.ts +5 -0
- package/dist/api/routes/index.d.ts.map +1 -0
- package/dist/api/routes/index.js +83 -0
- package/dist/api/routes/index.js.map +1 -0
- package/dist/api/routes/search.d.ts +6 -0
- package/dist/api/routes/search.d.ts.map +1 -0
- package/dist/api/routes/search.js +104 -0
- package/dist/api/routes/search.js.map +1 -0
- package/dist/config/loader.d.ts +4 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +144 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +298 -0
- package/dist/config/schema.d.ts.map +1 -0
- package/dist/config/schema.js +50 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/embeddings/ollama.d.ts +14 -0
- package/dist/embeddings/ollama.d.ts.map +1 -0
- package/dist/embeddings/ollama.js +46 -0
- package/dist/embeddings/ollama.js.map +1 -0
- package/dist/embeddings/transformers.d.ts +14 -0
- package/dist/embeddings/transformers.d.ts.map +1 -0
- package/dist/embeddings/transformers.js +64 -0
- package/dist/embeddings/transformers.js.map +1 -0
- package/dist/embeddings/types.d.ts +6 -0
- package/dist/embeddings/types.d.ts.map +1 -0
- package/dist/embeddings/types.js +2 -0
- package/dist/embeddings/types.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +233 -0
- package/dist/index.js.map +1 -0
- package/dist/ingestion/chunker.d.ts +21 -0
- package/dist/ingestion/chunker.d.ts.map +1 -0
- package/dist/ingestion/chunker.js +117 -0
- package/dist/ingestion/chunker.js.map +1 -0
- package/dist/ingestion/fingerprint.d.ts +6 -0
- package/dist/ingestion/fingerprint.d.ts.map +1 -0
- package/dist/ingestion/fingerprint.js +17 -0
- package/dist/ingestion/fingerprint.js.map +1 -0
- package/dist/ingestion/parser.d.ts +16 -0
- package/dist/ingestion/parser.d.ts.map +1 -0
- package/dist/ingestion/parser.js +98 -0
- package/dist/ingestion/parser.js.map +1 -0
- package/dist/ingestion/pipeline.d.ts +32 -0
- package/dist/ingestion/pipeline.d.ts.map +1 -0
- package/dist/ingestion/pipeline.js +191 -0
- package/dist/ingestion/pipeline.js.map +1 -0
- package/dist/ingestion/scanner.d.ts +2 -0
- package/dist/ingestion/scanner.d.ts.map +1 -0
- package/dist/ingestion/scanner.js +54 -0
- package/dist/ingestion/scanner.js.map +1 -0
- package/dist/mcp/server.d.ts +8 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +73 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +6 -0
- package/dist/mcp/tools.d.ts.map +1 -0
- package/dist/mcp/tools.js +276 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/rag/context-builder.d.ts +3 -0
- package/dist/rag/context-builder.d.ts.map +1 -0
- package/dist/rag/context-builder.js +27 -0
- package/dist/rag/context-builder.js.map +1 -0
- package/dist/rag/prompt-templates.d.ts +5 -0
- package/dist/rag/prompt-templates.d.ts.map +1 -0
- package/dist/rag/prompt-templates.js +41 -0
- package/dist/rag/prompt-templates.js.map +1 -0
- package/dist/search/hybrid.d.ts +14 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +58 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/query.d.ts +4 -0
- package/dist/search/query.d.ts.map +1 -0
- package/dist/search/query.js +23 -0
- package/dist/search/query.js.map +1 -0
- package/dist/search/reranker.d.ts +11 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +44 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/store/db.d.ts +11 -0
- package/dist/store/db.d.ts.map +1 -0
- package/dist/store/db.js +75 -0
- package/dist/store/db.js.map +1 -0
- package/dist/store/reader.d.ts +8 -0
- package/dist/store/reader.d.ts.map +1 -0
- package/dist/store/reader.js +122 -0
- package/dist/store/reader.js.map +1 -0
- package/dist/store/schema.d.ts +39 -0
- package/dist/store/schema.d.ts.map +1 -0
- package/dist/store/schema.js +33 -0
- package/dist/store/schema.js.map +1 -0
- package/dist/store/writer.d.ts +6 -0
- package/dist/store/writer.d.ts.map +1 -0
- package/dist/store/writer.js +43 -0
- package/dist/store/writer.js.map +1 -0
- package/dist/watcher/daemon.d.ts +5 -0
- package/dist/watcher/daemon.d.ts.map +1 -0
- package/dist/watcher/daemon.js +43 -0
- package/dist/watcher/daemon.js.map +1 -0
- package/dist/watcher/handler.d.ts +14 -0
- package/dist/watcher/handler.d.ts.map +1 -0
- package/dist/watcher/handler.js +82 -0
- package/dist/watcher/handler.js.map +1 -0
- package/package.json +56 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * Embedder that obtains vectors from an Ollama server over HTTP.
 *
 * Every text is sent in its own POST request to `<baseUrl>/api/embeddings`.
 */
export class OllamaEmbedder {
    /** Model name sent as the `model` field of every request. */
    modelName;
    /** Declared vector width. Fixed at 768; responses are not checked against it. */
    vectorDim = 768;
    /** Server base URL with all trailing slashes removed. */
    baseUrl;
    /**
     * @param modelName Ollama model to request embeddings from.
     * @param baseUrl Base URL of the Ollama server, with or without trailing slashes.
     */
    constructor(modelName, baseUrl) {
        this.modelName = modelName;
        // Strip every trailing slash (not just one) so that a configured value
        // such as "http://host:11434//" cannot produce "//api/embeddings".
        this.baseUrl = baseUrl.replace(/\/+$/, '');
    }
    /**
     * Embeds each text in order, issuing one request at a time.
     *
     * @param texts Texts to embed.
     * @returns One vector per input text, in input order.
     * @throws Error when any request fails or returns an unusable payload.
     */
    async embed(texts) {
        const results = [];
        for (const text of texts) {
            results.push(await this.embedSingle(text));
        }
        return results;
    }
    /**
     * Requests the embedding for a single text.
     *
     * @param text Text sent as the `prompt` field.
     * @returns The `embedding` array from the response body.
     * @throws Error on a non-2xx status or when `embedding` is missing or not an array.
     */
    async embedSingle(text) {
        const response = await fetch(`${this.baseUrl}/api/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: this.modelName,
                prompt: text,
            }),
        });
        if (!response.ok) {
            throw new Error(`Ollama embeddings API error: ${response.status} ${response.statusText}`);
        }
        const data = await response.json();
        if (!data.embedding || !Array.isArray(data.embedding)) {
            throw new Error('Invalid response from Ollama embeddings API: missing embedding field');
        }
        return data.embedding;
    }
}
|
|
38
|
+
// ES modules have no `require`, so the transformers backend is brought in with
// static imports. Import declarations are hoisted, which makes this position
// valid. The import stays cheap because transformers.js only loads
// `@huggingface/transformers` lazily, on first use.
import * as os from 'node:os';
import * as path from 'node:path';
import { TransformersEmbedder } from './transformers.js';
/**
 * Builds the embedder for the requested backend.
 *
 * @param backend `'ollama'` selects {@link OllamaEmbedder}; any other value
 *   selects the transformers backend.
 * @param modelName Model identifier forwarded to the chosen embedder.
 * @param options Optional overrides: `ollamaBaseUrl` (default
 *   `http://localhost:11434`) and `cacheDir` (default `<home>/.sqmd/models`).
 * @returns The constructed embedder instance.
 */
export function createEmbedder(backend, modelName, options = {}) {
    if (backend === 'ollama') {
        return new OllamaEmbedder(modelName, options.ollamaBaseUrl ?? 'http://localhost:11434');
    }
    // Default to transformers. Node does not expand "~", so the default cache
    // location is resolved against the real home directory instead of being
    // passed through as a literal "~/..." path.
    const defaultCacheDir = path.join(os.homedir(), '.sqmd', 'models');
    return new TransformersEmbedder(modelName, options.cacheDir ?? defaultCacheDir);
}
|
|
46
|
+
//# sourceMappingURL=ollama.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ollama.js","sourceRoot":"","sources":["../../src/embeddings/ollama.ts"],"names":[],"mappings":"AAMA,MAAM,OAAO,cAAc;IAChB,SAAS,CAAS;IAClB,SAAS,GAAG,GAAG,CAAC;IACR,OAAO,CAAS;IAEjC,YAAY,SAAiB,EAAE,OAAe;QAC5C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC/C,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC1B,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,IAAY;QACpC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,iBAAiB,EAAE;YAC7D,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,IAAI,CAAC,SAAS;gBACrB,MAAM,EAAE,IAAI;aACb,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,gCAAgC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5F,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA4B,CAAC;QAEhE,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;QAC1F,CAAC;QAED,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;CACF;AAED,MAAM,UAAU,cAAc,CAC5B,OAAkC,EAClC,SAAiB,EACjB,UAAyD,EAAE;IAE3D,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzB,OAAO,IAAI,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,aAAa,IAAI,wBAAwB,CAAC,CAAC;IAC1F,CAAC;IAED,0BAA0B;IAC1B,MAAM,EAAE,oBAAoB,EAAE,GAAG,OAAO,CAAC,mBAAmB,CAE3D,CAAC;IACF,OAAO,IAAI,oBAAoB,CAAC,SAAS,EAAE,OAAO,CAAC,QAAQ,IAAI,gBAAgB,CAAC,CAAC;AACnF,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { Embedder } from './types.js';
|
|
2
|
+
/**
 * Embedder whose implementation runs a `feature-extraction` pipeline from
 * `@huggingface/transformers`.
 */
export declare class TransformersEmbedder implements Embedder {
|
|
3
|
+
/** Model identifier handed to the pipeline factory. */
readonly modelName: string;
|
|
4
|
+
/**
 * Declared embedding width. The implementation uses it only as the fallback
 * row length when the pipeline output carries no second dimension.
 */
readonly vectorDim = 768;
|
|
5
|
+
/** Directory assigned to the transformers `env.cacheDir` before the pipeline is created. */
private readonly cacheDir;
|
|
6
|
+
/** Cached pipeline instance; starts as `null` and is populated on first use. */
private pipeline;
|
|
7
|
+
/**
 * @param modelName Model identifier to load.
 * @param cacheDir Directory used as the model cache.
 */
constructor(modelName: string, cacheDir: string);
|
|
8
|
+
/** Dynamically imports `@huggingface/transformers`, creates the pipeline once and caches it. */
private loadPipeline;
|
|
9
|
+
/** Embeds texts after prefixing each one with `search_document: `. */
embedDocuments(texts: string[]): Promise<number[][]>;
|
|
10
|
+
/** Embeds one query prefixed with `search_query: ` and returns its single vector. */
embedQuery(query: string): Promise<number[]>;
|
|
11
|
+
/** Embeds texts exactly as given, without adding any prefix. */
embed(texts: string[]): Promise<number[][]>;
|
|
12
|
+
/** Runs the pipeline with mean pooling and normalization and converts the output to `number[][]`. */
private runEmbedding;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=transformers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transformers.d.ts","sourceRoot":"","sources":["../../src/embeddings/transformers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAI3C,qBAAa,oBAAqB,YAAW,QAAQ;IACnD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,OAAO;IACzB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAA6B;gBAEjC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM;YAKjC,YAAY;IAgBpB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKpD,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAM5C,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAKnC,YAAY;CAkC3B"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
 * Embedder that runs a `feature-extraction` pipeline from
 * `@huggingface/transformers` inside the current process.
 */
export class TransformersEmbedder {
    /** Model identifier handed to the pipeline factory. */
    modelName;
    /** Declared embedding width; also the fallback row length for raw tensor output. */
    vectorDim = 768;
    /** Directory assigned to `env.cacheDir` before the pipeline is created. */
    cacheDir;
    /** Loaded pipeline, or `null` until the first successful load. */
    pipeline = null;
    /** In-flight load shared by concurrent callers; `null` when no load is running. */
    pipelinePromise = null;
    /**
     * @param modelName Model identifier to load.
     * @param cacheDir Directory used as the model cache.
     */
    constructor(modelName, cacheDir) {
        this.modelName = modelName;
        this.cacheDir = cacheDir;
    }
    /**
     * Returns the pipeline, creating it on first use.
     *
     * Concurrent callers that arrive before the first load finishes share one
     * load instead of each creating their own pipeline. A failed load is not
     * cached, so a later call can retry.
     */
    async loadPipeline() {
        if (this.pipeline)
            return this.pipeline;
        if (!this.pipelinePromise) {
            this.pipelinePromise = (async () => {
                // Dynamic import to avoid loading at startup
                const { pipeline, env } = await import('@huggingface/transformers');
                // Configure cache directory
                env.cacheDir = this.cacheDir;
                this.pipeline = await pipeline('feature-extraction', this.modelName, {
                    dtype: 'fp32',
                });
                return this.pipeline;
            })().finally(() => {
                // Success is remembered in `this.pipeline`; clearing the promise
                // lets a failed load be retried.
                this.pipelinePromise = null;
            });
        }
        return this.pipelinePromise;
    }
    /**
     * Embeds texts after prefixing each one with `search_document: `.
     * @param texts Document texts.
     * @returns One vector per text, in input order.
     */
    async embedDocuments(texts) {
        const prefixed = texts.map((t) => `search_document: ${t}`);
        return this.runEmbedding(prefixed);
    }
    /**
     * Embeds a single query prefixed with `search_query: `.
     * @param query Query text.
     * @returns The vector for the query.
     */
    async embedQuery(query) {
        const prefixed = `search_query: ${query}`;
        const results = await this.runEmbedding([prefixed]);
        return results[0];
    }
    /**
     * Embeds texts exactly as given. Callers that need prefixes use
     * `embedDocuments` / `embedQuery`.
     */
    async embed(texts) {
        return this.runEmbedding(texts);
    }
    /**
     * Runs the pipeline with mean pooling and normalization and converts the
     * result to plain nested arrays.
     *
     * @param texts Texts passed to the pipeline unchanged.
     * @returns One vector per text; `[]` for empty input (the model is not loaded).
     * @throws Error when the pipeline output has none of the handled shapes.
     */
    async runEmbedding(texts) {
        // Nothing to embed: skip the model load and the pipeline call.
        if (texts.length === 0)
            return [];
        const pipe = await this.loadPipeline();
        const output = await pipe(texts, { pooling: 'mean', normalize: true });
        const isObject = Boolean(output) && typeof output === 'object';
        // Handle tensor output from transformers.js v3
        if (isObject && 'tolist' in output) {
            return output.tolist();
        }
        // Raw tensor-like object: rebuild rows from the flat buffer.
        if (isObject && 'data' in output) {
            const data = output.data;
            // `dims` may be absent on tensor-like objects; fall back to defaults.
            const dims = output.dims ?? [];
            const batchSize = dims[0] ?? texts.length;
            const embDim = dims[1] ?? this.vectorDim;
            const results = [];
            for (let row = 0; row < batchSize; row++) {
                const vec = [];
                for (let col = 0; col < embDim; col++) {
                    vec.push(Number(data[row * embDim + col]));
                }
                results.push(vec);
            }
            return results;
        }
        if (Array.isArray(output)) {
            return output.map((v) => Array.from(v));
        }
        throw new Error('Unexpected output format from transformers pipeline');
    }
}
|
|
64
|
+
//# sourceMappingURL=transformers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transformers.js","sourceRoot":"","sources":["../../src/embeddings/transformers.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,oBAAoB;IACtB,SAAS,CAAS;IAClB,SAAS,GAAG,GAAG,CAAC;IACR,QAAQ,CAAS;IAC1B,QAAQ,GAAwB,IAAI,CAAC;IAE7C,YAAY,SAAiB,EAAE,QAAgB;QAC7C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAEO,KAAK,CAAC,YAAY;QACxB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QAExC,6CAA6C;QAC7C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QAEpE,4BAA4B;QAC5B,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAE7B,IAAI,CAAC,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,EAAE;YACpE,KAAK,EAAE,MAAM;SACd,CAAC,CAA4B,CAAC;QAE/B,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,KAAe;QAClC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC;QAC3D,OAAO,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,MAAM,QAAQ,GAAG,iBAAiB,KAAK,EAAE,CAAC;QAC1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QACpD,OAAO,OAAO,CAAC,CAAC,CAAE,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,0FAA0F;QAC1F,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,KAAe;QACxC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAEvC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEvE,+CAA+C;QAC/C,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;YAC/D,MAAM,IAAI,GAAI,MAAmC,CAAC,MAAM,EAAE,CAAC;YAC3D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,IAAI,MAAM,EAAE,CAAC;YAC7D,MAAM,IAAI,GAAI,MAA4D,CAAC,IAAI,CAAC;YAChF,MAAM,IAAI,GAAI,MAA4D,CAAC,IAAI,CAAC;YAChF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC;YAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC;YAEzC,MAAM,OAAO,GAAe,EAAE,CAAC;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,MAAM,GAAG,GAAa,EAAE,CAAC;gBACzB,KAAK,IAAI,CAAC,GAAG,CAAC,
EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAChC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,CAAC;gBACD,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YACD,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAQ,MAAqB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAC;IACzE,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/embeddings/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,QAAQ;IACvB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/embeddings/types.ts"],"names":[],"mappings":""}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import * as os from 'node:os';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
import chalk from 'chalk';
|
|
5
|
+
import { Command } from 'commander';
|
|
6
|
+
import { loadConfig, writeDefaultConfig } from './config/loader.js';
|
|
7
|
+
import { OllamaEmbedder } from './embeddings/ollama.js';
|
|
8
|
+
import { TransformersEmbedder } from './embeddings/transformers.js';
|
|
9
|
+
import { IndexPipeline } from './ingestion/pipeline.js';
|
|
10
|
+
import { getChunksTable, getDb, getDbStats, getFilesTable } from './store/db.js';
|
|
11
|
+
import { getAllFiles } from './store/reader.js';
|
|
12
|
+
// Root CLI command; every subcommand below is registered on this instance.
const program = new Command()
    .name('sqmd')
    .description('Local semantic search engine over Markdown files')
    .version('0.1.0');
|
|
17
|
+
// ─── index command ───────────────────────────────────────────────────────────
|
|
18
|
+
// `sqmd index`: runs the indexing pipeline over --path (or the configured
// watch_dirs), prints per-file progress and a summary, and optionally starts
// the file watcher afterwards.
program
    .command('index')
    .description('Index Markdown files for search')
    .option('--path <path>', 'Path to directory or file to index')
    .option('--force', 'Force re-indexing even if files are unchanged', false)
    .option('--watch', 'Start file watcher after indexing', false)
    .option('--config <path>', 'Path to config file')
    .action(async (opts) => {
        const config = loadConfig(opts.config);
        const paths = opts.path ? [path.resolve(opts.path)] : config.paths.watch_dirs;
        console.log(chalk.blue('Starting indexing...'));
        console.log(chalk.gray(`Paths: ${paths.join(', ')}`));
        const pipeline = new IndexPipeline(config);
        const result = await pipeline.run({
            paths,
            force: opts.force,
            onProgress: (event) => {
                switch (event.type) {
                    case 'file_start':
                        // Trailing "\r" lets the next line overwrite this one.
                        process.stdout.write(chalk.gray(`  Processing: ${event.filePath}\r`));
                        break;
                    case 'file_done':
                        console.log(chalk.green(`  ✓ ${event.filePath}`));
                        break;
                    case 'file_error':
                        console.log(chalk.red(`  ✗ ${event.filePath}`));
                        break;
                    default:
                        // 'file_skip' and any other event: silent.
                        break;
                }
            },
        });
        console.log('');
        console.log(chalk.green('Done!'));
        console.log(`  Indexed: ${result.indexed}`);
        console.log(`  Skipped: ${result.skipped}`);
        console.log(`  Errors:  ${result.errors.length}`);
        if (result.errors.length > 0) {
            console.log(chalk.red('\nErrors:'));
            for (const e of result.errors) {
                console.log(chalk.red(`  ${e.filePath}: ${e.error}`));
            }
            // Report failure through the exit status so scripts and CI can
            // detect a partially failed index run.
            process.exitCode = 1;
        }
        if (opts.watch) {
            const { startWatcher } = await import('./watcher/daemon.js');
            const db = await getDb(config.paths.db_path);
            startWatcher(config, db);
            console.log(chalk.blue('\nWatcher started. Press Ctrl+C to stop.'));
            // Exit cleanly on Ctrl+C.
            process.on('SIGINT', () => {
                console.log('\nStopping watcher...');
                process.exit(0);
            });
        }
    });
|
|
72
|
+
// ─── search command ──────────────────────────────────────────────────────────
|
|
73
|
+
// `sqmd search <query>`: embeds the query with the configured backend, runs
// hybridSearch over the chunks table and prints ranked results with a snippet.
program
    .command('search <query>')
    .description('Search indexed documents')
    .option('--top-k <n>', 'Number of results', '10')
    .option('--mode <mode>', 'Search mode: hybrid, vector, fts', 'hybrid')
    .option('--filter <path>', 'Filter results to files matching path')
    .option('--config <path>', 'Path to config file')
    .action(async (query, opts) => {
        // Validate user input before touching the database: a non-numeric
        // --top-k would otherwise reach the search layer as NaN.
        const topK = Number.parseInt(opts.topK, 10);
        if (!Number.isInteger(topK) || topK < 1) {
            console.error(chalk.red(`Invalid --top-k value "${opts.topK}": expected a positive integer.`));
            process.exitCode = 1;
            return;
        }
        const mode = opts.mode;
        if (!['hybrid', 'vector', 'fts'].includes(mode)) {
            console.error(chalk.red(`Invalid --mode value "${opts.mode}": expected hybrid, vector or fts.`));
            process.exitCode = 1;
            return;
        }
        const config = loadConfig(opts.config);
        const db = await getDb(config.paths.db_path);
        const chunksTable = await getChunksTable(db);
        const embedder = config.embeddings.backend === 'ollama'
            ? new OllamaEmbedder(config.embeddings.model, config.embeddings.ollama_base_url)
            : new TransformersEmbedder(config.embeddings.model, config.paths.model_cache_dir);
        const { hybridSearch } = await import('./search/hybrid.js');
        console.log(chalk.blue(`Searching for: "${query}"`));
        console.log(chalk.gray(`Mode: ${mode}, Top-K: ${topK}`));
        console.log('');
        const start = Date.now();
        const results = await hybridSearch(chunksTable, embedder, {
            query,
            topK,
            mode,
            rrfK: config.search.rrf_k,
            filterPath: opts.filter,
            modelName: config.embeddings.model,
        });
        const duration = Date.now() - start;
        if (results.length === 0) {
            console.log(chalk.yellow('No results found.'));
            return;
        }
        for (const [idx, result] of results.entries()) {
            console.log(chalk.bold(`${idx + 1}. ${result.file_path}`));
            if (result.heading_path) {
                console.log(chalk.cyan(`   § ${result.heading_path}`));
            }
            console.log(chalk.gray(`   Score: ${result.score.toFixed(4)} | Lines ${result.line_start}-${result.line_end}`));
            console.log('');
            // Show the first 200 characters on one line, with an ellipsis when truncated.
            const snippet = result.text_raw.slice(0, 200).replace(/\n/g, ' ');
            console.log(`   ${snippet}${result.text_raw.length > 200 ? '...' : ''}`);
            console.log('');
        }
        console.log(chalk.gray(`Found ${results.length} results in ${duration}ms`));
    });
|
|
125
|
+
// ─── serve command ───────────────────────────────────────────────────────────
|
|
126
|
+
// `sqmd serve`: starts the REST API (and the file watcher when enabled in the
// config). --host and --port override the values from the config file.
program
    .command('serve')
    .description('Start the REST API server')
    .option('--host <host>', 'Host to bind to')
    .option('--port <port>', 'Port to listen on')
    .option('--config <path>', 'Path to config file')
    .action(async (opts) => {
        const config = loadConfig(opts.config);
        if (opts.host)
            config.api.host = opts.host;
        if (opts.port) {
            // Reject non-numeric or out-of-range ports instead of passing NaN
            // on to the HTTP server.
            const requestedPort = Number.parseInt(opts.port, 10);
            if (!Number.isInteger(requestedPort) || requestedPort < 0 || requestedPort > 65535) {
                console.error(chalk.red(`Invalid --port value "${opts.port}": expected an integer between 0 and 65535.`));
                process.exitCode = 1;
                return;
            }
            config.api.port = requestedPort;
        }
        const db = await getDb(config.paths.db_path);
        const embedder = config.embeddings.backend === 'ollama'
            ? new OllamaEmbedder(config.embeddings.model, config.embeddings.ollama_base_url)
            : new TransformersEmbedder(config.embeddings.model, config.paths.model_cache_dir);
        // Shared with the app so it can report whether the watcher is running.
        const watcherStatus = { running: false };
        if (config.watcher.enabled) {
            const { startWatcher } = await import('./watcher/daemon.js');
            startWatcher(config, db);
            watcherStatus.running = true;
        }
        const { createApp } = await import('./api/app.js');
        const { serve } = await import('@hono/node-server');
        const app = createApp({ db, embedder, config, watcherStatus });
        const { host, port } = config.api;
        console.log(chalk.blue('Starting sqmd API server'));
        console.log(chalk.gray(`Listening on http://${host}:${port}`));
        serve({
            fetch: app.fetch,
            hostname: host,
            port,
        });
    });
|
|
164
|
+
// ─── mcp command ─────────────────────────────────────────────────────────────
|
|
165
|
+
// `sqmd mcp`: starts the MCP server over the chosen transport. Diagnostics go
// to stderr only.
program
    .command('mcp')
    .description('Start the MCP server')
    .option('--transport <transport>', 'Transport: stdio or sse', 'stdio')
    .option('--port <port>', 'Port for SSE transport')
    .option('--config <path>', 'Path to config file')
    .action(async (opts) => {
        // Validate --port up front so a typo is reported instead of being
        // forwarded to the server as NaN.
        let port;
        if (opts.port) {
            port = Number.parseInt(opts.port, 10);
            if (!Number.isInteger(port) || port < 0 || port > 65535) {
                console.error(chalk.red(`Invalid --port value "${opts.port}": expected an integer between 0 and 65535.`));
                process.exitCode = 1;
                return;
            }
        }
        const config = loadConfig(opts.config);
        const db = await getDb(config.paths.db_path);
        const embedder = config.embeddings.backend === 'ollama'
            ? new OllamaEmbedder(config.embeddings.model, config.embeddings.ollama_base_url)
            : new TransformersEmbedder(config.embeddings.model, config.paths.model_cache_dir);
        const { startMcpServer } = await import('./mcp/server.js');
        await startMcpServer(db, embedder, config, {
            transport: opts.transport,
            port,
        });
    });
|
|
187
|
+
// ─── status command ──────────────────────────────────────────────────────────
|
|
188
|
+
// `sqmd status`: prints database statistics and the time of the most recent
// successful index. Failures are reported with a hint to run `sqmd index`.
program
    .command('status')
    .description('Show index statistics')
    .option('--config <path>', 'Path to config file')
    .action(async (opts) => {
        const config = loadConfig(opts.config);
        try {
            const db = await getDb(config.paths.db_path);
            const stats = await getDbStats(db);
            const filesTable = await getFilesTable(db);
            const files = await getAllFiles(filesTable);
            const indexedFiles = files.filter((f) => f.status === 'indexed');
            let lastIndexed = 'Never';
            if (indexedFiles.length > 0) {
                // Fold instead of Math.max(...array): spreading one argument
                // per file throws RangeError once the index grows large.
                const latest = indexedFiles.reduce((max, f) => Math.max(max, f.indexed_at), -Infinity);
                lastIndexed = new Date(latest).toLocaleString();
            }
            console.log(chalk.bold('sqmd Status'));
            console.log(chalk.gray('─'.repeat(40)));
            console.log(`DB path:        ${config.paths.db_path}`);
            console.log(`Files indexed:  ${chalk.green(stats.fileCount)}`);
            console.log(`Chunks stored:  ${chalk.green(stats.chunkCount)}`);
            console.log(`Last indexed:   ${lastIndexed}`);
            console.log(`Watch dirs:     ${config.paths.watch_dirs.join(', ')}`);
            console.log(`Embedder:       ${config.embeddings.backend} / ${config.embeddings.model}`);
        }
        catch (err) {
            console.log(chalk.red('Error reading status:'), err instanceof Error ? err.message : String(err));
            console.log(chalk.gray('Database may not be initialized. Run `sqmd index` first.'));
        }
    });
|
|
217
|
+
// ─── config command ──────────────────────────────────────────────────────────
|
|
218
|
+
// `sqmd config --init <path>`: writes the default configuration file to the
// given path; without --init it prints a usage hint.
program
    .command('config')
    .description('Manage configuration')
    .option('--init <path>', 'Write default config to specified path')
    .action((opts) => {
        if (!opts.init) {
            console.log('Usage: sqmd config --init <path>');
            return;
        }
        // Expand only a bare "~" or a leading "~/" (or "~\"): a "~user/..."
        // path must not be rewritten to the current user's home. The replacer
        // function keeps "$" sequences in the home directory literal.
        const targetPath = opts.init.replace(/^~(?=$|[\\/])/, () => os.homedir());
        writeDefaultConfig(targetPath);
        console.log(chalk.green(`Default config written to: ${targetPath}`));
    });
|
|
232
|
+
// The command actions are async, so parse with parseAsync: rejections from an
// action then surface here as one readable error and a non-zero exit status
// instead of an unhandled promise rejection.
program.parseAsync(process.argv).catch((err) => {
    console.error(chalk.red('Error:'), err instanceof Error ? err.message : String(err));
    process.exitCode = 1;
});
|
|
233
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AAEpE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AACjF,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,MAAM,CAAC;KACZ,WAAW,CAAC,kDAAkD,CAAC;KAC/D,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,iCAAiC,CAAC;KAC9C,MAAM,CAAC,eAAe,EAAE,oCAAoC,CAAC;KAC7D,MAAM,CAAC,SAAS,EAAE,+CAA+C,EAAE,KAAK,CAAC;KACzE,MAAM,CAAC,SAAS,EAAE,mCAAmC,EAAE,KAAK,CAAC;KAC7D,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC;KAChD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;IACrB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC;IAE9E,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IAEtD,MAAM,QAAQ,GAAG,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IAE3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC;QAChC,KAAK;QACL,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,UAAU,EAAE,CAAC,KAAK,EAAE,EAAE;YACpB,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBAChC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,iBAAiB,KAAK,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;YACxE,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACtC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;YACpD,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACtC,cAAc;YAChB,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,KAAK,CAAC,QAAQ,EA
AE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAEjD,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;QAC7D,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC7C,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC,CAAC;QAEpE,qBAAqB;QACrB,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;YACxB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;YACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,gBAAgB,CAAC;KACzB,WAAW,CAAC,0BAA0B,CAAC;KACvC,MAAM,CAAC,aAAa,EAAE,mBAAmB,EAAE,IAAI,CAAC;KAChD,MAAM,CAAC,eAAe,EAAE,kCAAkC,EAAE,QAAQ,CAAC;KACrE,MAAM,CAAC,iBAAiB,EAAE,uCAAuC,CAAC;KAClE,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC;KAChD,MAAM,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;IAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,MAAM,cAAc,CAAC,EAAE,CAAC,CAAC;IAE7C,IAAI,QAAkB,CAAC;IACvB,IAAI,MAAM,CAAC,UAAU,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC3C,QAAQ,GAAG,IAAI,cAAc,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IAC7F,CAAC;IAED,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAE5D,MAAM,IAAI,GAAG,MAAM,CA
AC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAmC,CAAC;IAEtD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,KAAK,GAAG,CAAC,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,WAAW,EAAE,QAAQ,EAAE;QACxD,KAAK;QACL,IAAI;QACJ,IAAI;QACJ,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;QACzB,UAAU,EAAE,IAAI,CAAC,MAAM;QACvB,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,KAAK;KACnC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;QAC9B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,KAAK,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAC3D,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QACzD,CAAC;QACD,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CACR,aAAa,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,QAAQ,EAAE,CACvF,CACF,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAEhB,eAAe;QACf,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACzE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,OAAO,CAAC,MAAM,eAAe,QAAQ,IAAI,CAAC,CAAC,CAAC;AAC9E,CAAC,CAAC,CAAC;AAEL,gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,2BAA2B,CAAC;KACxC,MAAM,CAAC,eAAe,EAAE,iBAAiB,CAAC;KAC1C,MAAM,CAAC,eAAe,EAAE,mBAAmB,CAAC;KAC5C,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC;KAChD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;IACrB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,IAAI,IAAI,CAAC,IAAI;QAAE,MAAM,CAAC,GAAG,CAAC
,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IAC3C,IAAI,IAAI,CAAC,IAAI;QAAE,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAEhE,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE7C,IAAI,QAAkB,CAAC;IACvB,IAAI,MAAM,CAAC,UAAU,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC3C,QAAQ,GAAG,IAAI,cAAc,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IAC7F,CAAC;IAED,MAAM,aAAa,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAEzC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;QAC7D,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzB,aAAa,CAAC,OAAO,GAAG,IAAI,CAAC;IAC/B,CAAC;IAED,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;IACnD,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAEpD,MAAM,GAAG,GAAG,SAAS,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;IAE/D,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC;IAElC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uBAAuB,IAAI,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC;IAE/D,KAAK,CAAC;QACJ,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,QAAQ,EAAE,IAAI;QACd,IAAI;KACL,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEL,gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,sBAAsB,CAAC;KACnC,MAAM,CAAC,yBAAyB,EAAE,yBAAyB,EAAE,OAAO,CAAC;KACrE,MAAM,CAAC,eAAe,EAAE,wBAAwB,CAAC;KACjD,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC;KAChD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;IACrB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE7C,IAAI,QAAkB,CAAC;IACvB,IAAI,MAAM,CAAC,UAAU,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC3C,QAAQ,GAAG,IAAI,cAAc,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IAC7F,CAAC;IAED,MAAM,EAAE,cAAc,
EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IAE3D,MAAM,cAAc,CAAC,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE;QACzC,SAAS,EAAE,IAAI,CAAC,SAA4B;QAC5C,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;KAC7D,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEL,gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,uBAAuB,CAAC;KACpC,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC;KAChD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;IACrB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC7C,MAAM,KAAK,GAAG,MAAM,UAAU,CAAC,EAAE,CAAC,CAAC;QAEnC,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,EAAE,CAAC,CAAC;QAC3C,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QAE5C,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;QACjE,MAAM,WAAW,GACf,YAAY,CAAC,MAAM,GAAG,CAAC;YACrB,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,cAAc,EAAE;YAC/E,CAAC,CAAC,OAAO,CAAC;QAEd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACtD,OAAO,CAAC,GAAG,CAAC,kBAAkB,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAC9D,OAAO,CAAC,GAAG,CAAC,kBAAkB,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;QAC/D,OAAO,CAAC,GAAG,CAAC,kBAAkB,WAAW,EAAE,CAAC,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpE,OAAO,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,UAAU,CAAC,OAAO,MAAM,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,CAAC;IAC1F,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CAAC,uBAAuB,CAAC,EAClC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CACjD,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC,CAAC;IACtF,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,
gFAAgF;AAChF,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,sBAAsB,CAAC;KACnC,MAAM,CAAC,eAAe,EAAE,wCAAwC,CAAC;KACjE,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;IACf,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;QACzD,kBAAkB,CAAC,UAAU,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,8BAA8B,UAAU,EAAE,CAAC,CAAC,CAAC;IACvE,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;IAClD,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { ChunkRecord } from '../store/schema.js';
|
|
2
|
+
import type { ParsedDocument } from './parser.js';
|
|
3
|
+
/**
 * Per-file arguments for `chunkDocument`: the identity of the source file
 * plus the knobs that control how its sections are cut into chunks.
 */
export interface ChunkInput {
    /** Stored on every produced chunk record as `file_id`. */
    fileId: string;
    /** Stored as `file_hash`; also the leading component of each `chunk_id`. */
    fileHash: string;
    /** Stored as `file_path`; its stem is the breadcrumb fallback when a section has no heading path. */
    filePath: string;
    /** Stored on every produced chunk record as `file_mtime`. */
    fileMtime: number;
    /** Estimated-token budget; a section whose estimate exceeds it is split at paragraph boundaries. */
    maxTokens: number;
    /** Chunks whose raw text (without breadcrumb) is shorter than this many characters are dropped. */
    minChars: number;
    /** When splitting, the previous paragraph is repeated in the next chunk if its estimate is at most this. */
    overlapTokens: number;
    /** Whether a `Section: ...` breadcrumb line is prepended to each chunk's `text`. */
    includeBreadcrumb: boolean;
}
|
|
13
|
+
/**
 * The chunking knobs on their own, without any file identity.
 * These are the same four fields that close out `ChunkInput`.
 *
 * NOTE(review): neither function declared in this file takes this type;
 * confirm where it is consumed.
 */
export interface ChunkOptions {
    /** Estimated-token budget for a single chunk. */
    maxTokens: number;
    /** Minimum raw-text length, in characters, for a chunk to be kept. */
    minChars: number;
    /** Estimated-token allowance for content repeated between neighbouring chunks. */
    overlapTokens: number;
    /** Whether chunks are prefixed with a breadcrumb line. */
    includeBreadcrumb: boolean;
}
|
|
19
|
+
/**
 * Rough token estimate for `text`: about 1.3 tokens per
 * whitespace-separated word, rounded up. This is a heuristic, not a
 * real tokenizer.
 */
export declare function estimateTokens(text: string): number;
|
|
20
|
+
/**
 * Turns every section of `doc` into one or more chunk records.
 *
 * A section whose estimated token count fits within `input.maxTokens`
 * becomes a single chunk; a larger one is split at paragraph boundaries.
 * Chunks whose raw text is shorter than `input.minChars` are dropped.
 * Each record is returned with an empty `vector`, to be filled in later
 * by the embedding step.
 */
export declare function chunkDocument(doc: ParsedDocument, input: ChunkInput): ChunkRecord[];
|
|
21
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/ingestion/chunker.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAGD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAgFD,wBAAgB,aAAa,CAAC,GAAG,EAAE,cAAc,EAAE,KAAK,EAAE,UAAU,GAAG,WAAW,EAAE,CA8EnF"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
/**
 * Rough token count estimate: ~1.3 tokens per whitespace-separated word,
 * rounded up.
 *
 * The text is trimmed before counting so that leading/trailing whitespace
 * does not contribute phantom "words", and empty or whitespace-only input
 * is reported as 0 tokens (splitting an empty string would otherwise yield
 * one empty piece and a non-zero estimate).
 *
 * @param {string} text Text to estimate.
 * @returns {number} Estimated token count; 0 when `text` holds no words.
 */
export function estimateTokens(text) {
    const trimmed = text.trim();
    if (trimmed.length === 0) {
        return 0;
    }
    return Math.ceil(trimmed.split(/\s+/).length * 1.3);
}
|
|
6
|
+
/**
 * Builds the `Section: ...` prefix placed in front of a chunk's text.
 *
 * The label is the section's heading path when it is non-empty; otherwise
 * it falls back to the file name without directory or extension.
 *
 * @param {string} headingPath Heading path of the section (may be empty).
 * @param {string} filePath Path of the source file.
 * @returns {string} The breadcrumb line followed by a blank line.
 */
function buildBreadcrumb(headingPath, filePath) {
    const fileStem = path.basename(filePath, path.extname(filePath));
    const label = headingPath && headingPath.length > 0 ? headingPath : fileStem;
    return `Section: ${label}\n\n`;
}
|
|
13
|
+
/**
 * Splits text into paragraphs at blank lines and drops pieces that contain
 * only whitespace.
 *
 * A blank line is a line break followed by one or more lines holding
 * nothing but spaces/tabs. Both LF and CRLF line endings are recognised,
 * so Windows-authored files and "blank" lines with stray indentation still
 * separate paragraphs. Indentation at the start of the following paragraph
 * is left intact.
 *
 * @param {string} text Section content.
 * @returns {string[]} Non-empty paragraphs in document order.
 */
function splitIntoParagraphs(text) {
    return text
        .split(/\r?\n(?:[ \t]*\r?\n)+/)
        .filter((paragraph) => paragraph.trim().length > 0);
}
|
|
16
|
+
/**
 * Packs the paragraphs of an oversized section into consecutive chunks.
 *
 * Paragraphs are accumulated until adding the next one would push the
 * estimated size (breadcrumb included) past `maxTokens`; the accumulated
 * paragraphs are then emitted as a chunk. The last paragraph of an emitted
 * chunk is repeated at the start of the next one when its own estimate is
 * at most `overlapTokens`. A single paragraph is never split, so a chunk
 * may still exceed `maxTokens`.
 *
 * Every chunk carries the section-level `lineStart`/`lineEnd` unchanged.
 *
 * @param {string} content Raw section text.
 * @param {string} breadcrumb Prefix prepended to each chunk's `text`.
 * @param {number} lineStart Section start line, copied onto each chunk.
 * @param {number} lineEnd Section end line, copied onto each chunk.
 * @param {number} maxTokens Estimated-token budget per chunk.
 * @param {number} overlapTokens Largest paragraph estimate that is carried over.
 * @param {number} startChunkIndex Index assigned to the first chunk.
 * @returns {Array<{text: string, textRaw: string, lineStart: number, lineEnd: number, chunkIndex: number}>}
 */
function splitSectionIntoChunks(content, breadcrumb, lineStart, lineEnd, maxTokens, overlapTokens, startChunkIndex) {
    const chunks = [];
    let nextIndex = startChunkIndex;
    let pending = [];

    // Emits whatever is currently accumulated as one chunk.
    const emitPending = () => {
        const body = pending.join('\n\n');
        chunks.push({
            text: breadcrumb + body,
            textRaw: body,
            lineStart,
            lineEnd,
            chunkIndex: nextIndex,
        });
        nextIndex += 1;
    };

    for (const paragraph of splitIntoParagraphs(content)) {
        // An empty accumulator always accepts the paragraph, however large.
        const wouldOverflow = pending.length > 0 &&
            estimateTokens(breadcrumb + pending.concat(paragraph).join('\n\n')) > maxTokens;
        if (!wouldOverflow) {
            pending.push(paragraph);
            continue;
        }

        emitPending();

        // Overlap approximation: repeat the previous paragraph only if it is small enough.
        const tail = pending[pending.length - 1];
        pending = estimateTokens(tail) <= overlapTokens ? [tail, paragraph] : [paragraph];
    }

    if (pending.length > 0) {
        emitPending();
    }
    return chunks;
}
|
|
62
|
+
/**
 * Converts a parsed document into chunk records, section by section.
 *
 * A section whose estimated size (breadcrumb included) fits within
 * `maxTokens` yields one chunk with index 0; otherwise it is split at
 * paragraph boundaries. Chunks whose raw text is shorter than `minChars`
 * are skipped. Chunk ids have the form `<fileHash>:<sectionIdx>:<chunkIndex>`.
 *
 * @param {{sections: Array<object>}} doc Parsed document.
 * @param {object} input File identity and chunking settings.
 * @returns {Array<object>} Chunk records with an empty `vector`.
 */
export function chunkDocument(doc, input) {
    const { fileId, fileHash, filePath, fileMtime, maxTokens, minChars, overlapTokens, includeBreadcrumb, } = input;
    const records = [];

    for (const [sectionIdx, section] of doc.sections.entries()) {
        const prefix = includeBreadcrumb ? buildBreadcrumb(section.headingPath, filePath) : '';
        const body = section.content;

        const pieces = estimateTokens(prefix + body) <= maxTokens
            ? [
                // The whole section fits in one chunk.
                {
                    text: prefix + body,
                    textRaw: body,
                    lineStart: section.lineStart,
                    lineEnd: section.lineEnd,
                    chunkIndex: 0,
                },
            ]
            : splitSectionIntoChunks(body, prefix, section.lineStart, section.lineEnd, maxTokens, overlapTokens, 0);

        for (const piece of pieces) {
            // Skip stubs that are too short to be useful.
            if (piece.textRaw.length < minChars) {
                continue;
            }
            records.push({
                chunk_id: `${fileHash}:${sectionIdx}:${piece.chunkIndex}`,
                file_id: fileId,
                file_path: filePath,
                file_hash: fileHash,
                file_mtime: fileMtime,
                heading_path: section.headingPath,
                heading_level: section.headingLevel,
                heading_text: section.headingText,
                section_index: sectionIdx,
                chunk_index: piece.chunkIndex,
                text: piece.text,
                text_raw: piece.textRaw,
                // Token count is measured on the raw text, without the breadcrumb.
                token_count: estimateTokens(piece.textRaw),
                parent_headings: section.parentHeadings,
                depth: section.headingLevel,
                vector: [], // Will be filled during embedding
                line_start: piece.lineStart,
                line_end: piece.lineEnd,
            });
        }
    }
    return records;
}
|
|
117
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/ingestion/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAsBlC,6BAA6B;AAC7B,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;AACnD,CAAC;AAED,SAAS,eAAe,CAAC,WAAmB,EAAE,QAAgB;IAC5D,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC7D,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1C,OAAO,YAAY,WAAW,MAAM,CAAC;IACvC,CAAC;IACD,OAAO,YAAY,IAAI,MAAM,CAAC;AAChC,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjE,CAAC;AAUD,SAAS,sBAAsB,CAC7B,OAAe,EACf,UAAkB,EAClB,SAAiB,EACjB,OAAe,EACf,SAAiB,EACjB,aAAqB,EACrB,eAAuB;IAEvB,MAAM,UAAU,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,IAAI,iBAAiB,GAAa,EAAE,CAAC;IACrC,IAAI,UAAU,GAAG,eAAe,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,CAAC,GAAG,iBAAiB,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;QAEtD,IAAI,MAAM,GAAG,SAAS,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,sBAAsB;YACtB,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,UAAU,GAAG,SAAS;gBAC5B,OAAO,EAAE,SAAS;gBAClB,SAAS;gBACT,OAAO;gBACP,UAAU,EAAE,UAAU,EAAE;aACzB,CAAC,CAAC;YAEH,+DAA+D;YAC/D,+EAA+E;YAC/E,MAAM,QAAQ,GAAG,iBAAiB,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACvE,MAAM,cAAc,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;YAChD,IAAI,cAAc,IAAI,aAAa,EAAE,CAAC;gBACpC,iBAAiB,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,iBAAiB,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,UAAU,GAAG,SAA
S;YAC5B,OAAO,EAAE,SAAS;YAClB,SAAS;YACT,OAAO;YACP,UAAU,EAAE,UAAU,EAAE;SACzB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAmB,EAAE,KAAiB;IAClE,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,EACJ,MAAM,EACN,QAAQ,EACR,QAAQ,EACR,SAAS,EACT,SAAS,EACT,QAAQ,EACR,aAAa,EACb,iBAAiB,GAClB,GAAG,KAAK,CAAC;IAEV,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACxE,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAE,CAAC;QAE1C,MAAM,UAAU,GAAG,iBAAiB,CAAC,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE3F,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAChC,MAAM,WAAW,GAAG,cAAc,CAAC,UAAU,GAAG,OAAO,CAAC,CAAC;QAEzD,IAAI,SAAqB,CAAC;QAE1B,IAAI,WAAW,IAAI,SAAS,EAAE,CAAC;YAC7B,gCAAgC;YAChC,SAAS,GAAG;gBACV;oBACE,IAAI,EAAE,UAAU,GAAG,OAAO;oBAC1B,OAAO,EAAE,OAAO;oBAChB,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC5B,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,UAAU,EAAE,CAAC;iBACd;aACF,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,6BAA6B;YAC7B,SAAS,GAAG,sBAAsB,CAChC,OAAO,EACP,UAAU,EACV,OAAO,CAAC,SAAS,EACjB,OAAO,CAAC,OAAO,EACf,SAAS,EACT,aAAa,EACb,CAAC,CACF,CAAC;QACJ,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,eAAe;YACf,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,QAAQ;gBAAE,SAAS;YAE5C,MAAM,OAAO,GAAG,GAAG,QAAQ,IAAI,UAAU,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;YAC9D,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YAE/C,OAAO,CAAC,IAAI,CAAC;gBACX,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,QAAQ;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,SAAS;gBACrB,YAAY,EAAE,OAAO,CAAC,WAAW;gBACjC,aAAa,EAAE,OAAO,CAAC,YAAY;gBACnC,YAAY,EAAE,OAAO,CAAC,WAAW;gBACjC,aAAa,EAAE,UAAU;gBACzB,WAAW,EAAE,GAAG,CAAC,UAAU;gBAC3B,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,QAAQ,EAAE,GAAG,CAAC,OAAO;gBACrB,WAAW,EAAE,UAAU;gBACvB,eAAe,EAAE,OAAO,CAAC,cAAc;gBACvC,KAAK,EAAE,OAAO,CAAC,YAAY;gBAC3B,MAAM,EAAE,EAAE,EAAE,kCAAkC;gBAC9C,UAAU,EAAE,GAAG,CAAC,SAAS;gBACzB,QAAQ,EAAE,GAAG,CAAC,OAAO;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fingerprint.d.ts","sourceRoot":"","sources":["../../src/ingestion/fingerprint.ts"],"names":[],"mappings":"AAIA,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CASzF;AAED,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAIjD"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import * as crypto from 'node:crypto';
|
|
2
|
+
import * as fs from 'node:fs';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
/**
 * Computes the SHA-256 hex digest of a file's bytes together with its
 * modification time.
 *
 * Uses the promise-based fs API so the event loop is not blocked while the
 * file is read. The file is stat'ed before it is read, and any I/O failure
 * surfaces as a rejection of the returned promise.
 *
 * @param {string} filePath File to fingerprint; resolved against the current working directory.
 * @returns {Promise<{hash: string, mtime: number}>} Digest and `mtimeMs` of the file.
 */
export async function hashFile(filePath) {
    const absolutePath = path.resolve(filePath);
    const stat = await fs.promises.stat(absolutePath);
    const content = await fs.promises.readFile(absolutePath);
    const hash = crypto.createHash('sha256').update(content).digest('hex');
    return { hash, mtime: stat.mtimeMs };
}
|
|
12
|
+
/**
 * Derives a SHA-256 hex digest from a file's location rather than its
 * contents.
 *
 * The path is made absolute and backslashes are rewritten to forward
 * slashes before hashing.
 *
 * @param {string} filePath Path to hash; resolved against the current working directory.
 * @returns {string} Hex digest of the normalized absolute path.
 */
export function hashPath(filePath) {
    const forwardSlashed = path.resolve(filePath).replace(/\\/g, '/');
    const hasher = crypto.createHash('sha256');
    hasher.update(forwardSlashed);
    return hasher.digest('hex');
}
|
|
17
|
+
//# sourceMappingURL=fingerprint.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fingerprint.js","sourceRoot":"","sources":["../../src/ingestion/fingerprint.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,QAAgB;IAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC;IAE3B,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;IAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEvE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,QAAgB;IACvC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACpD,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACtE,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * One section of a parsed Markdown document, as consumed by the chunker.
 */
export interface Section {
    /** Heading path of the section; used as the chunk breadcrumb when non-empty and stored as `heading_path`. */
    headingPath: string;
    /** Stored on chunk records as `heading_text`. */
    headingText: string;
    /** Stored on chunk records as both `heading_level` and `depth`. */
    headingLevel: number;
    /**
     * Stored on every chunk of the section as `line_start`.
     * NOTE(review): presumably a line number in the source file; confirm the base (0 or 1) against the parser.
     */
    lineStart: number;
    /** Stored on every chunk of the section as `line_end`. */
    lineEnd: number;
    /** Text of the section; this is what gets measured and split into chunks. */
    content: string;
    /** Stored on chunk records as `parent_headings`. */
    parentHeadings: string[];
}
|
|
10
|
+
/**
 * Result of `parseMarkdown`: a file path paired with that file's sections.
 */
export interface ParsedDocument {
    /** Path of the file the sections belong to. */
    filePath: string;
    /** Sections of the document; the chunker walks them in array order and records each index as `section_index`. */
    sections: Section[];
}
|
|
14
|
+
/**
 * Parses Markdown text into a `ParsedDocument`.
 *
 * NOTE(review): the implementation is not visible from this declaration;
 * presumably `content` is split into sections by heading and `filePath`
 * is carried through to the result — confirm against parser.js.
 */
export declare function parseMarkdown(content: string, filePath: string): ParsedDocument;
|
|
15
|
+
/**
 * Returns a string derived from `filePath`.
 *
 * NOTE(review): presumably the file name without directory and extension
 * — confirm against parser.js.
 */
export declare function getFileStem(filePath: string): string;
|
|
16
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/ingestion/parser.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,OAAO;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,OAAO,EAAE,CAAC;CACrB;AA+BD,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,cAAc,CAwF/E;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAEpD"}
|