expxagents 0.25.0 → 0.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/src/commands/info.d.ts +1 -2
- package/dist/cli/src/commands/login.d.ts +1 -2
- package/dist/cli/src/commands/logout.d.ts +1 -2
- package/dist/cli/src/commands/outdated.d.ts +1 -2
- package/dist/cli/src/commands/publish.d.ts +1 -2
- package/dist/cli/src/commands/registry-install.d.ts +1 -2
- package/dist/cli/src/commands/search.d.ts +1 -2
- package/dist/cli/src/commands/update.d.ts +1 -2
- package/dist/cli/src/commands/whoami.d.ts +1 -2
- package/dist/dashboard/assets/{BufferResource-A8ncaeYI.js → BufferResource-D79vaoFm.js} +1 -1
- package/dist/dashboard/assets/{CanvasRenderer-B9ePxyZx.js → CanvasRenderer-BUoxTNKV.js} +1 -1
- package/dist/dashboard/assets/JarvisView-DSN7xWMz.js +1 -0
- package/dist/dashboard/assets/{RenderTargetSystem-B6qE1bDj.js → RenderTargetSystem-B7rwTXA1.js} +1 -1
- package/dist/dashboard/assets/ThreeBackground-BQTdScX-.js +1 -0
- package/dist/dashboard/assets/{WebGLRenderer-D5vXNoQg.js → WebGLRenderer-DgdVNsZ9.js} +1 -1
- package/dist/dashboard/assets/{WebGPURenderer-CPOVD6U9.js → WebGPURenderer-DnQNvjEQ.js} +1 -1
- package/dist/dashboard/assets/{browserAll-BXl_rCrX.js → browserAll-Cbsk7DE4.js} +1 -1
- package/dist/dashboard/assets/index-CrlhoBta.js +783 -0
- package/dist/dashboard/assets/{webworkerAll-Dv96EAc4.js → webworkerAll-BLmfReEj.js} +1 -1
- package/dist/dashboard/index.html +1 -1
- package/package.json +2 -2
- package/dist/cli/src/__tests__/cli.test.d.ts +0 -1
- package/dist/cli/src/__tests__/cli.test.js +0 -23
- package/dist/cli/src/commands/__tests__/outdated.test.d.ts +0 -1
- package/dist/cli/src/commands/__tests__/outdated.test.js +0 -76
- package/dist/cli/src/mcp/__tests__/catalog.test.d.ts +0 -1
- package/dist/cli/src/mcp/__tests__/catalog.test.js +0 -101
- package/dist/cli/src/mcp/__tests__/detect.test.d.ts +0 -1
- package/dist/cli/src/mcp/__tests__/detect.test.js +0 -84
- package/dist/cli/src/mcp/__tests__/setup.test.d.ts +0 -1
- package/dist/cli/src/mcp/__tests__/setup.test.js +0 -75
- package/dist/cli/src/mcp/__tests__/validate.test.d.ts +0 -1
- package/dist/cli/src/mcp/__tests__/validate.test.js +0 -42
- package/dist/cli/src/pencil/__tests__/detect.test.d.ts +0 -1
- package/dist/cli/src/pencil/__tests__/detect.test.js +0 -71
- package/dist/cli/src/pencil/__tests__/property-mapper.test.d.ts +0 -1
- package/dist/cli/src/pencil/__tests__/property-mapper.test.js +0 -120
- package/dist/cli/src/pencil/__tests__/template-sync.test.d.ts +0 -1
- package/dist/cli/src/pencil/__tests__/template-sync.test.js +0 -95
- package/dist/cli/src/runners/__tests__/provider-registry.test.d.ts +0 -1
- package/dist/cli/src/runners/__tests__/provider-registry.test.js +0 -31
- package/dist/cli/src/runners/__tests__/provider-runner.test.d.ts +0 -1
- package/dist/cli/src/runners/__tests__/provider-runner.test.js +0 -86
- package/dist/cli/src/utils/__tests__/command-prefix.test.d.ts +0 -1
- package/dist/cli/src/utils/__tests__/command-prefix.test.js +0 -35
- package/dist/cli/src/utils/__tests__/global-install.test.d.ts +0 -1
- package/dist/cli/src/utils/__tests__/global-install.test.js +0 -25
- package/dist/dashboard/assets/JarvisView-BzSP9oge.js +0 -1
- package/dist/dashboard/assets/ThreeBackground-Ds6SAxmB.js +0 -1
- package/dist/dashboard/assets/index--ULBmMcf.js +0 -1195
- package/dist/data/opensquad.db +0 -0
- package/dist/data/opensquad.db-shm +0 -0
- package/dist/data/opensquad.db-wal +0 -0
- package/node_modules/expxagents-knowledge/dist/config.d.ts +0 -4
- package/node_modules/expxagents-knowledge/dist/config.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/config.js +0 -37
- package/node_modules/expxagents-knowledge/dist/config.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/connection.d.ts +0 -6
- package/node_modules/expxagents-knowledge/dist/db/connection.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/connection.js +0 -69
- package/node_modules/expxagents-knowledge/dist/db/connection.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/migrations.d.ts +0 -3
- package/node_modules/expxagents-knowledge/dist/db/migrations.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/migrations.js +0 -46
- package/node_modules/expxagents-knowledge/dist/db/migrations.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/schema.d.ts +0 -3
- package/node_modules/expxagents-knowledge/dist/db/schema.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/db/schema.js +0 -79
- package/node_modules/expxagents-knowledge/dist/db/schema.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/index.d.ts +0 -16
- package/node_modules/expxagents-knowledge/dist/index.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/index.js +0 -16
- package/node_modules/expxagents-knowledge/dist/index.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/chunker.d.ts +0 -10
- package/node_modules/expxagents-knowledge/dist/ingest/chunker.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/chunker.js +0 -221
- package/node_modules/expxagents-knowledge/dist/ingest/chunker.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/document-loader.d.ts +0 -4
- package/node_modules/expxagents-knowledge/dist/ingest/document-loader.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/document-loader.js +0 -56
- package/node_modules/expxagents-knowledge/dist/ingest/document-loader.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/embedder.d.ts +0 -4
- package/node_modules/expxagents-knowledge/dist/ingest/embedder.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/embedder.js +0 -25
- package/node_modules/expxagents-knowledge/dist/ingest/embedder.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/entity-extractor.d.ts +0 -21
- package/node_modules/expxagents-knowledge/dist/ingest/entity-extractor.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/entity-extractor.js +0 -54
- package/node_modules/expxagents-knowledge/dist/ingest/entity-extractor.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/extraction-queue.d.ts +0 -16
- package/node_modules/expxagents-knowledge/dist/ingest/extraction-queue.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/extraction-queue.js +0 -49
- package/node_modules/expxagents-knowledge/dist/ingest/extraction-queue.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/pdf-extractor.d.ts +0 -9
- package/node_modules/expxagents-knowledge/dist/ingest/pdf-extractor.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/pdf-extractor.js +0 -116
- package/node_modules/expxagents-knowledge/dist/ingest/pdf-extractor.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/pipeline.d.ts +0 -27
- package/node_modules/expxagents-knowledge/dist/ingest/pipeline.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/ingest/pipeline.js +0 -92
- package/node_modules/expxagents-knowledge/dist/ingest/pipeline.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/graph-traversal.d.ts +0 -41
- package/node_modules/expxagents-knowledge/dist/query/graph-traversal.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/graph-traversal.js +0 -62
- package/node_modules/expxagents-knowledge/dist/query/graph-traversal.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/knowledge-query.d.ts +0 -31
- package/node_modules/expxagents-knowledge/dist/query/knowledge-query.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/knowledge-query.js +0 -106
- package/node_modules/expxagents-knowledge/dist/query/knowledge-query.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/vector-search.d.ts +0 -26
- package/node_modules/expxagents-knowledge/dist/query/vector-search.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/query/vector-search.js +0 -57
- package/node_modules/expxagents-knowledge/dist/query/vector-search.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/sources/agent-output.d.ts +0 -10
- package/node_modules/expxagents-knowledge/dist/sources/agent-output.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/sources/agent-output.js +0 -29
- package/node_modules/expxagents-knowledge/dist/sources/agent-output.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/sources/watcher.d.ts +0 -6
- package/node_modules/expxagents-knowledge/dist/sources/watcher.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/sources/watcher.js +0 -42
- package/node_modules/expxagents-knowledge/dist/sources/watcher.js.map +0 -1
- package/node_modules/expxagents-knowledge/dist/types.d.ts +0 -138
- package/node_modules/expxagents-knowledge/dist/types.d.ts.map +0 -1
- package/node_modules/expxagents-knowledge/dist/types.js +0 -2
- package/node_modules/expxagents-knowledge/dist/types.js.map +0 -1
- package/node_modules/expxagents-knowledge/package.json +0 -7
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import crypto from 'node:crypto';
|
|
2
|
-
import fs from 'node:fs';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
import { extractPdf } from './pdf-extractor.js';
|
|
5
|
-
const MIME_MAP = {
|
|
6
|
-
'.md': 'text/markdown',
|
|
7
|
-
'.markdown': 'text/markdown',
|
|
8
|
-
'.txt': 'text/plain',
|
|
9
|
-
'.json': 'application/json',
|
|
10
|
-
'.yaml': 'text/yaml',
|
|
11
|
-
'.yml': 'text/yaml',
|
|
12
|
-
'.pdf': 'application/pdf',
|
|
13
|
-
};
|
|
14
|
-
const SUPPORTED_EXTENSIONS = new Set(Object.keys(MIME_MAP));
|
|
15
|
-
const DEFAULT_PDF_CONFIG = {
|
|
16
|
-
ocr_enabled: true,
|
|
17
|
-
ocr_languages: ['eng'],
|
|
18
|
-
ocr_threshold: 50,
|
|
19
|
-
max_pages: 500,
|
|
20
|
-
};
|
|
21
|
-
export function computeHash(content) {
|
|
22
|
-
return crypto.createHash('sha256').update(content).digest('hex');
|
|
23
|
-
}
|
|
24
|
-
export async function loadDocument(filePath, maxSizeMb = 10, pdfConfig) {
|
|
25
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
26
|
-
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
27
|
-
throw new Error(`Unsupported file type: ${ext}`);
|
|
28
|
-
}
|
|
29
|
-
const stat = fs.statSync(filePath);
|
|
30
|
-
const sizeMb = stat.size / (1024 * 1024);
|
|
31
|
-
if (sizeMb > maxSizeMb) {
|
|
32
|
-
throw new Error(`File too large: ${sizeMb.toFixed(1)}MB exceeds limit of ${maxSizeMb}MB`);
|
|
33
|
-
}
|
|
34
|
-
if (ext === '.pdf') {
|
|
35
|
-
const result = await extractPdf(filePath, pdfConfig ?? DEFAULT_PDF_CONFIG);
|
|
36
|
-
return {
|
|
37
|
-
title: path.basename(filePath, '.pdf'),
|
|
38
|
-
content: result.content,
|
|
39
|
-
mime_type: 'application/pdf',
|
|
40
|
-
metadata: result.metadata,
|
|
41
|
-
};
|
|
42
|
-
}
|
|
43
|
-
const content = fs.readFileSync(filePath, 'utf-8');
|
|
44
|
-
const mime_type = MIME_MAP[ext];
|
|
45
|
-
const title = extractTitle(content, filePath, mime_type);
|
|
46
|
-
return { title, content, mime_type };
|
|
47
|
-
}
|
|
48
|
-
function extractTitle(content, filePath, mimeType) {
|
|
49
|
-
if (mimeType === 'text/markdown') {
|
|
50
|
-
const match = content.match(/^#{1,6}\s+(.+)$/m);
|
|
51
|
-
if (match)
|
|
52
|
-
return match[1].trim();
|
|
53
|
-
}
|
|
54
|
-
return path.basename(filePath);
|
|
55
|
-
}
|
|
56
|
-
//# sourceMappingURL=document-loader.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"document-loader.js","sourceRoot":"","sources":["../../src/ingest/document-loader.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,MAAM,QAAQ,GAA2B;IACvC,KAAK,EAAE,eAAe;IACtB,WAAW,EAAE,eAAe;IAC5B,MAAM,EAAE,YAAY;IACpB,OAAO,EAAE,kBAAkB;IAC3B,OAAO,EAAE,WAAW;IACpB,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,iBAAiB;CAC1B,CAAC;AAEF,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE5D,MAAM,kBAAkB,GAAc;IACpC,WAAW,EAAE,IAAI;IACjB,aAAa,EAAE,CAAC,KAAK,CAAC;IACtB,aAAa,EAAE,EAAE;IACjB,SAAS,EAAE,GAAG;CACf,CAAC;AAEF,MAAM,UAAU,WAAW,CAAC,OAAwB;IAClD,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAgB,EAChB,SAAS,GAAG,EAAE,EACd,SAAqB;IAErB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjD,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IACzC,IAAI,MAAM,GAAG,SAAS,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CAAC,mBAAmB,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,uBAAuB,SAAS,IAAI,CAAC,CAAC;IAC5F,CAAC;IAED,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACnB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,SAAS,IAAI,kBAAkB,CAAC,CAAC;QAC3E,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;YACtC,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,SAAS,EAAE,iBAAiB;YAC5B,QAAQ,EAAE,MAAM,CAAC,QAA8C;SAChE,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAE,CAAC;IACjC,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;IAEzD,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AACvC,CAAC;AAED,SAAS,YAAY,CAAC,OAAe,EAAE,QAAgB,EAAE,QAAgB;IACvE,IAAI,QAAQ,KAAK,eAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAChD,IAAI,KAAK;YA
AE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACpC,CAAC;IACD,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/ingest/embedder.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,CAiBzD;AAGD,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAGzD"}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import crypto from 'node:crypto';
|
|
2
|
-
export function createMockEmbedder(dims) {
|
|
3
|
-
return {
|
|
4
|
-
dimensions: dims,
|
|
5
|
-
async embed(texts) {
|
|
6
|
-
return texts.map((text) => {
|
|
7
|
-
// Deterministic hash-based vector for testing
|
|
8
|
-
const hash = crypto.createHash('sha256').update(text).digest();
|
|
9
|
-
const vector = [];
|
|
10
|
-
for (let i = 0; i < dims; i++) {
|
|
11
|
-
vector.push((hash[i % hash.length] / 255) * 2 - 1);
|
|
12
|
-
}
|
|
13
|
-
// Normalize
|
|
14
|
-
const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
|
|
15
|
-
return vector.map((v) => v / norm);
|
|
16
|
-
});
|
|
17
|
-
},
|
|
18
|
-
};
|
|
19
|
-
}
|
|
20
|
-
// Placeholder for future real implementations
|
|
21
|
-
export function createEmbedder(provider) {
|
|
22
|
-
// For now, always use mock. Real providers added in follow-up.
|
|
23
|
-
return createMockEmbedder(384);
|
|
24
|
-
}
|
|
25
|
-
//# sourceMappingURL=embedder.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/ingest/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AAGjC,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO;QACL,UAAU,EAAE,IAAI;QAChB,KAAK,CAAC,KAAK,CAAC,KAAe;YACzB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;gBACxB,8CAA8C;gBAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAa,EAAE,CAAC;gBAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC9B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;gBACrD,CAAC;gBACD,YAAY;gBACZ,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC9D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;YACrC,CAAC,CAAC,CAAC;QACL,CAAC;KACF,CAAC;AACJ,CAAC;AAED,8CAA8C;AAC9C,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC7C,+DAA+D;IAC/D,OAAO,kBAAkB,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
interface RawEntity {
|
|
2
|
-
name: string;
|
|
3
|
-
type: string;
|
|
4
|
-
description?: string;
|
|
5
|
-
}
|
|
6
|
-
interface RawRelation {
|
|
7
|
-
source: string;
|
|
8
|
-
target: string;
|
|
9
|
-
type: string;
|
|
10
|
-
description?: string;
|
|
11
|
-
}
|
|
12
|
-
interface ExtractionResult {
|
|
13
|
-
entities: RawEntity[];
|
|
14
|
-
relations: RawRelation[];
|
|
15
|
-
}
|
|
16
|
-
export declare function parseExtractionResponse(response: string): ExtractionResult;
|
|
17
|
-
export declare function validateEntities(entities: RawEntity[]): RawEntity[];
|
|
18
|
-
export declare function validateRelations(relations: RawRelation[]): RawRelation[];
|
|
19
|
-
export declare function buildExtractionPrompt(content: string): string;
|
|
20
|
-
export {};
|
|
21
|
-
//# sourceMappingURL=entity-extractor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"entity-extractor.d.ts","sourceRoot":"","sources":["../../src/ingest/entity-extractor.ts"],"names":[],"mappings":"AAwBA,UAAU,SAAS;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,UAAU,WAAW;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,UAAU,gBAAgB;IACxB,QAAQ,EAAE,SAAS,EAAE,CAAC;IACtB,SAAS,EAAE,WAAW,EAAE,CAAC;CAC1B;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB,CAW1E;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,SAAS,EAAE,CAEnE;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,WAAW,EAAE,CAIzE;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAgB7D"}
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
const VALID_ENTITY_TYPES = ['technology', 'person', 'decision', 'concept', 'squad', 'agent', 'learning'];
|
|
3
|
-
const VALID_RELATION_TYPES = ['produced', 'depends_on', 'learned_from', 'contradicts', 'decided', 'uses', 'replaced'];
|
|
4
|
-
const RawEntitySchema = z.object({
|
|
5
|
-
name: z.string(),
|
|
6
|
-
type: z.string(),
|
|
7
|
-
description: z.string().optional(),
|
|
8
|
-
});
|
|
9
|
-
const RawRelationSchema = z.object({
|
|
10
|
-
source: z.string(),
|
|
11
|
-
target: z.string(),
|
|
12
|
-
type: z.string(),
|
|
13
|
-
description: z.string().optional(),
|
|
14
|
-
});
|
|
15
|
-
const ExtractionResponseSchema = z.object({
|
|
16
|
-
entities: z.array(RawEntitySchema).default([]),
|
|
17
|
-
relations: z.array(RawRelationSchema).default([]),
|
|
18
|
-
});
|
|
19
|
-
export function parseExtractionResponse(response) {
|
|
20
|
-
const codeBlockMatch = response.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
|
|
21
|
-
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : response.trim();
|
|
22
|
-
try {
|
|
23
|
-
const parsed = JSON.parse(jsonStr);
|
|
24
|
-
const validated = ExtractionResponseSchema.parse(parsed);
|
|
25
|
-
return validated;
|
|
26
|
-
}
|
|
27
|
-
catch {
|
|
28
|
-
return { entities: [], relations: [] };
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
export function validateEntities(entities) {
|
|
32
|
-
return entities.filter((e) => VALID_ENTITY_TYPES.includes(e.type) && e.name?.trim());
|
|
33
|
-
}
|
|
34
|
-
export function validateRelations(relations) {
|
|
35
|
-
return relations.filter((r) => VALID_RELATION_TYPES.includes(r.type) && r.source?.trim() && r.target?.trim());
|
|
36
|
-
}
|
|
37
|
-
export function buildExtractionPrompt(content) {
|
|
38
|
-
return `Given the text below, extract:
|
|
39
|
-
1. Entities (name, type, description)
|
|
40
|
-
2. Relations between entities (source, target, type, description)
|
|
41
|
-
|
|
42
|
-
Valid entity types: ${VALID_ENTITY_TYPES.join(', ')}
|
|
43
|
-
Valid relation types: ${VALID_RELATION_TYPES.join(', ')}
|
|
44
|
-
|
|
45
|
-
Respond ONLY with JSON in this format:
|
|
46
|
-
{
|
|
47
|
-
"entities": [{"name": "...", "type": "...", "description": "..."}],
|
|
48
|
-
"relations": [{"source": "...", "target": "...", "type": "...", "description": "..."}]
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
TEXT:
|
|
52
|
-
${content}`;
|
|
53
|
-
}
|
|
54
|
-
//# sourceMappingURL=entity-extractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"entity-extractor.js","sourceRoot":"","sources":["../../src/ingest/entity-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,MAAM,kBAAkB,GAAiB,CAAC,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;AACvH,MAAM,oBAAoB,GAAmB,CAAC,UAAU,EAAE,YAAY,EAAE,cAAc,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;AAEtI,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACnC,CAAC,CAAC;AAEH,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACnC,CAAC,CAAC;AAEH,MAAM,wBAAwB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;IAC9C,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CAClD,CAAC,CAAC;AAoBH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,MAAM,cAAc,GAAG,QAAQ,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE5E,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,SAAS,GAAG,wBAAwB,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzD,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,QAAqB;IACpD,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAkB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;AACrG,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,SAAwB;IACxD,OAAO,SAAS,CAAC,MAAM,CACrB,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAoB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,CACrG,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAe;IACnD,OAAO;;;;sBAIa,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC;wBAC3B,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;;;;;;;;;EASrD,OAAO,EA
AE,CAAC;AACZ,CAAC"}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type Database from 'better-sqlite3';
|
|
2
|
-
export declare class ExtractionQueue {
|
|
3
|
-
private db;
|
|
4
|
-
constructor(db: Database.Database);
|
|
5
|
-
enqueue(documentId: string): void;
|
|
6
|
-
dequeue(): {
|
|
7
|
-
id: string;
|
|
8
|
-
document_id: string;
|
|
9
|
-
attempts: number;
|
|
10
|
-
} | null;
|
|
11
|
-
markComplete(jobId: string): void;
|
|
12
|
-
markFailed(jobId: string, error: string): void;
|
|
13
|
-
private cleanupStaleJobs;
|
|
14
|
-
pendingCount(): number;
|
|
15
|
-
}
|
|
16
|
-
//# sourceMappingURL=extraction-queue.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"extraction-queue.d.ts","sourceRoot":"","sources":["../../src/ingest/extraction-queue.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAK3C,qBAAa,eAAe;IACd,OAAO,CAAC,EAAE;gBAAF,EAAE,EAAE,QAAQ,CAAC,QAAQ;IAIzC,OAAO,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAOjC,OAAO,IAAI;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAWvE,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IASjC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAa9C,OAAO,CAAC,gBAAgB;IAIxB,YAAY,IAAI,MAAM;CAGvB"}
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
import { nanoid } from 'nanoid';
|
|
2
|
-
const MAX_RETRIES = 2;
|
|
3
|
-
export class ExtractionQueue {
|
|
4
|
-
db;
|
|
5
|
-
constructor(db) {
|
|
6
|
-
this.db = db;
|
|
7
|
-
this.cleanupStaleJobs();
|
|
8
|
-
}
|
|
9
|
-
enqueue(documentId) {
|
|
10
|
-
const now = new Date().toISOString();
|
|
11
|
-
this.db.prepare('INSERT OR IGNORE INTO extraction_jobs (id, document_id, status, attempts, created_at, updated_at) VALUES (?, ?, ?, 0, ?, ?)').run(nanoid(), documentId, 'pending', now, now);
|
|
12
|
-
}
|
|
13
|
-
dequeue() {
|
|
14
|
-
const job = this.db.prepare("SELECT id, document_id, attempts FROM extraction_jobs WHERE status = 'pending' ORDER BY created_at ASC LIMIT 1").get();
|
|
15
|
-
if (!job)
|
|
16
|
-
return null;
|
|
17
|
-
this.db.prepare("UPDATE extraction_jobs SET status = 'running', updated_at = ? WHERE id = ?")
|
|
18
|
-
.run(new Date().toISOString(), job.id);
|
|
19
|
-
return job;
|
|
20
|
-
}
|
|
21
|
-
markComplete(jobId) {
|
|
22
|
-
const now = new Date().toISOString();
|
|
23
|
-
this.db.prepare("UPDATE extraction_jobs SET status = 'complete', updated_at = ? WHERE id = ?").run(now, jobId);
|
|
24
|
-
const job = this.db.prepare('SELECT document_id FROM extraction_jobs WHERE id = ?').get(jobId);
|
|
25
|
-
if (job) {
|
|
26
|
-
this.db.prepare("UPDATE documents SET extraction_status = 'complete', updated_at = ? WHERE id = ?").run(now, job.document_id);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
markFailed(jobId, error) {
|
|
30
|
-
const now = new Date().toISOString();
|
|
31
|
-
const job = this.db.prepare('SELECT document_id, attempts FROM extraction_jobs WHERE id = ?').get(jobId);
|
|
32
|
-
if (!job)
|
|
33
|
-
return;
|
|
34
|
-
if (job.attempts >= MAX_RETRIES) {
|
|
35
|
-
this.db.prepare("UPDATE extraction_jobs SET status = 'failed', error = ?, updated_at = ? WHERE id = ?").run(error, now, jobId);
|
|
36
|
-
this.db.prepare("UPDATE documents SET extraction_status = 'failed', updated_at = ? WHERE id = ?").run(now, job.document_id);
|
|
37
|
-
}
|
|
38
|
-
else {
|
|
39
|
-
this.db.prepare("UPDATE extraction_jobs SET status = 'pending', attempts = attempts + 1, error = ?, updated_at = ? WHERE id = ?").run(error, now, jobId);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
cleanupStaleJobs() {
|
|
43
|
-
this.db.prepare("UPDATE extraction_jobs SET status = 'pending' WHERE status = 'running'").run();
|
|
44
|
-
}
|
|
45
|
-
pendingCount() {
|
|
46
|
-
return this.db.prepare("SELECT COUNT(*) as count FROM extraction_jobs WHERE status = 'pending'").get().count;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
//# sourceMappingURL=extraction-queue.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"extraction-queue.js","sourceRoot":"","sources":["../../src/ingest/extraction-queue.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,MAAM,WAAW,GAAG,CAAC,CAAC;AAEtB,MAAM,OAAO,eAAe;IACN;IAApB,YAAoB,EAAqB;QAArB,OAAE,GAAF,EAAE,CAAmB;QACvC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAED,OAAO,CAAC,UAAkB;QACxB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,CAAC,OAAO,CACb,6HAA6H,CAC9H,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACnD,CAAC;IAED,OAAO;QACL,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CACzB,gHAAgH,CACjH,CAAC,GAAG,EAAuE,CAAC;QAC7E,IAAI,CAAC,GAAG;YAAE,OAAO,IAAI,CAAC;QAEtB,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,4EAA4E,CAAC;aAC1F,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACzC,OAAO,GAAG,CAAC;IACb,CAAC;IAED,YAAY,CAAC,KAAa;QACxB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,6EAA6E,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC/G,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sDAAsD,CAAC,CAAC,GAAG,CAAC,KAAK,CAAwC,CAAC;QACtI,IAAI,GAAG,EAAE,CAAC;YACR,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kFAAkF,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,WAAW,CAAC,CAAC;QAChI,CAAC;IACH,CAAC;IAED,UAAU,CAAC,KAAa,EAAE,KAAa;QACrC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,gEAAgE,CAAC,CAAC,GAAG,CAAC,KAAK,CAA0D,CAAC;QAClK,IAAI,CAAC,GAAG;YAAE,OAAO;QAEjB,IAAI,GAAG,CAAC,QAAQ,IAAI,WAAW,EAAE,CAAC;YAChC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sFAAsF,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;YAC/H,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,gFAAgF,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,WAAW,CAAC,CAAC;QAC9H,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,gHAAgH,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;QAC3J,CAAC;IACH,CAAC;IAEO,gBAAgB;QACtB,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,wEAAwE,CAAC,CAAC,GAAG,EAAE,CAAC;IAClG,CAAC;IAED,YAAY;QACV,OAAQ,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,wEAAwE,CAAC,CAAC,GAAG,EAAwB,CAAC,KAAK,CAAC;I
ACtI,CAAC;CACF"}
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import type { PdfConfig, PdfMetadata } from '../types.js';
|
|
2
|
-
interface PdfExtractionResult {
|
|
3
|
-
content: string;
|
|
4
|
-
metadata: PdfMetadata;
|
|
5
|
-
}
|
|
6
|
-
export declare function extractPdf(filePath: string, config: PdfConfig): Promise<PdfExtractionResult>;
|
|
7
|
-
export declare function terminateOcrWorker(): Promise<void>;
|
|
8
|
-
export {};
|
|
9
|
-
//# sourceMappingURL=pdf-extractor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-extractor.d.ts","sourceRoot":"","sources":["../../src/ingest/pdf-extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1D,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;CACvB;AAID,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,SAAS,GAChB,OAAO,CAAC,mBAAmB,CAAC,CA6E9B;AAqCD,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CASxD"}
|
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import fs from 'node:fs';
|
|
2
|
-
let ocrWorker = null;
|
|
3
|
-
export async function extractPdf(filePath, config) {
|
|
4
|
-
const data = new Uint8Array(fs.readFileSync(filePath));
|
|
5
|
-
const pdfjsLib = await import('pdfjs-dist/legacy/build/pdf.mjs');
|
|
6
|
-
let doc;
|
|
7
|
-
try {
|
|
8
|
-
doc = await pdfjsLib.getDocument({ data, useSystemFonts: true }).promise;
|
|
9
|
-
}
|
|
10
|
-
catch (err) {
|
|
11
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
12
|
-
if (message.includes('password') || message.includes('encrypted')) {
|
|
13
|
-
throw new Error(`Password-protected PDF: ${filePath}`);
|
|
14
|
-
}
|
|
15
|
-
throw new Error(`Failed to parse PDF ${filePath}: ${message}`);
|
|
16
|
-
}
|
|
17
|
-
const pageCount = doc.numPages;
|
|
18
|
-
if (pageCount === 0) {
|
|
19
|
-
throw new Error(`Zero-page PDF: ${filePath}`);
|
|
20
|
-
}
|
|
21
|
-
const pagesToProcess = Math.min(pageCount, config.max_pages);
|
|
22
|
-
const truncated = pagesToProcess < pageCount;
|
|
23
|
-
const pageTexts = [];
|
|
24
|
-
const ocrPages = [];
|
|
25
|
-
for (let i = 1; i <= pagesToProcess; i++) {
|
|
26
|
-
const page = await doc.getPage(i);
|
|
27
|
-
const textContent = await page.getTextContent();
|
|
28
|
-
const text = textContent.items
|
|
29
|
-
.map(item => ('str' in item ? item.str : ''))
|
|
30
|
-
.join(' ')
|
|
31
|
-
.trim();
|
|
32
|
-
if (text.length >= config.ocr_threshold) {
|
|
33
|
-
pageTexts.push(text);
|
|
34
|
-
}
|
|
35
|
-
else if (config.ocr_enabled) {
|
|
36
|
-
try {
|
|
37
|
-
const ocrText = await ocrPage(page, config);
|
|
38
|
-
pageTexts.push(ocrText || text);
|
|
39
|
-
if (ocrText)
|
|
40
|
-
ocrPages.push(i);
|
|
41
|
-
}
|
|
42
|
-
catch {
|
|
43
|
-
pageTexts.push(text);
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
else {
|
|
47
|
-
pageTexts.push(text);
|
|
48
|
-
}
|
|
49
|
-
page.cleanup();
|
|
50
|
-
}
|
|
51
|
-
let content;
|
|
52
|
-
if (pageTexts.length <= 1) {
|
|
53
|
-
content = pageTexts[0] ?? '';
|
|
54
|
-
}
|
|
55
|
-
else {
|
|
56
|
-
content = pageTexts
|
|
57
|
-
.map((text, i) => (i > 0 ? `[PAGE ${i + 1}]\n\n${text}` : text))
|
|
58
|
-
.join('\n\n');
|
|
59
|
-
}
|
|
60
|
-
const extraction_method = ocrPages.length === 0
|
|
61
|
-
? 'native'
|
|
62
|
-
: ocrPages.length === pagesToProcess
|
|
63
|
-
? 'ocr'
|
|
64
|
-
: 'mixed';
|
|
65
|
-
return {
|
|
66
|
-
content,
|
|
67
|
-
metadata: {
|
|
68
|
-
page_count: pageCount,
|
|
69
|
-
ocr_pages: ocrPages,
|
|
70
|
-
languages: ocrPages.length > 0 ? config.ocr_languages : [],
|
|
71
|
-
extraction_method,
|
|
72
|
-
...(truncated ? { truncated: true } : {}),
|
|
73
|
-
},
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
/**
 * Runs OCR on a single PDF page and returns the recognised text.
 *
 * The page is rendered at 2x scale onto a `canvas` surface, encoded as PNG and
 * handed to a Tesseract worker. The worker is created lazily on the first call
 * and cached in the module-level `ocrWorker`; later calls reuse it, so the
 * languages of the first call stay in effect until the worker is reset.
 *
 * @param page   Page object exposing `getViewport({ scale })` and `render(...)`.
 * @param config Extraction config; `config.ocr_languages` is joined with '+'.
 * @returns The trimmed OCR text, or '' when an optional OCR dependency
 *          (`tesseract.js` or `canvas`) is not installed.
 * @throws Any other error raised while rendering or recognising the page.
 */
async function ocrPage(page, config) {
    try {
        const Tesseract = await import('tesseract.js');
        if (!ocrWorker) {
            ocrWorker = await Tesseract.createWorker(config.ocr_languages.join('+'));
        }
        const worker = ocrWorker;
        const viewport = page.getViewport({ scale: 2.0 });
        // Dynamic import for optional native dependency — canvas is only needed for OCR
        const { createCanvas } = await import(/* webpackIgnore: true */ 'canvas');
        const canvas = createCanvas(viewport.width, viewport.height);
        const context = canvas.getContext('2d');
        await page.render({
            canvasContext: context,
            viewport,
        }).promise;
        const imageBuffer = canvas.toBuffer('image/png');
        const result = await worker.recognize(imageBuffer);
        return result.data.text.trim();
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        // Node reports a missing dependency differently per loader: CommonJS
        // uses code 'MODULE_NOT_FOUND' with "Cannot find module", while the ESM
        // loader used by dynamic import() uses code 'ERR_MODULE_NOT_FOUND' with
        // "Cannot find package" for bare specifiers. Check the code first, then
        // fall back to the message for errors that were re-wrapped.
        const code = err !== null && typeof err === 'object' ? err.code : undefined;
        const missingDependency = code === 'ERR_MODULE_NOT_FOUND'
            || code === 'MODULE_NOT_FOUND'
            || message.includes('Cannot find module')
            || message.includes('Cannot find package')
            || message.includes('MODULE_NOT_FOUND');
        if (missingDependency) {
            console.warn('[pdf-extractor] OCR dependencies not available — skipping OCR');
            return '';
        }
        throw err;
    }
}
|
|
105
|
-
/**
 * Shuts down the cached OCR worker, if one exists, and clears the cache.
 *
 * Errors raised by `terminate()` are deliberately ignored; the module-level
 * `ocrWorker` is reset to `null` whether or not termination succeeded.
 * Calling this when no worker exists is a no-op.
 */
export async function terminateOcrWorker() {
    if (!ocrWorker) {
        return;
    }
    try {
        await ocrWorker.terminate();
    }
    catch {
        // Ignore termination errors
    }
    finally {
        ocrWorker = null;
    }
}
|
|
116
|
-
//# sourceMappingURL=pdf-extractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdf-extractor.js","sourceRoot":"","sources":["../../src/ingest/pdf-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AAQzB,IAAI,SAAS,GAAmB,IAAI,CAAC;AAErC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAgB,EAChB,MAAiB;IAEjB,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC;IAEvD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,iCAAiC,CAAC,CAAC;IAEjE,IAAI,GAAG,CAAC;IACR,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC;IAC3E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAClE,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,EAAE,CAAC,CAAC;QACzD,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,KAAK,OAAO,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,SAAS,GAAG,GAAG,CAAC,QAAQ,CAAC;IAE/B,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,kBAAkB,QAAQ,EAAE,CAAC,CAAC;IAChD,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,cAAc,GAAG,SAAS,CAAC;IAC7C,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,cAAc,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAChD,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK;aAC3B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;aAC5C,IAAI,CAAC,GAAG,CAAC;aACT,IAAI,EAAE,CAAC;QAEV,IAAI,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACxC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;aAAM,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;YAC9B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;gBAC5C,SAAS,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC;gBAChC,IAAI,OAAO;oBAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;aAAM,CAAC;YA
CN,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;QAED,IAAI,CAAC,OAAO,EAAE,CAAC;IACjB,CAAC;IAED,IAAI,OAAe,CAAC;IACpB,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,SAAS;aAChB,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aAC/D,IAAI,CAAC,MAAM,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,KAAK,CAAC;QAC7C,CAAC,CAAC,QAAQ;QACV,CAAC,CAAC,QAAQ,CAAC,MAAM,KAAK,cAAc;YAClC,CAAC,CAAC,KAAK;YACP,CAAC,CAAC,OAAO,CAAC;IAEd,OAAO;QACL,OAAO;QACP,QAAQ,EAAE;YACR,UAAU,EAAE,SAAS;YACrB,SAAS,EAAE,QAAQ;YACnB,SAAS,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE;YAC1D,iBAAiB;YACjB,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC1C;KACF,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,OAAO,CAAC,IAAa,EAAE,MAAiB;IACrD,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;QAC/C,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,SAAS,GAAG,MAAM,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3E,CAAC;QACD,MAAM,MAAM,GAAG,SAAmF,CAAC;QAEnG,MAAM,QAAQ,GAAI,IAA8D,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QAC7G,gFAAgF;QAChF,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,QAAkB,CAAC,CAAC;QACpF,MAAM,MAAM,GAAG,YAAY,CACxB,QAA8B,CAAC,KAAK,EACpC,QAA+B,CAAC,MAAM,CACxC,CAAC;QACF,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAExC,MAAO,IAAkE,CAAC,MAAM,CAAC;YAC/E,aAAa,EAAE,OAAO;YACtB,QAAQ;SACT,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QACjD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QACnD,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACjC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,IAAI,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACnF,OAAO,CAAC,IAAI,CAAC,+DAA+D,CAAC,CAAC;YAC9E,OA
AO,EAAE,CAAC;QACZ,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACtC,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAO,SAAgD,CAAC,SAAS,EAAE,CAAC;QACtE,CAAC;QAAC,MAAM,CAAC;YACP,4BAA4B;QAC9B,CAAC;QACD,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC;AACH,CAAC"}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type Database from 'better-sqlite3';
|
|
2
|
-
import type { Embedder, SourceType } from '../types.js';
|
|
3
|
-
/** Chunking settings held by IngestPipeline and supplied via its constructor. */
interface ChunkerConfig {
|
|
4
|
-
/** NOTE(review): presumably the maximum token count per chunk — confirm against the chunker. */
max_tokens: number;
|
|
5
|
-
/** NOTE(review): presumably the overlap between consecutive chunks; unit not visible here — confirm. */
overlap: number;
|
|
6
|
-
}
|
|
7
|
-
/** Optional attributes accepted by IngestPipeline.ingest and IngestPipeline.ingestText. */
interface IngestOptions {
|
|
8
|
-
/** Optional squad code. */
squad_code?: string;
|
|
9
|
-
/** Optional agent id. */
agent_id?: string;
|
|
10
|
-
/** Optional source type of the document. */
source_type?: SourceType;
|
|
11
|
-
/** Optional list of tags. */
tags?: string[];
|
|
12
|
-
}
|
|
13
|
-
/** Outcome of an ingest call. */
interface IngestResult {
|
|
14
|
-
/** Id of the document. */
document_id: string;
|
|
15
|
-
/** Number of chunks. */
chunks_count: number;
|
|
16
|
-
/** NOTE(review): presumably true when ingestion was skipped — confirm against the implementation. */
skipped: boolean;
|
|
17
|
-
}
|
|
18
|
-
/** Type declaration for the ingestion pipeline (database + embedder + chunker settings). */
export declare class IngestPipeline {
|
|
19
|
-
private db;
|
|
20
|
-
private embedder;
|
|
21
|
-
private chunkerConfig;
|
|
22
|
-
/** Takes the better-sqlite3 database handle, the embedder and the chunker settings. */
constructor(db: Database.Database, embedder: Embedder, chunkerConfig: ChunkerConfig);
|
|
23
|
-
/** Ingests the file at `filePath`; resolves with an IngestResult. */
ingest(filePath: string, options?: IngestOptions): Promise<IngestResult>;
|
|
24
|
-
/** Ingests in-memory `content` under the given `title`; resolves with an IngestResult. */
ingestText(content: string, title: string, options?: IngestOptions): Promise<IngestResult>;
|
|
25
|
-
}
|
|
26
|
-
export {};
|
|
27
|
-
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/ingest/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAI3C,OAAO,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAe,MAAM,aAAa,CAAC;AAErE,UAAU,aAAa;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,aAAa;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,UAAU,YAAY;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,qBAAa,cAAc;IAEvB,OAAO,CAAC,EAAE;IACV,OAAO,CAAC,QAAQ;IAChB,OAAO,CAAC,aAAa;gBAFb,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,aAAa;IAGhC,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,CAAC;IAmFxE,UAAU,CACd,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,YAAY,CAAC;CA6CzB"}
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import fs from 'node:fs';
|
|
2
|
-
import { nanoid } from 'nanoid';
|
|
3
|
-
import { loadDocument, computeHash } from './document-loader.js';
|
|
4
|
-
import { chunkMarkdown, chunkPlainText } from './chunker.js';
|
|
5
|
-
/**
 * Loads documents, splits them into chunks, embeds the chunks and stores
 * everything in SQLite (`documents`, `chunks`, `chunk_embeddings_store`).
 *
 * All asynchronous work (loading, embedding) happens before any row is
 * written; the writes for one document then run in a single transaction, so
 * a failed embedding call can neither delete a previously ingested document
 * nor leave a document row without its chunks.
 */
export class IngestPipeline {
    db;
    embedder;
    chunkerConfig;
    /**
     * @param db            better-sqlite3 database handle.
     * @param embedder      Object whose `embed(texts)` resolves to one embedding per text.
     * @param chunkerConfig Settings passed to `chunkMarkdown` / `chunkPlainText`.
     */
    constructor(db, embedder, chunkerConfig) {
        this.db = db;
        this.embedder = embedder;
        this.chunkerConfig = chunkerConfig;
    }
    /**
     * Ingests the file at `filePath`.
     *
     * If a document with the same `source_path` and content hash already
     * exists, nothing is written and `{ skipped: true, chunks_count: 0 }` is
     * returned with the existing id. Otherwise any document with the same
     * `source_path` is replaced by the newly ingested one.
     *
     * @param filePath Path of the file to load.
     * @param options  Optional squad/agent attribution, source type (default 'external') and tags.
     * @returns `{ document_id, chunks_count, skipped }`.
     * @throws If loading, chunking, embedding or any database write fails.
     */
    async ingest(filePath, options) {
        const parsed = await loadDocument(filePath);
        const isPdf = parsed.mime_type === 'application/pdf';
        // PDFs are hashed from their raw bytes, everything else from the parsed text.
        const contentHash = isPdf
            ? computeHash(fs.readFileSync(filePath))
            : computeHash(parsed.content);
        const sourceType = options?.source_type ?? 'external';
        // Check for existing document with same path and hash
        const existing = this.db
            .prepare('SELECT id FROM documents WHERE source_path = ? AND content_hash = ?')
            .get(filePath, contentHash);
        if (existing) {
            return { document_id: existing.id, chunks_count: 0, skipped: true };
        }
        // Chunk and embed before touching the database so a failure here
        // leaves the previously ingested version of the document intact.
        const chunks = isPdf
            ? chunkPlainText(parsed.content, this.chunkerConfig)
            : chunkMarkdown(parsed.content, this.chunkerConfig);
        const embeddings = await embedChunkContents(this.embedder, chunks);
        const docId = nanoid();
        const now = new Date().toISOString();
        const docMetadata = {
            ...(options?.tags ? { tags: options.tags } : {}),
            ...(parsed.metadata ? parsed.metadata : {}),
        };
        const metadata = Object.keys(docMetadata).length > 0 ? JSON.stringify(docMetadata) : null;
        const persist = this.db.transaction(() => {
            // Upsert: delete old document with same source_path (CASCADE deletes chunks)
            this.db.prepare('DELETE FROM documents WHERE source_path = ?').run(filePath);
            this.db
                .prepare(`INSERT INTO documents (id, source_type, source_path, squad_code, agent_id, title, mime_type, content_hash, extraction_status, metadata, ingested_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?, ?)`)
                .run(docId, sourceType, filePath, options?.squad_code ?? null, options?.agent_id ?? null, parsed.title, parsed.mime_type, contentHash, metadata, now, now);
            insertChunkRows(this.db, docId, chunks, embeddings, now);
        });
        persist();
        return { document_id: docId, chunks_count: chunks.length, skipped: false };
    }
    /**
     * Ingests in-memory text, stored with mime type 'text/markdown' and no
     * `source_path`.
     *
     * If a path-less document with the same content hash and title already
     * exists, nothing is written and `{ skipped: true, chunks_count: 0 }` is
     * returned with the existing id.
     *
     * @param content Text to ingest; chunked with `chunkMarkdown`.
     * @param title   Title stored on the document row (also part of the dedupe key).
     * @param options Optional squad/agent attribution, source type (default 'external') and tags.
     * @returns `{ document_id, chunks_count, skipped }`.
     * @throws If chunking, embedding or any database write fails.
     */
    async ingestText(content, title, options) {
        const contentHash = computeHash(content);
        const existing = this.db
            .prepare('SELECT id FROM documents WHERE content_hash = ? AND source_path IS NULL AND title = ?')
            .get(contentHash, title);
        if (existing) {
            return { document_id: existing.id, chunks_count: 0, skipped: true };
        }
        // Embed first; the document row is only written once its chunks can be too.
        const chunks = chunkMarkdown(content, this.chunkerConfig);
        const embeddings = await embedChunkContents(this.embedder, chunks);
        const docId = nanoid();
        const now = new Date().toISOString();
        const sourceType = options?.source_type ?? 'external';
        const metadata = options?.tags ? JSON.stringify({ tags: options.tags }) : null;
        const persist = this.db.transaction(() => {
            this.db
                .prepare(`INSERT INTO documents (id, source_type, source_path, squad_code, agent_id, title, mime_type, content_hash, extraction_status, metadata, ingested_at, updated_at)
         VALUES (?, ?, NULL, ?, ?, ?, 'text/markdown', ?, 'pending', ?, ?, ?)`)
                .run(docId, sourceType, options?.squad_code ?? null, options?.agent_id ?? null, title, contentHash, metadata, now, now);
            insertChunkRows(this.db, docId, chunks, embeddings, now);
        });
        persist();
        return { document_id: docId, chunks_count: chunks.length, skipped: false };
    }
}
/**
 * Embeds the content of every chunk in one batch.
 * Skips the embedder call for an empty chunk list and rejects a result that
 * has fewer embeddings than chunks, which would otherwise be stored as
 * missing values.
 */
async function embedChunkContents(embedder, chunks) {
    if (chunks.length === 0) {
        return [];
    }
    const embeddings = await embedder.embed(chunks.map((chunk) => chunk.content));
    if (embeddings.length < chunks.length) {
        throw new Error(`Embedder returned ${embeddings.length} embeddings for ${chunks.length} chunks`);
    }
    return embeddings;
}
/**
 * Writes one `chunks` row and one `chunk_embeddings_store` row per chunk.
 * Expected to be called inside a transaction so the rows commit together
 * with their document row.
 */
function insertChunkRows(db, docId, chunks, embeddings, createdAt) {
    const insertChunk = db.prepare('INSERT INTO chunks (id, document_id, content, chunk_index, token_count, metadata, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)');
    const insertEmbedding = db.prepare('INSERT INTO chunk_embeddings_store (chunk_id, embedding) VALUES (?, ?)');
    chunks.forEach((chunk, index) => {
        const chunkId = nanoid();
        insertChunk.run(chunkId, docId, chunk.content, chunk.chunk_index, chunk.token_count, JSON.stringify(chunk.metadata), createdAt);
        insertEmbedding.run(chunkId, JSON.stringify(embeddings[index]));
    });
}
|
|
92
|
-
//# sourceMappingURL=pipeline.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/ingest/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AAEzB,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACjE,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAqB7D,MAAM,OAAO,cAAc;IAEf;IACA;IACA;IAHV,YACU,EAAqB,EACrB,QAAkB,EAClB,aAA4B;QAF5B,OAAE,GAAF,EAAE,CAAmB;QACrB,aAAQ,GAAR,QAAQ,CAAU;QAClB,kBAAa,GAAb,aAAa,CAAe;IACnC,CAAC;IAEJ,KAAK,CAAC,MAAM,CAAC,QAAgB,EAAE,OAAuB;QACpD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;QAC5C,MAAM,WAAW,GAAG,MAAM,CAAC,SAAS,KAAK,iBAAiB;YACxD,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;YACxC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAChC,MAAM,UAAU,GAAG,OAAO,EAAE,WAAW,IAAI,UAAU,CAAC;QAEtD,sDAAsD;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,EAAE;aACrB,OAAO,CAAC,qEAAqE,CAAC;aAC9E,GAAG,CAAC,QAAQ,EAAE,WAAW,CAA+B,CAAC;QAE5D,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAAE,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;QACtE,CAAC;QAED,6EAA6E;QAC7E,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE7E,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,WAAW,GAAG;YAClB,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;SAC5C,CAAC;QACF,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAE1F,IAAI,CAAC,EAAE;aACJ,OAAO,CACN;6DACqD,CACtD;aACA,GAAG,CACF,KAAK,EACL,UAAU,EACV,QAAQ,EACR,OAAO,EAAE,UAAU,IAAI,IAAI,EAC3B,OAAO,EAAE,QAAQ,IAAI,IAAI,EACzB,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,SAAS,EAChB,WAAW,EACX,QAAQ,EACR,GAAG,EACH,GAAG,CACJ,CAAC;QAEJ,QAAQ;QACR,MAAM,MAAM,GAAkB,MAAM,CAAC,SAAS,KAAK,iBAAiB;YAClE,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,aAAa,CAAC;YACpD,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAEtD,4B
AA4B;QAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CACjC,4HAA4H,CAC7H,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CACrC,wEAAwE,CACzE,CAAC;QAEF,cAAc;QACd,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE5E,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC;gBACzB,WAAW,CAAC,GAAG,CACb,OAAO,EACP,KAAK,EACL,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,EACjB,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EACrB,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EACrB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAClC,GAAG,CACJ,CAAC;gBACF,eAAe,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,CAAC;QACH,CAAC,CAAC,CAAC;QACH,WAAW,EAAE,CAAC;QAEd,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC7E,CAAC;IAED,KAAK,CAAC,UAAU,CACd,OAAe,EACf,KAAa,EACb,OAAuB;QAEvB,MAAM,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,QAAQ,GAAG,IAAI,CAAC,EAAE;aACrB,OAAO,CAAC,uFAAuF,CAAC;aAChG,GAAG,CAAC,WAAW,EAAE,KAAK,CAA+B,CAAC;QAEzD,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAAE,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;QACtE,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,UAAU,GAAG,OAAO,EAAE,WAAW,IAAI,UAAU,CAAC;QACtD,MAAM,QAAQ,GAAG,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAE/E,IAAI,CAAC,EAAE;aACJ,OAAO,CACN;8EACsE,CACvE;aACA,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,UAAU,IAAI,IAAI,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAE1H,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC
3C,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE5E,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CACjC,4HAA4H,CAC7H,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CACrC,wEAAwE,CACzE,CAAC;QAEF,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC;gBACzB,WAAW,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC1I,eAAe,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,CAAC;QACH,CAAC,CAAC,CAAC;QACH,WAAW,EAAE,CAAC;QAEd,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC7E,CAAC;CACF"}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import type Database from 'better-sqlite3';
|
|
2
|
-
import type { RelationType } from '../types.js';
|
|
3
|
-
/** Options accepted by GraphTraversal.traverse. */
interface TraversalOptions {
|
|
4
|
-
/** NOTE(review): presumably the maximum depth the traversal may reach — confirm in the implementation. */
maxDepth: number;
|
|
5
|
-
/** NOTE(review): presumably restricts traversal to one relation type when set — confirm. */
relationType?: RelationType;
|
|
6
|
-
}
|
|
7
|
-
/** One element of the array returned by GraphTraversal.traverse: a relation between two entities. */
interface TraversalResult {
|
|
8
|
-
/** Entity on the source side of the relation. */
source: {
|
|
9
|
-
id: string;
|
|
10
|
-
name: string;
|
|
11
|
-
entity_type: string;
|
|
12
|
-
};
|
|
13
|
-
/** Entity on the target side of the relation. */
target: {
|
|
14
|
-
id: string;
|
|
15
|
-
name: string;
|
|
16
|
-
entity_type: string;
|
|
17
|
-
};
|
|
18
|
-
relation_type: string;
|
|
19
|
-
/** Relation description; may be null. */
description: string | null;
|
|
20
|
-
weight: number;
|
|
21
|
-
/** NOTE(review): presumably the traversal depth at which this relation was found — confirm. */
depth: number;
|
|
22
|
-
}
|
|
23
|
-
/**
 * Type declaration for the entity-graph query helper built on a better-sqlite3
 * database. Only signatures are visible here; behavioural notes below are
 * marked where they are assumptions.
 */
export declare class GraphTraversal {
|
|
24
|
-
private db;
|
|
25
|
-
constructor(db: Database.Database);
|
|
26
|
-
/** Looks up a single entity by name; returns null instead of an entity when there is none to return. */
findEntityByName(name: string): {
|
|
27
|
-
id: string;
|
|
28
|
-
name: string;
|
|
29
|
-
entity_type: string;
|
|
30
|
-
} | null;
|
|
31
|
-
/** Returns the entities of the given type as an array. */
findEntitiesByType(type: string): Array<{
|
|
32
|
-
id: string;
|
|
33
|
-
name: string;
|
|
34
|
-
entity_type: string;
|
|
35
|
-
}>;
|
|
36
|
-
/** Walks relations starting from `entityId` according to `options` and returns the relations found. */
traverse(entityId: string, options: TraversalOptions): TraversalResult[];
|
|
37
|
-
/** NOTE(review): presumably the depth-first helper behind traverse — implementation not visible here. */
private dfs;
|
|
38
|
-
/** Produces a string from a list of traversal results. */
buildNarrative(results: TraversalResult[]): string;
|
|
39
|
-
}
|
|
40
|
-
export {};
|
|
41
|
-
//# sourceMappingURL=graph-traversal.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"graph-traversal.d.ts","sourceRoot":"","sources":["../../src/query/graph-traversal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,UAAU,gBAAgB;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,YAAY,CAAC;CAC7B;AAED,UAAU,eAAe;IACvB,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1D,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,cAAc;IACb,OAAO,CAAC,EAAE;gBAAF,EAAE,EAAE,QAAQ,CAAC,QAAQ;IAEzC,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAOxF,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAM1F,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,eAAe,EAAE;IAOxE,OAAO,CAAC,GAAG;IAsDX,cAAc,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,MAAM;CAUnD"}
|