@itkoren/sqmd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/CHANGELOG.md +46 -0
  2. package/LICENSE +21 -0
  3. package/README.md +1052 -0
  4. package/dist/api/app.d.ts +14 -0
  5. package/dist/api/app.d.ts.map +1 -0
  6. package/dist/api/app.js +32 -0
  7. package/dist/api/app.js.map +1 -0
  8. package/dist/api/middleware.d.ts +5 -0
  9. package/dist/api/middleware.d.ts.map +1 -0
  10. package/dist/api/middleware.js +37 -0
  11. package/dist/api/middleware.js.map +1 -0
  12. package/dist/api/models.d.ts +178 -0
  13. package/dist/api/models.d.ts.map +1 -0
  14. package/dist/api/models.js +39 -0
  15. package/dist/api/models.js.map +1 -0
  16. package/dist/api/routes/documents.d.ts +4 -0
  17. package/dist/api/routes/documents.d.ts.map +1 -0
  18. package/dist/api/routes/documents.js +92 -0
  19. package/dist/api/routes/documents.js.map +1 -0
  20. package/dist/api/routes/health.d.ts +6 -0
  21. package/dist/api/routes/health.d.ts.map +1 -0
  22. package/dist/api/routes/health.js +38 -0
  23. package/dist/api/routes/health.js.map +1 -0
  24. package/dist/api/routes/index.d.ts +5 -0
  25. package/dist/api/routes/index.d.ts.map +1 -0
  26. package/dist/api/routes/index.js +83 -0
  27. package/dist/api/routes/index.js.map +1 -0
  28. package/dist/api/routes/search.d.ts +6 -0
  29. package/dist/api/routes/search.d.ts.map +1 -0
  30. package/dist/api/routes/search.js +104 -0
  31. package/dist/api/routes/search.js.map +1 -0
  32. package/dist/config/loader.d.ts +4 -0
  33. package/dist/config/loader.d.ts.map +1 -0
  34. package/dist/config/loader.js +144 -0
  35. package/dist/config/loader.js.map +1 -0
  36. package/dist/config/schema.d.ts +298 -0
  37. package/dist/config/schema.d.ts.map +1 -0
  38. package/dist/config/schema.js +50 -0
  39. package/dist/config/schema.js.map +1 -0
  40. package/dist/embeddings/ollama.d.ts +14 -0
  41. package/dist/embeddings/ollama.d.ts.map +1 -0
  42. package/dist/embeddings/ollama.js +46 -0
  43. package/dist/embeddings/ollama.js.map +1 -0
  44. package/dist/embeddings/transformers.d.ts +14 -0
  45. package/dist/embeddings/transformers.d.ts.map +1 -0
  46. package/dist/embeddings/transformers.js +64 -0
  47. package/dist/embeddings/transformers.js.map +1 -0
  48. package/dist/embeddings/types.d.ts +6 -0
  49. package/dist/embeddings/types.d.ts.map +1 -0
  50. package/dist/embeddings/types.js +2 -0
  51. package/dist/embeddings/types.js.map +1 -0
  52. package/dist/index.d.ts +3 -0
  53. package/dist/index.d.ts.map +1 -0
  54. package/dist/index.js +233 -0
  55. package/dist/index.js.map +1 -0
  56. package/dist/ingestion/chunker.d.ts +21 -0
  57. package/dist/ingestion/chunker.d.ts.map +1 -0
  58. package/dist/ingestion/chunker.js +117 -0
  59. package/dist/ingestion/chunker.js.map +1 -0
  60. package/dist/ingestion/fingerprint.d.ts +6 -0
  61. package/dist/ingestion/fingerprint.d.ts.map +1 -0
  62. package/dist/ingestion/fingerprint.js +17 -0
  63. package/dist/ingestion/fingerprint.js.map +1 -0
  64. package/dist/ingestion/parser.d.ts +16 -0
  65. package/dist/ingestion/parser.d.ts.map +1 -0
  66. package/dist/ingestion/parser.js +98 -0
  67. package/dist/ingestion/parser.js.map +1 -0
  68. package/dist/ingestion/pipeline.d.ts +32 -0
  69. package/dist/ingestion/pipeline.d.ts.map +1 -0
  70. package/dist/ingestion/pipeline.js +191 -0
  71. package/dist/ingestion/pipeline.js.map +1 -0
  72. package/dist/ingestion/scanner.d.ts +2 -0
  73. package/dist/ingestion/scanner.d.ts.map +1 -0
  74. package/dist/ingestion/scanner.js +54 -0
  75. package/dist/ingestion/scanner.js.map +1 -0
  76. package/dist/mcp/server.d.ts +8 -0
  77. package/dist/mcp/server.d.ts.map +1 -0
  78. package/dist/mcp/server.js +73 -0
  79. package/dist/mcp/server.js.map +1 -0
  80. package/dist/mcp/tools.d.ts +6 -0
  81. package/dist/mcp/tools.d.ts.map +1 -0
  82. package/dist/mcp/tools.js +276 -0
  83. package/dist/mcp/tools.js.map +1 -0
  84. package/dist/rag/context-builder.d.ts +3 -0
  85. package/dist/rag/context-builder.d.ts.map +1 -0
  86. package/dist/rag/context-builder.js +27 -0
  87. package/dist/rag/context-builder.js.map +1 -0
  88. package/dist/rag/prompt-templates.d.ts +5 -0
  89. package/dist/rag/prompt-templates.d.ts.map +1 -0
  90. package/dist/rag/prompt-templates.js +41 -0
  91. package/dist/rag/prompt-templates.js.map +1 -0
  92. package/dist/search/hybrid.d.ts +14 -0
  93. package/dist/search/hybrid.d.ts.map +1 -0
  94. package/dist/search/hybrid.js +58 -0
  95. package/dist/search/hybrid.js.map +1 -0
  96. package/dist/search/query.d.ts +4 -0
  97. package/dist/search/query.d.ts.map +1 -0
  98. package/dist/search/query.js +23 -0
  99. package/dist/search/query.js.map +1 -0
  100. package/dist/search/reranker.d.ts +11 -0
  101. package/dist/search/reranker.d.ts.map +1 -0
  102. package/dist/search/reranker.js +44 -0
  103. package/dist/search/reranker.js.map +1 -0
  104. package/dist/store/db.d.ts +11 -0
  105. package/dist/store/db.d.ts.map +1 -0
  106. package/dist/store/db.js +75 -0
  107. package/dist/store/db.js.map +1 -0
  108. package/dist/store/reader.d.ts +8 -0
  109. package/dist/store/reader.d.ts.map +1 -0
  110. package/dist/store/reader.js +122 -0
  111. package/dist/store/reader.js.map +1 -0
  112. package/dist/store/schema.d.ts +39 -0
  113. package/dist/store/schema.d.ts.map +1 -0
  114. package/dist/store/schema.js +33 -0
  115. package/dist/store/schema.js.map +1 -0
  116. package/dist/store/writer.d.ts +6 -0
  117. package/dist/store/writer.d.ts.map +1 -0
  118. package/dist/store/writer.js +43 -0
  119. package/dist/store/writer.js.map +1 -0
  120. package/dist/watcher/daemon.d.ts +5 -0
  121. package/dist/watcher/daemon.d.ts.map +1 -0
  122. package/dist/watcher/daemon.js +43 -0
  123. package/dist/watcher/daemon.js.map +1 -0
  124. package/dist/watcher/handler.d.ts +14 -0
  125. package/dist/watcher/handler.d.ts.map +1 -0
  126. package/dist/watcher/handler.js +82 -0
  127. package/dist/watcher/handler.js.map +1 -0
  128. package/package.json +56 -0
@@ -0,0 +1,98 @@
1
+ import * as path from 'node:path';
2
+ import remarkParse from 'remark-parse';
3
+ import { unified } from 'unified';
4
/**
 * Collect the plain text held by a syntax-tree node and all of its descendants.
 *
 * A node's own string `value` (if any) comes first, followed by the text of
 * each child in document order. Pieces are concatenated with no separator.
 *
 * @param node Tree node; may carry a string `value`, a `children` array, both, or neither.
 * @returns The concatenated text, or `''` when the node holds none.
 */
function extractNodeText(node) {
    let text = '';
    if ('value' in node && typeof node.value === 'string') {
        text += node.value;
    }
    if ('children' in node && Array.isArray(node.children)) {
        text += node.children.map((child) => extractNodeText(child)).join('');
    }
    return text;
}
16
/**
 * Line on which a node begins.
 *
 * @param node Tree node, optionally carrying `position.start.line`.
 * @returns The start line, or `0` when the node has no position information.
 */
function getNodeStart(node) {
    const startLine = node.position?.start?.line;
    return startLine ?? 0;
}
19
/**
 * Line on which a node ends.
 *
 * @param node Tree node, optionally carrying `position.end.line`.
 * @returns The end line, or `0` when the node has no position information.
 */
function getNodeEnd(node) {
    const endLine = node.position?.end?.line;
    return endLine ?? 0;
}
22
/**
 * Parse markdown and split it into heading-delimited sections.
 *
 * Only the root's direct children are inspected. Every heading node starts a
 * new section, and the heading node itself is the first node of that section.
 * Nodes that appear before the first heading form a preamble section with
 * `headingLevel` 0 and an empty `headingText`.
 *
 * @param content Markdown source text.
 * @param filePath Path recorded on the result; it is not read from disk here.
 * @returns `{ filePath, sections }`, with sections in document order.
 */
export function parseMarkdown(content, filePath) {
    const tree = unified().use(remarkParse).parse(content);
    const topLevelNodes = tree.children;
    const sections = [];
    // Chain of headings enclosing the section being collected; the section's
    // own heading is the last entry.
    const headingStack = [];
    let pendingNodes = [];
    let sectionStartLine = 1;
    let sectionLevel = 0;
    let sectionTitle = '';
    // Emit the section collected so far, ending at `endLine`, and start over.
    const emitSection = (endLine) => {
        const titles = headingStack.map((entry) => entry.text);
        const bodyParts = [];
        for (const pending of pendingNodes) {
            const text = extractNodeText(pending);
            // Nodes that contribute only whitespace are left out of the content.
            if (text.trim().length > 0) {
                bodyParts.push(text);
            }
        }
        sections.push({
            headingPath: titles.join(' > '),
            headingText: sectionTitle,
            headingLevel: sectionLevel,
            lineStart: sectionStartLine,
            lineEnd: endLine,
            content: bodyParts.join('\n\n'),
            parentHeadings: titles,
        });
        pendingNodes = [];
    };
    for (const node of topLevelNodes) {
        if (node.type !== 'heading') {
            const startsPreamble = pendingNodes.length === 0 && sections.length === 0 && headingStack.length === 0;
            if (startsPreamble) {
                // Content before the first heading: the section begins at this node.
                sectionStartLine = getNodeStart(node);
            }
            pendingNodes.push(node);
            continue;
        }
        const level = node.depth;
        const title = extractNodeText(node);
        const headingLine = getNodeStart(node);
        // Close the previous section on the line just above this heading. This
        // must happen before the stack changes so the emitted path is the old one.
        if (pendingNodes.length > 0 || sectionLevel > 0) {
            emitSection(headingLine - 1);
        }
        // Drop headings at the same or a deeper level; they cannot be ancestors.
        while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= level) {
            headingStack.pop();
        }
        sectionLevel = level;
        sectionTitle = title;
        sectionStartLine = headingLine;
        pendingNodes = [node];
        headingStack.push({ level, text: title });
    }
    // Emit whatever is still being collected, ending at the last node's end line.
    const hasNodes = topLevelNodes.length > 0;
    if (pendingNodes.length > 0 || (sections.length === 0 && hasNodes)) {
        const lastLine = hasNodes ? getNodeEnd(topLevelNodes[topLevelNodes.length - 1]) : 0;
        emitSection(lastLine);
    }
    return { filePath, sections };
}
95
/**
 * File name of a path with its final extension removed.
 *
 * @param filePath Path to a file.
 * @returns The base name without the extension reported by `path.extname`.
 */
export function getFileStem(filePath) {
    const extension = path.extname(filePath);
    return path.basename(filePath, extension);
}
98
+ //# sourceMappingURL=parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/ingestion/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAsBlC,SAAS,eAAe,CAAC,IAAU;IACjC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,UAAU,IAAI,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAkB,EAAE,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACxB,CAAC;AAED,SAAS,YAAY,CAAC,IAAU;IAC9B,OAAO,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,SAAS,UAAU,CAAC,IAAU;IAC5B,OAAO,IAAI,CAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,IAAI,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,QAAgB;IAC7D,MAAM,SAAS,GAAG,OAAO,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAS,CAAC;IAE7C,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,MAAM,YAAY,GAAwB,EAAE,CAAC;IAE7C,IAAI,mBAAmB,GAAW,EAAE,CAAC;IACrC,IAAI,mBAAmB,GAAG,CAAC,CAAC;IAC5B,IAAI,mBAAmB,GAAG,CAAC,CAAC;IAC5B,IAAI,kBAAkB,GAAG,EAAE,CAAC;IAE5B,SAAS,YAAY,CAAC,OAAe;QACnC,MAAM,WAAW,GAAG,mBAAmB;aACpC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;aAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;aAClC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,cAAc,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE/F,QAAQ,CAAC,IAAI,CAAC;YACZ,WAAW;YACX,WAAW,EAAE,kBAAkB;YAC/B,YAAY,EAAE,mBAAmB;YACjC,SAAS,EAAE,mBAAmB;YAC9B,OAAO,EAAE,OAAO;YAChB,OAAO,EAAE,WAAW;YACpB,cAAc,EAAE,CAAC,GAAG,cAAc,CAAC;SACpC,CAAC,CAAC;QAEH,mBAAmB,GAAG,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM,QAAQ,GAAG,GAAG,CAAC,QAA
Q,CAAC;IAC9B,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;QAE1B,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAC5B,MAAM,WAAW,GAAG,IAAe,CAAC;YACpC,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAChC,MAAM,WAAW,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;YACjD,MAAM,SAAS,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;YAE5C,yBAAyB;YACzB,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,IAAI,mBAAmB,GAAG,CAAC,EAAE,CAAC;gBAC9D,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YAC9B,CAAC;YAED,uBAAuB;YACvB,uCAAuC;YACvC,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;gBACxF,YAAY,CAAC,GAAG,EAAE,CAAC;YACrB,CAAC;YAED,yDAAyD;YACzD,mBAAmB,GAAG,KAAK,CAAC;YAC5B,kBAAkB,GAAG,WAAW,CAAC;YACjC,mBAAmB,GAAG,SAAS,CAAC;YAEhC,4CAA4C;YAC5C,mBAAmB,GAAG,CAAC,WAAW,CAAC,CAAC;YAEpC,sCAAsC;YACtC,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3F,wCAAwC;gBACxC,mBAAmB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YAC3C,CAAC;YACD,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;QAED,CAAC,EAAE,CAAC;IACN,CAAC;IAED,qBAAqB;IACrB,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;QACrF,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACtF,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAED,OAAO;QACL,QAAQ;QACR,QAAQ;KACT,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;AACzD,CAAC"}
@@ -0,0 +1,32 @@
1
+ import type { Config } from '../config/schema.js';
2
/** A file that could not be indexed, together with the reason. */
export interface FileError {
    /** Path of the file whose processing failed. */
    filePath: string;
    /** The thrown error's message, or its string form when it was not an `Error`. */
    error: string;
}
6
/** Totals reported by one `IndexPipeline.run` call. */
export interface IndexResult {
    /** Number of files whose chunks and file record were written. */
    indexed: number;
    /** Number of files left alone because their stored hash matched and `force` was off. */
    skipped: number;
    /** One entry per file that threw while being processed. */
    errors: FileError[];
}
11
/**
 * Callback notified as indexing advances.
 *
 * Which optional fields are set depends on `type`:
 * - `file_start`: `filePath`
 * - `file_done`: `filePath`, `indexed`, `skipped`, `errors`
 * - `file_skip`: `filePath`, `skipped`
 * - `file_error`: `filePath`, `errors`
 * - `batch_embed`: `total`
 */
export type ProgressCallback = (event: {
    type: 'file_start' | 'file_done' | 'file_skip' | 'file_error' | 'batch_embed';
    /** File the event refers to. */
    filePath?: string;
    /** Running count of indexed files. */
    indexed?: number;
    /** Running count of skipped files. */
    skipped?: number;
    /** Running count of failed files. */
    errors?: number;
    /** For `batch_embed`: number of chunks embedded so far in the current flush. */
    total?: number;
}) => void;
19
/** Options accepted by `IndexPipeline.run`. */
export interface RunOptions {
    /** Files and/or directories to index; paths that do not exist are warned about and skipped. */
    paths: string[];
    /** Re-index files even when their stored hash is unchanged. Defaults to `false`. */
    force?: boolean;
    /** Optional progress listener. */
    onProgress?: ProgressCallback;
    /** Maximum number of files processed at the same time. Defaults to `4`. */
    concurrency?: number;
}
25
/** Scans paths for matching files, chunks and embeds them, and writes the results to the store. */
export declare class IndexPipeline {
    /** Configuration supplied to the constructor. */
    private config;
    /** Embedder instance, created on first use and then reused. */
    private embedder;
    constructor(config: Config);
    /** Returns the cached embedder, creating it from `config.embeddings` the first time. */
    private getEmbedder;
    /**
     * Index every matching file under `options.paths`.
     *
     * @returns Counts of indexed and skipped files plus per-file errors.
     */
    run(options: RunOptions): Promise<IndexResult>;
}
32
+ //# sourceMappingURL=pipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAalD,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,SAAS,EAAE,CAAC;CACrB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE;IACrC,IAAI,EAAE,YAAY,GAAG,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,aAAa,CAAC;IAC9E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAK,IAAI,CAAC;AAEX,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,QAAQ,CAAyB;gBAE7B,MAAM,EAAE,MAAM;IAI1B,OAAO,CAAC,WAAW;IAcb,GAAG,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;CA6LrD"}
@@ -0,0 +1,191 @@
1
+ import * as fs from 'node:fs';
2
+ import pLimit from 'p-limit';
3
+ import { OllamaEmbedder } from '../embeddings/ollama.js';
4
+ import { TransformersEmbedder } from '../embeddings/transformers.js';
5
+ import { createIndexes, getChunksTable, getDb, getFilesTable } from '../store/db.js';
6
+ import { getAllFiles } from '../store/reader.js';
7
+ import { upsertChunks, upsertFile } from '../store/writer.js';
8
+ import { chunkDocument } from './chunker.js';
9
+ import { hashFile, hashPath } from './fingerprint.js';
10
+ import { parseMarkdown } from './parser.js';
11
+ import { scanDirectory } from './scanner.js';
12
/**
 * Indexes files into the store: collects candidate files, skips those whose
 * content hash is unchanged, parses and chunks the rest, embeds the chunks in
 * batches, and upserts chunk and file records.
 */
export class IndexPipeline {
    config;
    embedder = null;
    /**
     * @param config Application configuration; the `embeddings`, `paths`,
     *   `watcher` and `chunking` sections are read by this class.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Return the embedder, constructing it on first call.
     *
     * The `'ollama'` backend yields an `OllamaEmbedder`; any other backend
     * value yields a `TransformersEmbedder`.
     */
    getEmbedder() {
        if (this.embedder)
            return this.embedder;
        const { backend, model, ollama_base_url } = this.config.embeddings;
        if (backend === 'ollama') {
            this.embedder = new OllamaEmbedder(model, ollama_base_url);
        }
        else {
            this.embedder = new TransformersEmbedder(model, this.config.paths.model_cache_dir);
        }
        return this.embedder;
    }
    /**
     * Index every matching file found under `options.paths`.
     *
     * Per-file failures are collected in the result (and recorded in the files
     * table on a best-effort basis) instead of aborting the run. A failure
     * while embedding or writing a batch is not caught here and rejects the
     * returned promise.
     *
     * @param options See `RunOptions`: `paths`, `force` (default `false`),
     *   `onProgress`, `concurrency` (default `4`).
     * @returns Counts of indexed and skipped files plus the per-file errors.
     */
    async run(options) {
        const { paths, force = false, onProgress, concurrency = 4 } = options;
        const db = await getDb(this.config.paths.db_path);
        const chunksTable = await getChunksTable(db);
        const filesTable = await getFilesTable(db);
        // Build map of existing file hashes
        const existingFiles = await getAllFiles(filesTable);
        const existingHashMap = new Map(existingFiles.map((f) => [f.file_id, f.file_hash]));
        const { extensions, ignore_patterns } = this.config.watcher;
        // Collect all files to process
        const filesToProcess = [];
        for (const searchPath of paths) {
            if (!fs.existsSync(searchPath)) {
                console.warn(`Path not found: ${searchPath}`);
                continue;
            }
            const stat = fs.statSync(searchPath);
            if (stat.isDirectory()) {
                for await (const filePath of scanDirectory(searchPath, extensions, ignore_patterns)) {
                    filesToProcess.push(filePath);
                }
            }
            else if (stat.isFile()) {
                // A file given directly is accepted only if its extension is configured.
                const ext = searchPath.match(/\.[^.]+$/)?.[0]?.toLowerCase() ?? '';
                if (extensions.includes(ext)) {
                    filesToProcess.push(searchPath);
                }
            }
        }
        const result = {
            indexed: 0,
            skipped: 0,
            errors: [],
        };
        const limit = pLimit(concurrency);
        const embedder = this.getEmbedder();
        const { batch_size } = this.config.embeddings;
        // Work queued by processFile and drained by flushBatch. A file's chunks
        // and its record are always pushed together with no await in between.
        let pendingChunks = [];
        let pendingFiles = [];
        const flushBatch = async () => {
            // Flush when files are pending even if they produced no chunks, so a
            // batch made only of zero-chunk files is still recorded.
            if (pendingChunks.length === 0 && pendingFiles.length === 0)
                return;
            // Take ownership of the queued work BEFORE the first await. Tasks that
            // run while this flush is awaiting push into the fresh arrays, so they
            // are never written without embeddings, never discarded when the flush
            // finishes, and never processed twice by an overlapping flush.
            const batchChunks = pendingChunks;
            const batchFiles = pendingFiles;
            pendingChunks = [];
            pendingFiles = [];
            // Embed the batch in slices of at most batch_size texts.
            const texts = batchChunks.map((c) => c.text);
            const batchCount = Math.ceil(texts.length / batch_size);
            for (let b = 0; b < batchCount; b++) {
                const start = b * batch_size;
                const end = Math.min(start + batch_size, texts.length);
                const batchTexts = texts.slice(start, end);
                const embeddings = await embedder.embed(batchTexts);
                for (let i = 0; i < batchTexts.length; i++) {
                    batchChunks[start + i].vector = embeddings[i];
                }
                onProgress?.({ type: 'batch_embed', total: end });
            }
            // Group chunks by file in one pass instead of filtering per file.
            const chunksByFile = new Map();
            for (const chunk of batchChunks) {
                const group = chunksByFile.get(chunk.file_id);
                if (group) {
                    group.push(chunk);
                }
                else {
                    chunksByFile.set(chunk.file_id, [chunk]);
                }
            }
            // Upsert chunks, then the file record, one file at a time.
            for (const file of batchFiles) {
                await upsertChunks(chunksTable, chunksByFile.get(file.file_id) ?? []);
                await upsertFile(filesTable, file);
                result.indexed++;
                onProgress?.({
                    type: 'file_done',
                    filePath: file.file_path,
                    indexed: result.indexed,
                    skipped: result.skipped,
                    errors: result.errors.length,
                });
            }
        };
        const processFile = async (filePath) => {
            onProgress?.({ type: 'file_start', filePath });
            const fileId = hashPath(filePath);
            const { hash: fileHash, mtime: fileMtime } = await hashFile(filePath);
            // Skip unchanged files unless force
            if (!force && existingHashMap.get(fileId) === fileHash) {
                result.skipped++;
                onProgress?.({
                    type: 'file_skip',
                    filePath,
                    skipped: result.skipped,
                });
                return;
            }
            const content = fs.readFileSync(filePath, 'utf-8');
            const doc = parseMarkdown(content, filePath);
            const chunks = chunkDocument(doc, {
                fileId,
                fileHash,
                filePath,
                fileMtime,
                maxTokens: this.config.chunking.max_tokens,
                minChars: this.config.chunking.min_chars,
                overlapTokens: this.config.chunking.overlap_tokens,
                includeBreadcrumb: this.config.chunking.include_breadcrumb,
            });
            const fileRecord = {
                file_id: fileId,
                file_path: filePath,
                file_hash: fileHash,
                file_mtime: fileMtime,
                chunk_count: chunks.length,
                indexed_at: Date.now(),
                status: 'indexed',
                error_msg: '',
            };
            // Keep these two pushes adjacent with no await between them: flushBatch
            // relies on a file's chunks and record landing in the same batch.
            pendingChunks.push(...chunks);
            pendingFiles.push(fileRecord);
        };
        // Process files with concurrency limit
        const tasks = filesToProcess.map((filePath) => limit(async () => {
            try {
                await processFile(filePath);
            }
            catch (err) {
                result.errors.push({
                    filePath,
                    error: err instanceof Error ? err.message : String(err),
                });
                onProgress?.({
                    type: 'file_error',
                    filePath,
                    errors: result.errors.length,
                });
                // Record error in files table
                const fileId = hashPath(filePath);
                const fileRecord = {
                    file_id: fileId,
                    file_path: filePath,
                    file_hash: '',
                    file_mtime: 0,
                    chunk_count: 0,
                    indexed_at: Date.now(),
                    status: 'error',
                    error_msg: err instanceof Error ? err.message : String(err),
                };
                try {
                    await upsertFile(filesTable, fileRecord);
                }
                catch {
                    // Best effort: the failure is already reported in result.errors.
                }
            }
            // Flush when we have enough chunks
            if (pendingChunks.length >= batch_size * 4) {
                await flushBatch();
            }
        }));
        await Promise.all(tasks);
        // Final flush
        await flushBatch();
        // Create indexes after bulk loading
        if (result.indexed > 0) {
            await createIndexes(db);
        }
        return result;
    }
}
191
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,MAAM,MAAM,SAAS,CAAC;AAE7B,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAC;AAErE,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AA6B7C,MAAM,OAAO,aAAa;IAChB,MAAM,CAAS;IACf,QAAQ,GAAoB,IAAI,CAAC;IAEzC,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAEO,WAAW;QACjB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QAExC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAEnE,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;YACzB,IAAI,CAAC,QAAQ,GAAG,IAAI,cAAc,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,QAAQ,GAAG,IAAI,oBAAoB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QACrF,CAAC;QAED,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAmB;QAC3B,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,KAAK,EAAE,UAAU,EAAE,WAAW,GAAG,CAAC,EAAE,GAAG,OAAO,CAAC;QAEtE,MAAM,EAAE,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClD,MAAM,WAAW,GAAG,MAAM,cAAc,CAAC,EAAE,CAAC,CAAC;QAC7C,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,EAAE,CAAC,CAAC;QAE3C,oCAAoC;QACpC,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QACpD,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAEpF,MAAM,EAAE,UAAU,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC;QAE5D,+BAA+B;QAC/B,MAAM,cAAc,GAAa,EAAE,CAAC;QAEpC,KAAK,MAAM,UAAU,IAAI,KAAK,EAAE,CAAC;YAC/B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,mBAAmB,UAAU,EAAE,CAAC,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,C
AAC,UAAU,CAAC,CAAC;YACrC,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gBACvB,IAAI,KAAK,EAAE,MAAM,QAAQ,IAAI,aAAa,CAAC,UAAU,EAAE,UAAU,EAAE,eAAe,CAAC,EAAE,CAAC;oBACpF,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC;iBAAM,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;gBACzB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;gBACnE,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC7B,cAAc,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAgB;YAC1B,OAAO,EAAE,CAAC;YACV,OAAO,EAAE,CAAC;YACV,MAAM,EAAE,EAAE;SACX,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;QAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAE9C,IAAI,aAAa,GAAkB,EAAE,CAAC;QACtC,IAAI,YAAY,GAAiB,EAAE,CAAC;QAEpC,MAAM,UAAU,GAAG,KAAK,IAAmB,EAAE;YAC3C,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO;YAEvC,2BAA2B;YAC3B,MAAM,KAAK,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC;YAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,KAAK,GAAG,CAAC,GAAG,UAAU,CAAC;gBAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;gBACvD,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBAE3C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;gBAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC3C,aAAa,CAAC,KAAK,GAAG,CAAC,CAAE,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAE,CAAC;gBACpD,CAAC;gBAED,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACpD,CAAC;YAED,gBAAgB;YAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAE,CAAC;gBAC9B,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC;gBAC3E,MAAM,YAAY,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;gBAC5C,MAAM,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;
gBACnC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACjB,UAAU,EAAE,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,QAAQ,EAAE,IAAI,CAAC,SAAS;oBACxB,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM;iBAC7B,CAAC,CAAC;YACL,CAAC;YAED,aAAa,GAAG,EAAE,CAAC;YACnB,YAAY,GAAG,EAAE,CAAC;QACpB,CAAC,CAAC;QAEF,MAAM,WAAW,GAAG,KAAK,EAAE,QAAgB,EAAiB,EAAE;YAC5D,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC,CAAC;YAE/C,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAClC,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAEtE,oCAAoC;YACpC,IAAI,CAAC,KAAK,IAAI,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBACvD,MAAM,CAAC,OAAO,EAAE,CAAC;gBACjB,UAAU,EAAE,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,QAAQ;oBACR,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAE7C,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,EAAE;gBAChC,MAAM;gBACN,QAAQ;gBACR,QAAQ;gBACR,SAAS;gBACT,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU;gBAC1C,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS;gBACxC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc;gBAClD,iBAAiB,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB;aAC3D,CAAC,CAAC;YAEH,MAAM,UAAU,GAAe;gBAC7B,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,QAAQ;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,SAAS;gBACrB,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;gBACtB,MAAM,EAAE,SAAS;gBACjB,SAAS,EAAE,EAAE;aACd,CAAC;YAEF,aAAa,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YAC9B,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAChC,CAAC,CAAC;QAEF,uCAAuC;QACvC,MAAM,KAAK,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAC5C,KAAK,CAAC,KAAK,IAAI,EAAE;YACf,IAAI,CAAC;gBACH,MAAM,WAAW,CAAC,QAAQ,CAAC,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;oBACjB,QAAQ;oBACR,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;iBACxD,CAAC,CAAC;gBACH,UAAU,EAAE,CAAC;oBACX,IAAI,EAAE,YAAY;oBAClB,QAAQ;oBACR,MAAM,EAAE,MAAM,CAAC,
MAAM,CAAC,MAAM;iBAC7B,CAAC,CAAC;gBAEH,8BAA8B;gBAC9B,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBAClC,MAAM,UAAU,GAAe;oBAC7B,OAAO,EAAE,MAAM;oBACf,SAAS,EAAE,QAAQ;oBACnB,SAAS,EAAE,EAAE;oBACb,UAAU,EAAE,CAAC;oBACb,WAAW,EAAE,CAAC;oBACd,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;oBACtB,MAAM,EAAE,OAAO;oBACf,SAAS,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;iBAC5D,CAAC;gBACF,IAAI,CAAC;oBACH,MAAM,UAAU,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;gBAC3C,CAAC;gBAAC,MAAM,CAAC;oBACP,cAAc;gBAChB,CAAC;YACH,CAAC;YAED,mCAAmC;YACnC,IAAI,aAAa,CAAC,MAAM,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;gBAC3C,MAAM,UAAU,EAAE,CAAC;YACrB,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEzB,cAAc;QACd,MAAM,UAAU,EAAE,CAAC;QAEnB,oCAAoC;QACpC,IAAI,MAAM,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,aAAa,CAAC,EAAE,CAAC,CAAC;QAC1B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
/**
 * Recursively yields the paths of files under `dir` whose lower-cased
 * extension is listed in `extensions`. Yields nothing when `dir` does not
 * exist or is not a directory.
 *
 * @param dir Directory to scan; it is resolved to an absolute path first.
 * @param extensions Accepted extensions, compared against the lower-cased `path.extname` of each file name.
 * @param ignorePatterns Glob-like patterns for paths to skip. Defaults to `['**\/.git/**', '**\/node_modules/**']` (shown with `*\/` escaped for this comment).
 */
export declare function scanDirectory(dir: string, extensions: string[], ignorePatterns?: string[]): AsyncGenerator<string>;
2
+ //# sourceMappingURL=scanner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scanner.d.ts","sourceRoot":"","sources":["../../src/ingestion/scanner.ts"],"names":[],"mappings":"AAqBA,wBAAuB,aAAa,CAClC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,MAAM,EAAE,EACpB,cAAc,GAAE,MAAM,EAAyC,GAC9D,cAAc,CAAC,MAAM,CAAC,CAaxB"}
@@ -0,0 +1,54 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
/**
 * Test a path against one glob-like pattern.
 *
 * The pattern is translated to a regular expression: `**` matches any run of
 * characters, `*` matches any run without `/`, `?` matches one character other
 * than `/`, and everything else is literal. The expression is not anchored, so
 * a match anywhere inside the path counts.
 *
 * @param filePath Path to test; it is tried as given and with `\` replaced by `/`.
 * @param pattern Glob-like pattern.
 * @returns `true` when the pattern matches somewhere in the path.
 */
function matchesPattern(filePath, pattern) {
    // Escape regex metacharacters first; `*` and `?` are left for the glob steps.
    let source = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
    // Park `**` behind a placeholder so the single-star rule cannot rewrite it.
    source = source.replace(/\*\*/g, '<<DOUBLE_STAR>>');
    source = source.replace(/\*/g, '[^/]*');
    source = source.replace(/\?/g, '[^/]');
    source = source.replace(/<<DOUBLE_STAR>>/g, '.*');
    const matcher = new RegExp(source);
    if (matcher.test(filePath)) {
        return true;
    }
    return matcher.test(filePath.replace(/\\/g, '/'));
}
14
/**
 * Test a path against a list of glob-like patterns.
 *
 * @param filePath Path to test; `\` separators are converted to `/` first.
 * @param patterns Patterns to try in order.
 * @returns `true` as soon as one pattern matches, otherwise `false`.
 */
function matchesAnyPattern(filePath, patterns) {
    const candidate = filePath.replace(/\\/g, '/');
    for (const pattern of patterns) {
        if (matchesPattern(candidate, pattern)) {
            return true;
        }
    }
    return false;
}
18
/**
 * Recursively yield matching files beneath a directory.
 *
 * Yields nothing when `dir` does not exist or is not a directory.
 *
 * @param dir Directory to scan; resolved to an absolute path before walking.
 * @param extensions Accepted file extensions.
 * @param ignorePatterns Glob-like patterns for paths to skip.
 */
export async function* scanDirectory(dir, extensions, ignorePatterns = ['**/.git/**', '**/node_modules/**']) {
    const root = path.resolve(dir);
    const isExistingDirectory = fs.existsSync(root) && fs.statSync(root).isDirectory();
    if (isExistingDirectory) {
        yield* walkDirectory(root, extensions, ignorePatterns);
    }
}
29
/**
 * Depth-first walk of `dir`, yielding files whose lower-cased extension is in
 * `extensions` and skipping every path that matches an ignore pattern.
 *
 * Directories that cannot be read are skipped silently.
 *
 * @param dir Directory to walk.
 * @param extensions Accepted extensions, compared against the lower-cased `path.extname` of each file name.
 * @param ignorePatterns Glob-like patterns for paths to skip.
 */
async function* walkDirectory(dir, extensions, ignorePatterns) {
    let entries;
    try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        // Unreadable directory: best effort, skip it.
        return;
    }
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        const normalizedPath = fullPath.replace(/\\/g, '/');
        const isDirectory = entry.isDirectory();
        // Directories are probed with a trailing slash so that patterns such as
        // `**/node_modules/**` match the directory itself and prune it up front.
        // Without the slash the directory was read and each child rejected one by
        // one. The patterns are matched unanchored, so anything that matches
        // `<dir>/` also matches every path below it: the yielded files are the same.
        const probePath = isDirectory ? `${normalizedPath}/` : normalizedPath;
        if (matchesAnyPattern(probePath, ignorePatterns)) {
            continue;
        }
        if (isDirectory) {
            yield* walkDirectory(fullPath, extensions, ignorePatterns);
        }
        else if (entry.isFile()) {
            const ext = path.extname(entry.name).toLowerCase();
            if (extensions.includes(ext)) {
                yield fullPath;
            }
        }
    }
}
54
+ //# sourceMappingURL=scanner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scanner.js","sourceRoot":"","sources":["../../src/ingestion/scanner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,SAAS,cAAc,CAAC,QAAgB,EAAE,OAAe;IACvD,gCAAgC;IAChC,MAAM,OAAO,GAAG,OAAO;SACpB,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC;SACpC,OAAO,CAAC,OAAO,EAAE,iBAAiB,CAAC;SACnC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC;SACvB,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC;SACtB,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAErC,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;AAC1E,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB,EAAE,QAAkB;IAC7D,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACpD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,cAAc,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,aAAa,CAClC,GAAW,EACX,UAAoB,EACpB,iBAA2B,CAAC,YAAY,EAAE,oBAAoB,CAAC;IAE/D,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAExC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,OAAO;IACT,CAAC;IAED,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;IACxC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACxB,OAAO;IACT,CAAC;IAED,KAAK,CAAC,CAAC,aAAa,CAAC,aAAa,EAAE,UAAU,EAAE,cAAc,CAAC,CAAC;AAClE,CAAC;AAED,KAAK,SAAS,CAAC,CAAC,aAAa,CAC3B,GAAW,EACX,UAAoB,EACpB,cAAwB;IAExB,IAAI,OAAoB,CAAC;IAEzB,IAAI,CAAC;QACH,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAEpD,IAAI,iBAAiB,CAAC,cAAc,EAAE,cAAc,CAAC,EAAE,CAAC;YACtD,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,CAAC,aAAa,CAAC,QAAQ,EAAE,UAAU,EAAE,cAAc,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAA
C;YACnD,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,MAAM,QAAQ,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type * as lancedb from '@lancedb/lancedb';
2
+ import type { Config } from '../config/schema.js';
3
+ import type { Embedder } from '../embeddings/types.js';
4
/**
 * Start the sqmd MCP server on the selected transport.
 *
 * @param db - LanceDB connection, forwarded to tool registration.
 * @param embedder - Embedder, forwarded to tool registration.
 * @param config - Loaded configuration; `config.mcp.transport` and
 *   `config.mcp.sse_port` supply the defaults for `options`.
 * @param options - Per-call overrides: `transport` takes precedence over
 *   `config.mcp.transport`, `port` over `config.mcp.sse_port`.
 * @returns A promise that resolves once the stdio transport is connected.
 *   In SSE mode the implementation awaits a never-settling promise after the
 *   HTTP listener is up, so the returned promise does not resolve.
 * @throws Error `Unknown transport: …` when the resolved transport is neither
 *   `'stdio'` nor `'sse'`.
 */
+ export declare function startMcpServer(db: lancedb.Connection, embedder: Embedder, config: Config, options?: {
5
+ transport?: 'stdio' | 'sse';
6
+ port?: number;
7
+ }): Promise<void>;
8
+ //# sourceMappingURL=server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,KAAK,OAAO,MAAM,kBAAkB,CAAC;AAIjD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAGvD,wBAAsB,cAAc,CAClC,EAAE,EAAE,OAAO,CAAC,UAAU,EACtB,QAAQ,EAAE,QAAQ,EAClB,MAAM,EAAE,MAAM,EACd,OAAO,GAAE;IAAE,SAAS,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAA;CAAO,GAC3D,OAAO,CAAC,IAAI,CAAC,CAiFf"}
@@ -0,0 +1,73 @@
1
+ import { createServer } from 'node:http';
2
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
+ import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
4
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
5
+ import { registerTools } from './tools.js';
6
/**
 * Start the sqmd MCP server on the selected transport.
 *
 * The transport and port come from `options` when given, otherwise from
 * `config.mcp.transport` / `config.mcp.sse_port`.
 *
 * - `stdio`: connects a `StdioServerTransport` and returns once connected.
 *   Diagnostics go to stderr so that stdout stays reserved for the protocol.
 * - `sse`: starts an HTTP server exposing `GET /sse` (opens an event stream)
 *   and `POST /messages?sessionId=…` (delivers a client message to the stream
 *   that owns that session). The returned promise never resolves in this mode;
 *   it keeps the caller's `await` pending for the lifetime of the process.
 *
 * @param db LanceDB connection handed to `registerTools`.
 * @param embedder Embedder handed to `registerTools`.
 * @param config Loaded configuration (reads `config.mcp.transport` and
 *   `config.mcp.sse_port`).
 * @param options Optional overrides: `{ transport?: 'stdio' | 'sse', port?: number }`.
 * @returns {Promise<void>}
 * @throws {Error} `Unknown transport: …` for any transport other than
 *   `'stdio'` or `'sse'`; also rejects if the HTTP server fails to start
 *   listening (e.g. the port is already in use).
 */
export async function startMcpServer(db, embedder, config, options = {}) {
    const transport = options.transport ?? config.mcp.transport;
    const port = options.port ?? config.mcp.sse_port;

    // Normalise anything thrown/rejected into a printable message.
    const describeError = (err) => (err instanceof Error ? err.message : String(err));

    const server = new Server({
        name: 'sqmd',
        version: '0.1.0',
    }, {
        capabilities: {
            tools: {},
            resources: {},
        },
    });

    // Register all tools and resources
    registerTools(server, db, embedder, config);

    if (transport === 'stdio') {
        console.error('[mcp] Starting stdio transport');
        const stdioTransport = new StdioServerTransport();
        await server.connect(stdioTransport);
        console.error('[mcp] Server connected via stdio');
        return;
    }

    if (transport !== 'sse') {
        throw new Error(`Unknown transport: ${transport}`);
    }

    console.log(`[mcp] Starting SSE transport on port ${port}`);
    const httpServer = createServer();

    // Active SSE streams keyed by the session id that the transport itself
    // advertises to the client in its `endpoint` event.
    const sseTransports = new Map();

    httpServer.on('request', (req, res) => {
        let url;
        try {
            url = new URL(req.url ?? '/', `http://localhost:${port}`);
        }
        catch {
            // A malformed request target must not escape the listener as an
            // uncaught exception.
            res.writeHead(400);
            res.end('Bad request');
            return;
        }

        if (req.method === 'GET' && url.pathname === '/sse') {
            const sseTransport = new SSEServerTransport('/messages', res);
            // Use the transport's own id: it is the value clients send back as
            // `?sessionId=` and, unlike a timestamp, cannot collide when two
            // clients connect within the same millisecond.
            const sessionId = sseTransport.sessionId;
            sseTransports.set(sessionId, sseTransport);
            res.on('close', () => {
                sseTransports.delete(sessionId);
            });
            // NOTE(review): every SSE client is attached to the same `Server`
            // instance; confirm against the SDK whether concurrent clients need
            // one Server per connection.
            server.connect(sseTransport).catch((err) => {
                console.error('[mcp] SSE connection error:', describeError(err));
            });
            return;
        }

        if (req.method === 'POST' && url.pathname === '/messages') {
            // Route the message to the stream that owns the session instead of
            // to whichever transport happens to be first in the map.
            const requestedId = url.searchParams.get('sessionId');
            let target;
            if (requestedId !== null) {
                target = sseTransports.get(requestedId);
            }
            else if (sseTransports.size === 1) {
                // Backward compatibility: with exactly one open stream a POST
                // without a session id is unambiguous.
                target = sseTransports.values().next().value;
            }
            if (target === undefined) {
                res.writeHead(404);
                res.end('No active SSE connection');
                return;
            }
            target.handlePostMessage(req, res).catch((err) => {
                console.error('[mcp] Message handling error:', describeError(err));
            });
            return;
        }

        res.writeHead(404);
        res.end('Not found');
    });

    await new Promise((resolve, reject) => {
        // Only startup failures should reject; attach before listen() so that
        // an immediate bind error is not missed.
        httpServer.once('error', reject);
        httpServer.listen(port, () => {
            httpServer.off('error', reject);
            // After startup, surface server errors instead of feeding them to
            // an already-settled promise.
            httpServer.on('error', (err) => {
                console.error('[mcp] HTTP server error:', describeError(err));
            });
            console.log(`[mcp] SSE server listening on http://localhost:${port}/sse`);
            resolve();
        });
    });

    // Keep alive
    await new Promise(() => { });
}
73
+ //# sourceMappingURL=server.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,yCAAyC,CAAC;AAC7E,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAGjF,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,EAAsB,EACtB,QAAkB,EAClB,MAAc,EACd,UAA0D,EAAE;IAE5D,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,GAAG,CAAC,SAAS,CAAC;IAC5D,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC;IAEjD,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB;QACE,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,SAAS,EAAE,EAAE;SACd;KACF,CACF,CAAC;IAEF,mCAAmC;IACnC,aAAa,CAAC,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;IAE5C,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,MAAM,cAAc,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAClD,MAAM,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACpD,CAAC;SAAM,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,wCAAwC,IAAI,EAAE,CAAC,CAAC;QAE5D,MAAM,UAAU,GAAG,YAAY,EAAE,CAAC;QAClC,MAAM,aAAa,GAAG,IAAI,GAAG,EAA8B,CAAC;QAE5D,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACpC,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,oBAAoB,IAAI,EAAE,CAAC,CAAC;YAEhE,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,IAAI,GAAG,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;gBACpD,MAAM,YAAY,GAAG,IAAI,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC;gBACxC,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;gBAE3C,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;oBACnB,aAAa,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;gBAClC,CAAC,CAAC,CAAC;gBAEH,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,GAAY,EAAE,EAAE;oBAClD,OAAO,CAAC,KAAK,CACX,6BAA6B,EAC7B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CACjD,CAAC;gBACJ,CAAC,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,GAAG,CAAC,QAAQ,KAAK,WAAW,EAAE
,CAAC;gBACjE,kCAAkC;gBAClC,KAAK,MAAM,YAAY,IAAI,aAAa,CAAC,MAAM,EAAE,EAAE,CAAC;oBAClD,YAAY,CAAC,iBAAiB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAY,EAAE,EAAE;wBAC9D,OAAO,CAAC,KAAK,CACX,+BAA+B,EAC/B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CACjD,CAAC;oBACJ,CAAC,CAAC,CAAC;oBACH,OAAO;gBACT,CAAC;gBACD,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACnB,GAAG,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACtC,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;gBAC3B,OAAO,CAAC,GAAG,CAAC,kDAAkD,IAAI,MAAM,CAAC,CAAC;gBAC1E,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC,CAAC;YACH,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,aAAa;QACb,MAAM,IAAI,OAAO,CAAQ,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACrC,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAC;IACrD,CAAC;AACH,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type * as lancedb from '@lancedb/lancedb';
2
+ import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
+ import type { Config } from '../config/schema.js';
4
+ import type { Embedder } from '../embeddings/types.js';
5
/**
 * Attach sqmd's MCP handlers to `server`.
 *
 * NOTE(review): the implementation is not visible here. The caller in
 * `server.js` invokes this once, right after constructing the `Server` and
 * before connecting any transport, under the comment "Register all tools and
 * resources" — confirm the exact set of handlers against `tools.ts`.
 *
 * @param server - MCP server instance to register on.
 * @param db - LanceDB connection made available to the handlers.
 * @param embedder - Embedder made available to the handlers.
 * @param config - Loaded configuration made available to the handlers.
 */
+ export declare function registerTools(server: Server, db: lancedb.Connection, embedder: Embedder, config: Config): void;
6
+ //# sourceMappingURL=tools.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../src/mcp/tools.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,KAAK,OAAO,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAOxE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAQvD,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,EACd,EAAE,EAAE,OAAO,CAAC,UAAU,EACtB,QAAQ,EAAE,QAAQ,EAClB,MAAM,EAAE,MAAM,GACb,IAAI,CAuSN"}