@milo4jo/contextkit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +181 -0
  2. package/dist/commands/index-cmd.d.ts +3 -0
  3. package/dist/commands/index-cmd.d.ts.map +1 -0
  4. package/dist/commands/index-cmd.js +111 -0
  5. package/dist/commands/index-cmd.js.map +1 -0
  6. package/dist/commands/init.d.ts +3 -0
  7. package/dist/commands/init.d.ts.map +1 -0
  8. package/dist/commands/init.js +56 -0
  9. package/dist/commands/init.js.map +1 -0
  10. package/dist/commands/select.d.ts +3 -0
  11. package/dist/commands/select.d.ts.map +1 -0
  12. package/dist/commands/select.js +64 -0
  13. package/dist/commands/select.js.map +1 -0
  14. package/dist/commands/source/add.d.ts +3 -0
  15. package/dist/commands/source/add.d.ts.map +1 -0
  16. package/dist/commands/source/add.js +87 -0
  17. package/dist/commands/source/add.js.map +1 -0
  18. package/dist/commands/source/index.d.ts +3 -0
  19. package/dist/commands/source/index.d.ts.map +1 -0
  20. package/dist/commands/source/index.js +14 -0
  21. package/dist/commands/source/index.js.map +1 -0
  22. package/dist/commands/source/list.d.ts +3 -0
  23. package/dist/commands/source/list.d.ts.map +1 -0
  24. package/dist/commands/source/list.js +46 -0
  25. package/dist/commands/source/list.js.map +1 -0
  26. package/dist/commands/source/remove.d.ts +3 -0
  27. package/dist/commands/source/remove.d.ts.map +1 -0
  28. package/dist/commands/source/remove.js +38 -0
  29. package/dist/commands/source/remove.js.map +1 -0
  30. package/dist/commands/source.d.ts +3 -0
  31. package/dist/commands/source.d.ts.map +1 -0
  32. package/dist/commands/source.js +153 -0
  33. package/dist/commands/source.js.map +1 -0
  34. package/dist/config/index.d.ts +38 -0
  35. package/dist/config/index.d.ts.map +1 -0
  36. package/dist/config/index.js +100 -0
  37. package/dist/config/index.js.map +1 -0
  38. package/dist/config/types.d.ts +21 -0
  39. package/dist/config/types.d.ts.map +1 -0
  40. package/dist/config/types.js +5 -0
  41. package/dist/config/types.js.map +1 -0
  42. package/dist/db/index.d.ts +14 -0
  43. package/dist/db/index.d.ts.map +1 -0
  44. package/dist/db/index.js +63 -0
  45. package/dist/db/index.js.map +1 -0
  46. package/dist/errors/index.d.ts +30 -0
  47. package/dist/errors/index.d.ts.map +1 -0
  48. package/dist/errors/index.js +51 -0
  49. package/dist/errors/index.js.map +1 -0
  50. package/dist/index.d.ts +3 -0
  51. package/dist/index.d.ts.map +1 -0
  52. package/dist/index.js +90 -0
  53. package/dist/index.js.map +1 -0
  54. package/dist/indexer/chunker.d.ts +44 -0
  55. package/dist/indexer/chunker.d.ts.map +1 -0
  56. package/dist/indexer/chunker.js +102 -0
  57. package/dist/indexer/chunker.js.map +1 -0
  58. package/dist/indexer/discovery.d.ts +34 -0
  59. package/dist/indexer/discovery.d.ts.map +1 -0
  60. package/dist/indexer/discovery.js +66 -0
  61. package/dist/indexer/discovery.js.map +1 -0
  62. package/dist/indexer/embeddings.d.ts +32 -0
  63. package/dist/indexer/embeddings.d.ts.map +1 -0
  64. package/dist/indexer/embeddings.js +85 -0
  65. package/dist/indexer/embeddings.js.map +1 -0
  66. package/dist/indexer/index.d.ts +37 -0
  67. package/dist/indexer/index.d.ts.map +1 -0
  68. package/dist/indexer/index.js +123 -0
  69. package/dist/indexer/index.js.map +1 -0
  70. package/dist/selector/budget.d.ts +26 -0
  71. package/dist/selector/budget.d.ts.map +1 -0
  72. package/dist/selector/budget.js +75 -0
  73. package/dist/selector/budget.js.map +1 -0
  74. package/dist/selector/formatter.d.ts +44 -0
  75. package/dist/selector/formatter.d.ts.map +1 -0
  76. package/dist/selector/formatter.js +114 -0
  77. package/dist/selector/formatter.js.map +1 -0
  78. package/dist/selector/index.d.ts +37 -0
  79. package/dist/selector/index.d.ts.map +1 -0
  80. package/dist/selector/index.js +67 -0
  81. package/dist/selector/index.js.map +1 -0
  82. package/dist/selector/scoring.d.ts +26 -0
  83. package/dist/selector/scoring.d.ts.map +1 -0
  84. package/dist/selector/scoring.js +113 -0
  85. package/dist/selector/scoring.js.map +1 -0
  86. package/dist/selector/search.d.ts +29 -0
  87. package/dist/selector/search.d.ts.map +1 -0
  88. package/dist/selector/search.js +48 -0
  89. package/dist/selector/search.js.map +1 -0
  90. package/dist/utils/cli.d.ts +20 -0
  91. package/dist/utils/cli.d.ts.map +1 -0
  92. package/dist/utils/cli.js +19 -0
  93. package/dist/utils/cli.js.map +1 -0
  94. package/dist/utils/format.d.ts +30 -0
  95. package/dist/utils/format.d.ts.map +1 -0
  96. package/dist/utils/format.js +44 -0
  97. package/dist/utils/format.js.map +1 -0
  98. package/dist/utils/output.d.ts +42 -0
  99. package/dist/utils/output.d.ts.map +1 -0
  100. package/dist/utils/output.js +62 -0
  101. package/dist/utils/output.js.map +1 -0
  102. package/dist/utils/prompts.d.ts +23 -0
  103. package/dist/utils/prompts.d.ts.map +1 -0
  104. package/dist/utils/prompts.js +46 -0
  105. package/dist/utils/prompts.js.map +1 -0
  106. package/dist/utils/streams.d.ts +40 -0
  107. package/dist/utils/streams.d.ts.map +1 -0
  108. package/dist/utils/streams.js +61 -0
  109. package/dist/utils/streams.js.map +1 -0
  110. package/package.json +67 -0
package/dist/index.js ADDED
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { readFileSync } from 'fs';
4
+ import { fileURLToPath } from 'url';
5
+ import { dirname, join } from 'path';
6
+ import { initCommand } from './commands/init.js';
7
+ import { sourceCommand } from './commands/source/index.js';
8
+ import { indexCommand } from './commands/index-cmd.js';
9
+ import { selectCommand } from './commands/select.js';
10
+ import { ContextKitError, InvalidUsageError } from './errors/index.js';
11
+ import { writeError, writeMessage } from './utils/streams.js';
12
+ // Read the CLI version from the package.json located one directory above this module
13
+ const __dirname = dirname(fileURLToPath(import.meta.url)); // directory of this file, derived from import.meta.url
14
+ const pkg = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf-8')); // synchronous read at startup
15
+ const VERSION = pkg.version;
16
+ // Exit codes per CLI-DESIGN.md
17
+ const EXIT_ERROR = 1; // handleError uses this for every failure that is not an InvalidUsageError
18
+ const EXIT_INVALID_USAGE = 2; // handleError uses this for InvalidUsageError
/**
 * Global error sink: report a failure on the CLI and terminate the process.
 *
 * - ContextKitError: prints the message, then exits with EXIT_INVALID_USAGE
 *   when the error is an InvalidUsageError and with EXIT_ERROR otherwise.
 * - Any other Error: prints the message, plus the stack trace when the DEBUG
 *   environment variable is set, then exits with EXIT_ERROR.
 * - Non-Error values: prints a generic message, then exits with EXIT_ERROR.
 *
 * @param error - Thrown value or promise rejection reason.
 */
function handleError(error) {
    if (error instanceof ContextKitError) {
        writeError(error.message);
        let exitCode = EXIT_ERROR;
        if (error instanceof InvalidUsageError) {
            exitCode = EXIT_INVALID_USAGE;
        }
        process.exit(exitCode);
    }
    // Unexpected error - the stack is only shown when DEBUG is set
    if (!(error instanceof Error)) {
        writeError('An unexpected error occurred');
    }
    else {
        writeError(error.message);
        if (process.env.DEBUG) {
            writeMessage(error.stack || '');
        }
    }
    process.exit(EXIT_ERROR);
}
40
+ // Global error handlers: uncaught exceptions and unhandled promise rejections both go through handleError, which exits the process
41
+ process.on('uncaughtException', handleError);
42
+ process.on('unhandledRejection', handleError);
43
+ const program = new Command();
44
+ program
45
+ .name('contextkit')
46
+ .description('Smart context selection for LLMs')
47
+ .version(VERSION, '-v, --version', 'Show version number')
48
+ .showHelpAfterError()
49
+ .configureHelp({
50
+ sortSubcommands: true,
51
+ subcommandTerm: (cmd) => cmd.name(), // list each subcommand by its bare name
52
+ });
53
+ // Global options, declared on the root program
54
+ program
55
+ .option('--json', 'Output as JSON')
56
+ .option('--plain', 'Plain output (no colors/formatting)')
57
+ .option('--quiet', 'Suppress non-essential output');
58
+ // Register commands (each command object is imported from ./commands/*)
59
+ program.addCommand(initCommand);
60
+ program.addCommand(sourceCommand);
61
+ program.addCommand(indexCommand);
62
+ program.addCommand(selectCommand);
63
+ // Default action when no command given: print a hand-written quick-start banner via console.log. NOTE(review): the banner repeats the global option descriptions registered on `program`, so the two must be kept in sync by hand.
64
+ program.action(() => {
65
+ console.log(`
66
+ 🎯 contextkit - Smart context selection for AI coding assistants
67
+
68
+ Quick Start:
69
+ $ contextkit init # Initialize in your project
70
+ $ contextkit source add ./src # Add source directories
71
+ $ contextkit index # Index everything
72
+ $ contextkit select "your query" # Find relevant context
73
+
74
+ Commands:
75
+ init Initialize ContextKit in current directory
76
+ source Manage source directories
77
+ index Index all sources (re-run after code changes)
78
+ select Select context for a query
79
+
80
+ Global Options:
81
+ --json Output as JSON
82
+ --plain No colors (or set NO_COLOR=1)
83
+ --quiet Suppress non-essential output
84
+ -v, --version Show version
85
+
86
+ Run 'contextkit <command> --help' for command details.
87
+ `);
88
+ });
89
+ program.parse();
90
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAE9D,gCAAgC;AAChC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AACrF,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;AAE5B,+BAA+B;AAC/B,MAAM,UAAU,GAAG,CAAC,CAAC;AACrB,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B;;GAEG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;QACrC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAE1B,MAAM,QAAQ,GAAG,KAAK,YAAY,iBAAiB,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,UAAU,CAAC;QAEtF,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAED,gDAAgD;IAChD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,YAAY,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,UAAU,CAAC,8BAA8B,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;AAC3B,CAAC;AAED,wBAAwB;AACxB,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAC;AAC7C,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,WAAW,CAAC,CAAC;AAE9C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,kCAAkC,CAAC;KAC/C,OAAO,CAAC,OAAO,EAAE,eAAe,EAAE,qBAAqB,CAAC;KACxD,kBAAkB,EAAE;KACpB,aAAa,CAAC;IACb,eAAe,EAAE,IAAI;IACrB,cAAc,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE;CACpC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,SAAS,EAAE,qCAAqC,CAAC;KACxD,MAAM,CAAC,SAAS,EAAE,+BAA+B,CAAC,CAAC;AAEtD,oBAAo
B;AACpB,OAAO,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;AAChC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAClC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AACjC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAElC,uCAAuC;AACvC,OAAO,CAAC,MAAM,CAAC,GAAG,EAAE;IAClB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;CAsBb,CAAC,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Chunking Module
3
+ *
4
+ * Splits files into chunks for embedding.
5
+ * Uses line-based chunking with configurable size and overlap.
6
+ */
7
+ import type { DiscoveredFile } from './discovery.js';
8
+ /** Chunk of content ready for embedding */
9
+ export interface Chunk {
10
+ /** Unique chunk ID */
11
+ id: string;
12
+ /** Source this chunk belongs to */
13
+ sourceId: string;
14
+ /** Original file path (relative) */
15
+ filePath: string;
16
+ /** Chunk content */
17
+ content: string;
18
+ /** Start line (1-indexed) */
19
+ startLine: number;
20
+ /** End line (1-indexed, inclusive) */
21
+ endLine: number;
22
+ /** Token count */
23
+ tokens: number;
24
+ }
25
+ /** Chunking options */
26
+ export interface ChunkOptions {
27
+ /** Target tokens per chunk */
28
+ chunkSize: number;
29
+ /** Overlap tokens between chunks */
30
+ chunkOverlap: number;
31
+ }
32
+ /**
33
+ * Count tokens in a string
34
+ */
35
+ export declare function countTokens(text: string): number;
36
+ /**
37
+ * Chunk a single file into pieces
38
+ */
39
+ export declare function chunkFile(file: DiscoveredFile, options?: ChunkOptions): Chunk[];
40
+ /**
41
+ * Chunk multiple files
42
+ */
43
+ export declare function chunkFiles(files: DiscoveredFile[], options?: ChunkOptions): Chunk[];
44
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAErD,2CAA2C;AAC3C,MAAM,WAAW,KAAK;IACpB,sBAAsB;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,uBAAuB;AACvB,MAAM,WAAW,YAAY;IAC3B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;CACtB;AAUD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAiBD;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,cAAc,EAAE,OAAO,GAAE,YAA8B,GAAG,KAAK,EAAE,CA8DhG;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,cAAc,EAAE,EACvB,OAAO,GAAE,YAA8B,GACtC,KAAK,EAAE,CAST"}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Chunking Module
3
+ *
4
+ * Splits files into chunks for embedding.
5
+ * Uses line-based chunking with configurable size and overlap.
6
+ */
7
+ import { encodingForModel } from 'js-tiktoken';
8
+ const DEFAULT_OPTIONS = { // used by chunkFile/chunkFiles when the caller passes no options
9
+ chunkSize: 500, // target tokens per chunk
10
+ chunkOverlap: 50, // tokens of trailing context carried into the next chunk (whole lines)
11
+ };
12
+ // Tokenizer that js-tiktoken selects for the 'gpt-4' model (cl100k_base per the original note). NOTE(review): counts for other model families (e.g. Claude) are presumably only approximate - confirm before relying on exact token budgets.
13
+ const encoder = encodingForModel('gpt-4');
/**
 * Count tokens in a string.
 *
 * @param text - Text to measure.
 * @returns Number of tokens the module-level `encoder` produces for `text`.
 */
export function countTokens(text) {
    const tokenIds = encoder.encode(text);
    return tokenIds.length;
}
/**
 * Generate a chunk ID from a chunk's source, file path and starting line.
 *
 * The parts are joined as `sourceId:filePath:startLine` and folded into a
 * signed 32-bit rolling hash (hash * 31 + UTF-16 code unit). The ID is
 * `chunk_` followed by the absolute hash value in base 36, so equal inputs
 * always yield the same ID.
 *
 * @param sourceId - ID of the source the chunk belongs to.
 * @param filePath - File path of the chunk (relative to the source).
 * @param startLine - First line of the chunk.
 * @returns ID of the form `chunk_<base36>`.
 */
function generateChunkId(sourceId, filePath, startLine) {
    const key = `${sourceId}:${filePath}:${startLine}`;
    let digest = 0;
    for (let pos = 0; pos < key.length; pos += 1) {
        // Math.imul(digest, 31) equals (digest << 5) - digest in 32-bit
        // arithmetic; `| 0` wraps the sum back to a signed 32-bit integer.
        digest = (Math.imul(digest, 31) + key.charCodeAt(pos)) | 0;
    }
    return `chunk_${Math.abs(digest).toString(36)}`;
}
/**
 * Chunk a single file into pieces.
 *
 * The content is split on '\n' and lines are accumulated until adding the
 * next line would push the running token total past `options.chunkSize`.
 * The accumulated lines are then emitted as a chunk, and its trailing lines
 * (whole lines, added until at least `options.chunkOverlap` tokens are
 * covered) are carried over as the start of the next chunk. A single line
 * that is larger than `chunkSize` on its own is never split.
 *
 * The carried-over overlap never includes the first line of the chunk that
 * was just emitted. Every chunk therefore starts on a later line than the
 * previous one, which keeps chunk IDs (derived from source, path and start
 * line) unique within a file and prevents a chunk from being re-emitted in
 * full inside its successor.
 *
 * @param file - Discovered file; `content`, `sourceId` and `relativePath` are read.
 * @param options - `chunkSize` and `chunkOverlap`, both in tokens.
 * @returns Chunks in file order; `startLine`/`endLine` are 1-indexed and inclusive.
 */
export function chunkFile(file, options = DEFAULT_OPTIONS) {
    const lines = file.content.split('\n');
    const chunks = [];
    let currentLines = [];
    let currentTokens = 0;
    let startLine = 1;
    // Emit the lines accumulated so far as one chunk.
    const emitChunk = () => {
        const content = currentLines.join('\n');
        chunks.push({
            id: generateChunkId(file.sourceId, file.relativePath, startLine),
            sourceId: file.sourceId,
            filePath: file.relativePath,
            content,
            startLine,
            endLine: startLine + currentLines.length - 1,
            // Always measure the joined text so every chunk reports its
            // token count the same way (the running total is a per-line sum).
            tokens: countTokens(content),
        });
    };
    for (const line of lines) {
        const lineTokens = countTokens(line + '\n');
        // If adding this line exceeds chunk size, save current chunk
        if (currentTokens + lineTokens > options.chunkSize && currentLines.length > 0) {
            emitChunk();
            // Line number of the line that follows the emitted chunk
            const nextLineNumber = startLine + currentLines.length;
            // Calculate overlap: keep last N tokens worth of lines.
            // Stop at index 1 so the overlap is always a strict suffix.
            const overlapLines = [];
            let overlapTokens = 0;
            for (let j = currentLines.length - 1; j >= 1 && overlapTokens < options.chunkOverlap; j--) {
                const overlapLine = currentLines[j];
                overlapLines.unshift(overlapLine);
                overlapTokens += countTokens(overlapLine + '\n');
            }
            // Start next chunk with overlap
            currentLines = overlapLines;
            currentTokens = overlapTokens;
            startLine = nextLineNumber - overlapLines.length;
        }
        currentLines.push(line);
        currentTokens += lineTokens;
    }
    // Don't forget the last chunk
    if (currentLines.length > 0) {
        emitChunk();
    }
    return chunks;
}
/**
 * Chunk multiple files.
 *
 * @param files - Files to chunk, processed in order.
 * @param options - Chunking options forwarded unchanged to chunkFile.
 * @returns All chunks of all files as one flat list, in file order.
 */
export function chunkFiles(files, options = DEFAULT_OPTIONS) {
    const collected = [];
    for (const file of files) {
        for (const chunk of chunkFile(file, options)) {
            collected.push(chunk);
        }
    }
    return collected;
}
102
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AA6B/C,MAAM,eAAe,GAAiB;IACpC,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;CACjB,CAAC;AAEF,qDAAqD;AACrD,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;AAE1C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,QAAgB,EAAE,QAAgB,EAAE,SAAiB;IAC5E,MAAM,IAAI,GAAG,GAAG,QAAQ,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;IACpD,qBAAqB;IACrB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChC,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,4BAA4B;IAClD,CAAC;IACD,OAAO,SAAS,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAoB,EAAE,UAAwB,eAAe;IACrF,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,6DAA6D;QAC7D,IAAI,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9E,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,CAAC;gBAChE,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS;gBACT,OAAO,EAAE,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;gBAC5C,MAAM,EAAE,aAAa;aACtB,CAAC,CAAC;YAEH,uDAAuD;YACvD,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,EAA
E,CAAC;gBAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,iBAAiB,GAAG,WAAW,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;gBAClC,aAAa,IAAI,iBAAiB,CAAC;YACrC,CAAC;YAED,gCAAgC;YAChC,YAAY,GAAG,YAAY,CAAC;YAC5B,aAAa,GAAG,aAAa,CAAC;YAC9B,SAAS;gBACP,SAAS,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAC1F,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,CAAC;YAChE,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,OAAO;YACP,SAAS;YACT,OAAO,EAAE,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;YAC5C,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC;SAC7B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CACxB,KAAuB,EACvB,UAAwB,eAAe;IAEvC,MAAM,SAAS,GAAY,EAAE,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * File Discovery Module
3
+ *
4
+ * Discovers files in sources based on include/exclude patterns.
5
+ */
6
+ import type { Source } from '../config/types.js';
7
+ /** File info returned by discovery */
8
+ export interface DiscoveredFile {
9
+ /** Absolute path to file */
10
+ path: string;
11
+ /** Path relative to source */
12
+ relativePath: string;
13
+ /** Source this file belongs to */
14
+ sourceId: string;
15
+ /** File content */
16
+ content: string;
17
+ /** File size in bytes */
18
+ size: number;
19
+ }
20
+ /** Discovery result for a source */
21
+ export interface DiscoveryResult {
22
+ sourceId: string;
23
+ files: DiscoveredFile[];
24
+ skipped: number;
25
+ }
26
+ /**
27
+ * Discover all files in a source
28
+ */
29
+ export declare function discoverFiles(source: Source, baseDir: string): DiscoveryResult;
30
+ /**
31
+ * Discover files from multiple sources
32
+ */
33
+ export declare function discoverAllFiles(sources: Source[], baseDir: string): DiscoveryResult[];
34
+ //# sourceMappingURL=discovery.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"discovery.d.ts","sourceRoot":"","sources":["../../src/indexer/discovery.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAKjD,sCAAsC;AACtC,MAAM,WAAW,cAAc;IAC7B,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,oCAAoC;AACpC,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,cAAc,EAAE,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,eAAe,CAqD9E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,GAAG,eAAe,EAAE,CAEtF"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * File Discovery Module
3
+ *
4
+ * Discovers files in sources based on include/exclude patterns.
5
+ */
6
+ import { resolve } from 'path';
7
+ import { readFileSync, statSync } from 'fs';
8
+ import fg from 'fast-glob';
/** Maximum file size to process (100KB) */
const MAX_FILE_SIZE = 100 * 1024;
/**
 * Discover all files in a source.
 *
 * Globs `source.patterns.include` (minus `source.patterns.exclude`) inside
 * the source directory, then reads every match as UTF-8. A file is counted
 * as skipped instead of returned when it is larger than MAX_FILE_SIZE,
 * contains a NUL character, or cannot be stat'ed/read.
 *
 * @param source - Source definition; `id`, `path` and `patterns` are read.
 * @param baseDir - Directory that `source.path` is resolved against.
 * @returns The source ID, the readable files and the number of skipped files.
 */
export function discoverFiles(source, baseDir) {
    const rootDir = resolve(baseDir, source.path);
    // Find matching files (paths come back relative to rootDir)
    const relativePaths = fg.sync(source.patterns.include, {
        cwd: rootDir,
        ignore: source.patterns.exclude,
        onlyFiles: true,
        absolute: false,
    });
    const files = [];
    let skipped = 0;
    for (const relativePath of relativePaths) {
        const path = resolve(rootDir, relativePath);
        try {
            const { size } = statSync(path);
            // Files that are too large are never read
            const content = size > MAX_FILE_SIZE ? null : readFileSync(path, 'utf-8');
            // Too large, or binary (simple heuristic: check for null bytes)
            if (content === null || content.includes('\0')) {
                skipped += 1;
                continue;
            }
            files.push({ path, relativePath, sourceId: source.id, content, size });
        }
        catch {
            // Skip files we can't read
            skipped += 1;
        }
    }
    return { sourceId: source.id, files, skipped };
}
/**
 * Discover files from multiple sources.
 *
 * @param sources - Source definitions, processed in order.
 * @param baseDir - Directory that each source path is resolved against.
 * @returns One discovery result per source, in the same order.
 */
export function discoverAllFiles(sources, baseDir) {
    const discoverInBaseDir = (source) => discoverFiles(source, baseDir);
    return sources.map(discoverInBaseDir);
}
66
+ //# sourceMappingURL=discovery.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"discovery.js","sourceRoot":"","sources":["../../src/indexer/discovery.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAAE,MAAM,WAAW,CAAC;AAG3B,2CAA2C;AAC3C,MAAM,aAAa,GAAG,GAAG,GAAG,IAAI,CAAC;AAuBjC;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,MAAc,EAAE,OAAe;IAC3D,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;IAEjD,sBAAsB;IACtB,MAAM,OAAO,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,EAAE;QAC/C,GAAG,EAAE,UAAU;QACf,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,OAAO;QAC/B,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,KAAK;KAChB,CAAC,CAAC;IAEH,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,YAAY,IAAI,OAAO,EAAE,CAAC;QACnC,MAAM,YAAY,GAAG,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;QAEvD,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;YAErC,gCAAgC;YAChC,IAAI,KAAK,CAAC,IAAI,GAAG,aAAa,EAAE,CAAC;gBAC/B,OAAO,EAAE,CAAC;gBACV,SAAS;YACX,CAAC;YAED,oBAAoB;YACpB,MAAM,OAAO,GAAG,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YAEpD,6DAA6D;YAC7D,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC3B,OAAO,EAAE,CAAC;gBACV,SAAS;YACX,CAAC;YAED,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,YAAY;gBAClB,YAAY;gBACZ,QAAQ,EAAE,MAAM,CAAC,EAAE;gBACnB,OAAO;gBACP,IAAI,EAAE,KAAK,CAAC,IAAI;aACjB,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,2BAA2B;YAC3B,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ,EAAE,MAAM,CAAC,EAAE;QACnB,KAAK;QACL,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAiB,EAAE,OAAe;IACjE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;AACjE,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Embeddings Module
3
+ *
4
+ * Generates embeddings using local models via @xenova/transformers.
5
+ * Uses gte-small (384 dimensions) by default.
6
+ */
7
+ import type { Chunk } from './chunker.js';
8
+ /** Embedding dimension for gte-small */
9
+ export declare const EMBEDDING_DIM = 384;
10
+ /** Chunk with embedding */
11
+ export interface EmbeddedChunk extends Chunk {
12
+ embedding: number[];
13
+ }
14
+ /** Progress callback */
15
+ export type ProgressCallback = (current: number, total: number) => void;
16
+ /**
17
+ * Generate embedding for a single text
18
+ */
19
+ export declare function embed(text: string): Promise<number[]>;
20
+ /**
21
+ * Generate embeddings for multiple texts in batches
22
+ */
23
+ export declare function embedBatch(texts: string[], onProgress?: ProgressCallback): Promise<number[][]>;
24
+ /**
25
+ * Generate embeddings for chunks
26
+ */
27
+ export declare function embedChunks(chunks: Chunk[], onProgress?: ProgressCallback): Promise<EmbeddedChunk[]>;
28
+ /**
29
+ * Compute cosine similarity between two vectors
30
+ */
31
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
32
+ //# sourceMappingURL=embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/indexer/embeddings.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAK1C,wCAAwC;AACxC,eAAO,MAAM,aAAa,MAAM,CAAC;AAKjC,2BAA2B;AAC3B,MAAM,WAAW,aAAc,SAAQ,KAAK;IAC1C,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,wBAAwB;AACxB,MAAM,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAiBxE;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAI3D;AAED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,CAAC,EAAE,gBAAgB,GAC5B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAwBrB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,MAAM,EAAE,KAAK,EAAE,EACf,UAAU,CAAC,EAAE,gBAAgB,GAC5B,OAAO,CAAC,aAAa,EAAE,CAAC,CAQ1B;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE"}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Embeddings Module
3
+ *
4
+ * Generates embeddings using local models via @xenova/transformers.
5
+ * Uses gte-small (384 dimensions) by default.
6
+ */
7
+ import { pipeline } from '@xenova/transformers';
8
+ /** Default embedding model (the model name getEmbeddingPipeline passes to pipeline()) */
9
+ const DEFAULT_MODEL = 'Xenova/gte-small';
10
+ /** Embedding dimension for gte-small */
11
+ export const EMBEDDING_DIM = 384;
12
+ /** Batch size for embedding generation: embedBatch embeds this many texts per Promise.all round */
13
+ const BATCH_SIZE = 32;
// Singleton pipeline: holds the promise of the (possibly still loading)
// pipeline, so concurrent callers share one load.
let embeddingPipeline = null;
/**
 * Initialize the embedding pipeline (once) and return it.
 *
 * The promise returned by `pipeline()` is cached as soon as the first call
 * starts, not after it resolves. Callers that arrive while the model is
 * still loading therefore await the same load instead of each starting
 * their own. If loading fails, the cache is cleared so a later call can
 * retry.
 *
 * @returns Promise resolving to the feature-extraction pipeline.
 */
async function getEmbeddingPipeline() {
    if (!embeddingPipeline) {
        embeddingPipeline = pipeline('feature-extraction', DEFAULT_MODEL, {
            quantized: true, // Use quantized model for speed
        }).catch((error) => {
            // Do not keep a rejected promise around forever
            embeddingPipeline = null;
            throw error;
        });
    }
    return embeddingPipeline;
}
/**
 * Generate embedding for a single text.
 *
 * @param text - Text to embed.
 * @returns The pipeline output (mean pooling, normalized) as a plain number array.
 */
export async function embed(text) {
    const extractor = await getEmbeddingPipeline();
    const { data } = await extractor(text, { pooling: 'mean', normalize: true });
    return Array.from(data);
}
/**
 * Generate embeddings for multiple texts in batches.
 *
 * Texts are processed BATCH_SIZE at a time; within a batch every text is
 * sent to the pipeline individually and the batch is awaited as a whole.
 *
 * @param texts - Texts to embed.
 * @param onProgress - Optional callback invoked after each batch with
 *   (number of texts done so far, total number of texts).
 * @returns One embedding per text, in input order.
 */
export async function embedBatch(texts, onProgress) {
    const extractor = await getEmbeddingPipeline();
    const toVector = async (text) => {
        const { data } = await extractor(text, { pooling: 'mean', normalize: true });
        return Array.from(data);
    };
    const embeddings = [];
    let offset = 0;
    while (offset < texts.length) {
        const end = offset + BATCH_SIZE;
        // Process batch
        const vectors = await Promise.all(texts.slice(offset, end).map((text) => toVector(text)));
        embeddings.push(...vectors);
        // Report progress
        if (onProgress) {
            onProgress(Math.min(end, texts.length), texts.length);
        }
        offset = end;
    }
    return embeddings;
}
/**
 * Generate embeddings for chunks.
 *
 * @param chunks - Chunks whose `content` is embedded.
 * @param onProgress - Optional progress callback, forwarded to embedBatch.
 * @returns Copies of the chunks, each with an added `embedding` property.
 */
export async function embedChunks(chunks, onProgress) {
    const embeddings = await embedBatch(chunks.map(({ content }) => content), onProgress);
    const embedded = [];
    chunks.forEach((chunk, index) => {
        embedded.push({ ...chunk, embedding: embeddings[index] });
    });
    return embedded;
}
/**
 * Compute cosine similarity between two vectors.
 *
 * The result is clamped to [-1, 1]: floating-point rounding in the norm
 * computation can otherwise produce values such as 1.0000000000000002 for
 * parallel vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns Cosine of the angle between `a` and `b`, or 0 when either vector
 *   has zero magnitude (including two empty vectors).
 * @throws {Error} When the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        throw new Error('Vectors must have the same length');
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dotProduct += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    if (magnitude === 0) {
        return 0;
    }
    // Clamp away rounding overshoot so callers can rely on the [-1, 1] range
    return Math.max(-1, Math.min(1, dotProduct / magnitude));
}
85
+ //# sourceMappingURL=embeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/indexer/embeddings.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAkC,MAAM,sBAAsB,CAAC;AAGhF,8BAA8B;AAC9B,MAAM,aAAa,GAAG,kBAAkB,CAAC;AAEzC,wCAAwC;AACxC,MAAM,CAAC,MAAM,aAAa,GAAG,GAAG,CAAC;AAEjC,0CAA0C;AAC1C,MAAM,UAAU,GAAG,EAAE,CAAC;AAUtB,8BAA8B;AAC9B,IAAI,iBAAiB,GAAqC,IAAI,CAAC;AAE/D;;GAEG;AACH,KAAK,UAAU,oBAAoB;IACjC,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACvB,iBAAiB,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,aAAa,EAAE;YACtE,SAAS,EAAE,IAAI,EAAE,gCAAgC;SAClD,CAAC,CAAC;IACL,CAAC;IACD,OAAO,iBAAiB,CAAC;AAC3B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAC1C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACtE,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;AACjD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAe,EACf,UAA6B;IAE7B,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAC1C,MAAM,UAAU,GAAe,EAAE,CAAC;IAElC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QAE7C,gBAAgB;QAChB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YACvB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;QACjD,CAAC,CAAC,CACH,CAAC;QAEF,UAAU,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC;QAE5B,kBAAkB;QAClB,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,MAAe,EACf,UAA6B;IAE7B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IAEvD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC/B,GAAG,KAAK;QACR,SAAS,EAAE,U
AAU,CAAC,CAAC,CAAC;KACzB,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAW,EAAE,CAAW;IACvD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtD,OAAO,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,SAAS,CAAC;AACtD,CAAC"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Main Indexer Module
3
+ *
4
+ * Orchestrates file discovery, chunking, embedding, and storage.
5
+ */
6
+ import Database from 'better-sqlite3';
7
+ import { type ChunkOptions } from './chunker.js';
8
+ import type { Source } from '../config/types.js';
9
+ /** Indexing statistics: totals accumulated over every source by one indexSources run */
10
+ export interface IndexStats {
11
+ sources: number; // number of sources that were passed in
12
+ files: number; // discovered files, summed over all sources
13
+ chunks: number; // embedded chunks handed to storage, summed over all sources
14
+ skipped: number; // skipped count reported by discovery, summed over all sources
15
+ timeMs: number; // elapsed time of the whole run, in milliseconds
16
+ }
17
+ /** Progress update: one event describing how far a single source has advanced through a phase */
18
+ export interface IndexProgress {
19
+ phase: 'discovery' | 'chunking' | 'embedding' | 'storing'; // pipeline stage being reported
20
+ sourceId: string; // id of the source currently being processed
21
+ current: number; // amount completed so far in this phase
22
+ total: number; // amount expected in this phase (0 on the first discovery event, before files are known)
23
+ }
24
+ /** Progress callback: receives each IndexProgress update as indexing advances; its return value is ignored */
25
+ export type IndexProgressCallback = (progress: IndexProgress) => void;
26
+ /**
27
+ * Index all sources: for each source in order, discover files, chunk them, embed the chunks and store them in db; resolves with the aggregate IndexStats
28
+ */
29
+ export declare function indexSources(sources: Source[], baseDir: string, db: Database.Database, chunkOptions: ChunkOptions, onProgress?: IndexProgressCallback): Promise<IndexStats>;
30
+ /**
31
+ * Read embedding from blob: interprets the blob's bytes as packed 32-bit floats and returns them as a plain number array
32
+ */
33
+ export declare function readEmbedding(blob: Buffer): number[];
34
+ export { discoverFiles, type DiscoveredFile, type DiscoveryResult } from './discovery.js';
35
+ export { chunkFiles, chunkFile, countTokens, type Chunk, type ChunkOptions } from './chunker.js';
36
+ export { embed, embedBatch, embedChunks, cosineSimilarity, EMBEDDING_DIM, type EmbeddedChunk, } from './embeddings.js';
37
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/indexer/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAc,KAAK,YAAY,EAAE,MAAM,cAAc,CAAC;AAE7D,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,0BAA0B;AAC1B,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,sBAAsB;AACtB,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,WAAW,GAAG,UAAU,GAAG,WAAW,GAAG,SAAS,CAAC;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAwB;AACxB,MAAM,MAAM,qBAAqB,GAAG,CAAC,QAAQ,EAAE,aAAa,KAAK,IAAI,CAAC;AAEtE;;GAEG;AACH,wBAAsB,YAAY,CAChC,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE,MAAM,EACf,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,YAAY,EAAE,YAAY,EAC1B,UAAU,CAAC,EAAE,qBAAqB,GACjC,OAAO,CAAC,UAAU,CAAC,CA+ErB;AAwDD;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAGpD;AAGD,OAAO,EAAE,aAAa,EAAE,KAAK,cAAc,EAAE,KAAK,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAC1F,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,KAAK,EAAE,KAAK,YAAY,EAAE,MAAM,cAAc,CAAC;AACjG,OAAO,EACL,KAAK,EACL,UAAU,EACV,WAAW,EACX,gBAAgB,EAChB,aAAa,EACb,KAAK,aAAa,GACnB,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Main Indexer Module
3
+ *
4
+ * Orchestrates file discovery, chunking, embedding, and storage.
5
+ */
6
+ import { discoverFiles } from './discovery.js';
7
+ import { chunkFiles } from './chunker.js';
8
+ import { embedChunks } from './embeddings.js';
9
/**
 * Index all sources
 *
 * Sources are handled one after another. Each one goes through
 * discovery -> chunking -> embedding -> storing, with progress events
 * emitted around every phase, and its counts are added to the run totals.
 *
 * @param sources - Sources to index; `id` and `path` are read from each.
 * @param baseDir - Forwarded to file discovery.
 * @param db - Database handle forwarded to the storage step.
 * @param chunkOptions - Forwarded to the chunker.
 * @param onProgress - Optional callback receiving `{ phase, sourceId, current, total }`.
 * @returns Totals for the run: `{ sources, files, chunks, skipped, timeMs }`.
 */
export async function indexSources(sources, baseDir, db, chunkOptions, onProgress) {
    const startedAt = Date.now();
    const totals = { files: 0, chunks: 0, skipped: 0 };
    // Single funnel for progress events so every update has the same shape.
    const report = (phase, sourceId, current, total) => {
        onProgress?.({ phase, sourceId, current, total });
    };
    for (const source of sources) {
        // Phase 1: Discovery (nothing is known yet, hence the initial 0 / 0)
        report('discovery', source.id, 0, 0);
        const discovered = discoverFiles(source, baseDir);
        const { files } = discovered;
        totals.files += files.length;
        totals.skipped += discovered.skipped;
        report('discovery', source.id, files.length, files.length);
        // Phase 2: Chunking (progress is measured in files)
        report('chunking', source.id, 0, files.length);
        const chunks = chunkFiles(files, chunkOptions);
        report('chunking', source.id, files.length, files.length);
        // Phase 3: Embedding (the embedder supplies its own current/total values)
        const embedded = await embedChunks(chunks, (current, total) => {
            report('embedding', source.id, current, total);
        });
        // Phase 4: Store in database (progress is measured in chunks)
        report('storing', source.id, 0, embedded.length);
        storeChunks(db, source.id, source.path, embedded, files.length);
        totals.chunks += embedded.length;
        report('storing', source.id, embedded.length, embedded.length);
    }
    return {
        sources: sources.length,
        files: totals.files,
        chunks: totals.chunks,
        skipped: totals.skipped,
        timeMs: Date.now() - startedAt,
    };
}
81
/**
 * Store chunks in the database
 *
 * Runs as a single transaction: removes the chunks previously stored for
 * `sourceId`, upserts the row in `sources`, then inserts every chunk with
 * its embedding packed as a blob of 32-bit floats.
 *
 * @param db - Database handle exposing `transaction` and `prepare`.
 * @param sourceId - Id whose existing chunks are replaced.
 * @param sourcePath - Path recorded on the source row.
 * @param chunks - Embedded chunks to insert.
 * @param fileCount - File count recorded on the source row.
 */
function storeChunks(db, sourceId, sourcePath, chunks, fileCount) {
    // Pack an embedding's numbers into the bytes of a Float32Array.
    const toBlob = (values) => Buffer.from(new Float32Array(values).buffer);
    // One transaction keeps the replace atomic and speeds up the bulk insert.
    db.transaction(() => {
        // Clear existing chunks for this source
        const clearStatement = db.prepare('DELETE FROM chunks WHERE source_id = ?');
        clearStatement.run(sourceId);
        // Update source record
        const upsertSource = db.prepare(`
      INSERT INTO sources (id, path, file_count, chunk_count, indexed_at)
      VALUES (?, ?, ?, ?, datetime('now'))
      ON CONFLICT(id) DO UPDATE SET
        path = excluded.path,
        file_count = excluded.file_count,
        chunk_count = excluded.chunk_count,
        indexed_at = excluded.indexed_at
    `);
        upsertSource.run(sourceId, sourcePath, fileCount, chunks.length);
        // Insert chunks
        const insertStatement = db.prepare(`
      INSERT INTO chunks (id, source_id, file_path, content, start_line, end_line, tokens, embedding)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
        for (const entry of chunks) {
            const row = [
                entry.id,
                entry.sourceId,
                entry.filePath,
                entry.content,
                entry.startLine,
                entry.endLine,
                entry.tokens,
                toBlob(entry.embedding),
            ];
            insertStatement.run(...row);
        }
    })();
}
112
/**
 * Read embedding from blob
 *
 * Decodes a blob of packed 32-bit floats (the layout produced by
 * `Buffer.from(new Float32Array(values).buffer)`) back into a plain array.
 * Values are read in the platform's native byte order, matching how a
 * Float32Array writes them.
 *
 * @param {Buffer} blob - Raw bytes, four per value.
 * @returns {number[]} The decoded values, one per 4-byte group.
 * @throws {RangeError} If the blob's byte length is not a multiple of 4.
 */
export function readEmbedding(blob) {
    const bytesPerValue = Float32Array.BYTES_PER_ELEMENT;
    if (blob.byteLength % bytesPerValue !== 0) {
        throw new RangeError(`Embedding blob length ${blob.byteLength} is not a multiple of ${bytesPerValue}`);
    }
    // A Float32Array view requires a 4-byte-aligned start offset, but a Buffer
    // may be a slice of a larger allocation and begin at any byte. Copying a
    // misaligned blob into a fresh typed array puts it at offset 0.
    const isAligned = blob.byteOffset % bytesPerValue === 0;
    const bytes = isAligned ? blob : new Uint8Array(blob);
    const floats = new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / bytesPerValue);
    return Array.from(floats);
}
119
+ // Re-export types and functions
120
+ export { discoverFiles } from './discovery.js';
121
+ export { chunkFiles, chunkFile, countTokens } from './chunker.js';
122
+ export { embed, embedBatch, embedChunks, cosineSimilarity, EMBEDDING_DIM, } from './embeddings.js';
123
+ //# sourceMappingURL=index.js.map