@vdpeijl/kb-mcp 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +416 -0
  2. package/dist/scripts/postinstall.d.ts +3 -0
  3. package/dist/scripts/postinstall.d.ts.map +1 -0
  4. package/dist/scripts/postinstall.js +110 -0
  5. package/dist/scripts/postinstall.js.map +1 -0
  6. package/dist/src/cli.d.ts +3 -0
  7. package/dist/src/cli.d.ts.map +1 -0
  8. package/dist/src/cli.js +22 -0
  9. package/dist/src/cli.js.map +1 -0
  10. package/dist/src/commands/doctor.d.ts +3 -0
  11. package/dist/src/commands/doctor.d.ts.map +1 -0
  12. package/dist/src/commands/doctor.js +93 -0
  13. package/dist/src/commands/doctor.js.map +1 -0
  14. package/dist/src/commands/init.d.ts +3 -0
  15. package/dist/src/commands/init.d.ts.map +1 -0
  16. package/dist/src/commands/init.js +150 -0
  17. package/dist/src/commands/init.js.map +1 -0
  18. package/dist/src/commands/serve.d.ts +3 -0
  19. package/dist/src/commands/serve.d.ts.map +1 -0
  20. package/dist/src/commands/serve.js +22 -0
  21. package/dist/src/commands/serve.js.map +1 -0
  22. package/dist/src/commands/setup.d.ts +9 -0
  23. package/dist/src/commands/setup.d.ts.map +1 -0
  24. package/dist/src/commands/setup.js +115 -0
  25. package/dist/src/commands/setup.js.map +1 -0
  26. package/dist/src/commands/sources.d.ts +3 -0
  27. package/dist/src/commands/sources.d.ts.map +1 -0
  28. package/dist/src/commands/sources.js +258 -0
  29. package/dist/src/commands/sources.js.map +1 -0
  30. package/dist/src/commands/stats.d.ts +3 -0
  31. package/dist/src/commands/stats.d.ts.map +1 -0
  32. package/dist/src/commands/stats.js +48 -0
  33. package/dist/src/commands/stats.js.map +1 -0
  34. package/dist/src/commands/sync.d.ts +13 -0
  35. package/dist/src/commands/sync.d.ts.map +1 -0
  36. package/dist/src/commands/sync.js +106 -0
  37. package/dist/src/commands/sync.js.map +1 -0
  38. package/dist/src/config/index.d.ts +18 -0
  39. package/dist/src/config/index.d.ts.map +1 -0
  40. package/dist/src/config/index.js +67 -0
  41. package/dist/src/config/index.js.map +1 -0
  42. package/dist/src/config/paths.d.ts +21 -0
  43. package/dist/src/config/paths.d.ts.map +1 -0
  44. package/dist/src/config/paths.js +38 -0
  45. package/dist/src/config/paths.js.map +1 -0
  46. package/dist/src/config/schema.d.ts +133 -0
  47. package/dist/src/config/schema.d.ts.map +1 -0
  48. package/dist/src/config/schema.js +34 -0
  49. package/dist/src/config/schema.js.map +1 -0
  50. package/dist/src/db/articles.d.ts +39 -0
  51. package/dist/src/db/articles.d.ts.map +1 -0
  52. package/dist/src/db/articles.js +103 -0
  53. package/dist/src/db/articles.js.map +1 -0
  54. package/dist/src/db/chunks.d.ts +46 -0
  55. package/dist/src/db/chunks.d.ts.map +1 -0
  56. package/dist/src/db/chunks.js +129 -0
  57. package/dist/src/db/chunks.js.map +1 -0
  58. package/dist/src/db/index.d.ts +19 -0
  59. package/dist/src/db/index.d.ts.map +1 -0
  60. package/dist/src/db/index.js +86 -0
  61. package/dist/src/db/index.js.map +1 -0
  62. package/dist/src/db/schema.d.ts +10 -0
  63. package/dist/src/db/schema.d.ts.map +1 -0
  64. package/dist/src/db/schema.js +80 -0
  65. package/dist/src/db/schema.js.map +1 -0
  66. package/dist/src/db/sources.d.ts +48 -0
  67. package/dist/src/db/sources.d.ts.map +1 -0
  68. package/dist/src/db/sources.js +110 -0
  69. package/dist/src/db/sources.js.map +1 -0
  70. package/dist/src/index.d.ts +8 -0
  71. package/dist/src/index.d.ts.map +1 -0
  72. package/dist/src/index.js +87 -0
  73. package/dist/src/index.js.map +1 -0
  74. package/dist/src/search/embeddings.d.ts +22 -0
  75. package/dist/src/search/embeddings.d.ts.map +1 -0
  76. package/dist/src/search/embeddings.js +122 -0
  77. package/dist/src/search/embeddings.js.map +1 -0
  78. package/dist/src/search/index.d.ts +21 -0
  79. package/dist/src/search/index.d.ts.map +1 -0
  80. package/dist/src/search/index.js +50 -0
  81. package/dist/src/search/index.js.map +1 -0
  82. package/dist/src/sync/chunker.d.ts +15 -0
  83. package/dist/src/sync/chunker.d.ts.map +1 -0
  84. package/dist/src/sync/chunker.js +117 -0
  85. package/dist/src/sync/chunker.js.map +1 -0
  86. package/dist/src/sync/index.d.ts +24 -0
  87. package/dist/src/sync/index.d.ts.map +1 -0
  88. package/dist/src/sync/index.js +180 -0
  89. package/dist/src/sync/index.js.map +1 -0
  90. package/dist/src/sync/parser.d.ts +9 -0
  91. package/dist/src/sync/parser.d.ts.map +1 -0
  92. package/dist/src/sync/parser.js +91 -0
  93. package/dist/src/sync/parser.js.map +1 -0
  94. package/dist/src/sync/zendesk.d.ts +45 -0
  95. package/dist/src/sync/zendesk.d.ts.map +1 -0
  96. package/dist/src/sync/zendesk.js +161 -0
  97. package/dist/src/sync/zendesk.js.map +1 -0
  98. package/dist/src/utils/errors.d.ts +39 -0
  99. package/dist/src/utils/errors.d.ts.map +1 -0
  100. package/dist/src/utils/errors.js +62 -0
  101. package/dist/src/utils/errors.js.map +1 -0
  102. package/dist/src/utils/logger.d.ts +21 -0
  103. package/dist/src/utils/logger.d.ts.map +1 -0
  104. package/dist/src/utils/logger.js +25 -0
  105. package/dist/src/utils/logger.js.map +1 -0
  106. package/package.json +57 -0
@@ -0,0 +1,110 @@
1
/**
 * Create a source row, or overwrite the mutable columns of the row that
 * already has the same id.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param source - Source to persist; reads `id`, `name`, `baseUrl`, `locale` and `enabled`.
 */
export function upsertSource(db, source) {
    // The boolean flag is stored as an integer 1/0.
    const enabledFlag = source.enabled ? 1 : 0;
    const params = [source.id, source.name, source.baseUrl, source.locale, enabledFlag];
    db.prepare(`
    INSERT INTO sources (id, name, base_url, locale, enabled)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
      name = excluded.name,
      base_url = excluded.base_url,
      locale = excluded.locale,
      enabled = excluded.enabled
  `).run(...params);
}
16
/**
 * Look up a single source by its id.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param id - Source id to look up.
 * @returns The source with camelCase fields, or `null` when no row matches.
 */
export function getSource(db, id) {
    const record = db.prepare('SELECT * FROM sources WHERE id = ?').get(id);
    if (record) {
        return {
            id: record.id,
            name: record.name,
            baseUrl: record.base_url,
            locale: record.locale,
            // Stored as integer 1/0; anything other than 1 reads as disabled.
            enabled: record.enabled === 1,
        };
    }
    return null;
}
33
/**
 * List every source, ordered by name.
 *
 * @param db - Database handle exposing `prepare()`.
 * @returns One object per row with camelCase fields and a boolean `enabled`.
 */
export function getAllSources(db) {
    const records = db.prepare('SELECT * FROM sources ORDER BY name').all();
    const toSource = ({ id, name, base_url, locale, enabled }) => ({
        id,
        name,
        baseUrl: base_url,
        locale,
        enabled: enabled === 1,
    });
    return records.map((record) => toSource(record));
}
47
/**
 * List only the sources whose `enabled` column is 1, ordered by name.
 *
 * @param db - Database handle exposing `prepare()`.
 * @returns One object per row; `enabled` is always `true` because the query
 *          already filters on it.
 */
export function getEnabledSources(db) {
    const records = db.prepare('SELECT * FROM sources WHERE enabled = 1 ORDER BY name').all();
    const toSource = ({ id, name, base_url, locale }) => ({
        id,
        name,
        baseUrl: base_url,
        locale,
        enabled: true,
    });
    return records.map((record) => toSource(record));
}
61
/**
 * List every source together with its article count, chunk count and last
 * sync time, ordered by name.
 *
 * @param db - Database handle exposing `prepare()`.
 * @returns One object per source; `lastSyncedAt` is a `Date`, or `null` when
 *          the stored value is empty.
 */
export function getSourcesWithStats(db) {
    // LEFT JOINs keep sources that have no articles or chunks yet (counts are 0).
    const statsQuery = db.prepare(`
    SELECT
      s.*,
      COUNT(DISTINCT a.id) as article_count,
      COUNT(c.id) as chunk_count
    FROM sources s
    LEFT JOIN articles a ON s.id = a.source_id
    LEFT JOIN chunks c ON a.id = c.article_id AND a.source_id = c.source_id
    GROUP BY s.id
    ORDER BY s.name
  `);
    const sources = [];
    for (const record of statsQuery.all()) {
        const syncedAt = record.last_synced_at ? new Date(record.last_synced_at) : null;
        sources.push({
            id: record.id,
            name: record.name,
            baseUrl: record.base_url,
            locale: record.locale,
            enabled: record.enabled === 1,
            lastSyncedAt: syncedAt,
            articleCount: record.article_count,
            chunkCount: record.chunk_count,
        });
    }
    return sources;
}
88
/**
 * Remove the source row with the given id.
 *
 * Only the `sources` row is deleted here. Removal of the source's articles
 * and chunks relies on cascading foreign keys declared in the schema, which
 * is not visible from this module.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param id - Id of the source to delete.
 */
export function deleteSource(db, id) {
    db.prepare('DELETE FROM sources WHERE id = ?').run(id);
}
96
/**
 * Switch a source on or off.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param id - Id of the source to update.
 * @param enabled - Truthy to enable, falsy to disable (stored as 1/0).
 */
export function setSourceEnabled(db, id, enabled) {
    const enabledFlag = enabled ? 1 : 0;
    db.prepare('UPDATE sources SET enabled = ? WHERE id = ?').run(enabledFlag, id);
}
103
/**
 * Stamp a source's `last_synced_at` column with the current time.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param id - Id of the source that was just synced.
 */
export function updateLastSynced(db, id) {
    // Stored as an ISO-8601 UTC string (the output of Date#toISOString).
    const syncedAt = new Date().toISOString();
    db.prepare('UPDATE sources SET last_synced_at = ? WHERE id = ?').run(syncedAt, id);
}
110
+ //# sourceMappingURL=sources.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sources.js","sourceRoot":"","sources":["../../../src/db/sources.ts"],"names":[],"mappings":"AAkBA;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,EAAqB,EAAE,MAAc;IAChE,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;GAQvB,CAAC,CAAC;IAEH,IAAI,CAAC,GAAG,CACN,MAAM,CAAC,EAAE,EACT,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,OAAO,EACd,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CACvB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,EAAqB,EAAE,EAAU;IACzD,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC;IAC9D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAA0B,CAAC;IAElD,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,QAAQ;QACrB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO,KAAK,CAAC;KAC3B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,EAAqB;IACjD,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAiB,CAAC;IAEvC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,QAAQ;QACrB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO,KAAK,CAAC;KAC3B,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,EAAqB;IACrD,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC;IACjF,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAiB,CAAC;IAEvC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,QAAQ;QACrB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,IAAI;KACd,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,EAAqB;IACvD,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;;GAUvB,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAuE,CAAC;IAE7F,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,QAAQ;QACrB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,OAAO,EAAE,GAAG,CAAC,OAAO,KAAK,CAAC;QAC1B,YAAY,EAAE,
GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI;QACtE,YAAY,EAAE,GAAG,CAAC,aAAa;QAC/B,UAAU,EAAE,GAAG,CAAC,WAAW;KAC5B,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,EAAqB,EAAE,EAAU;IAC5D,kEAAkE;IAClE,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,kCAAkC,CAAC,CAAC;IAC5D,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAqB,EAAE,EAAU,EAAE,OAAgB;IAClF,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAqB,EAAE,EAAU;IAChE,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC;IAC9E,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,CAAC;AACzC,CAAC"}
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
+ /**
4
+ * Create and configure the MCP server
5
+ */
6
+ declare function createServer(): Promise<McpServer>;
7
+ export { createServer };
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAQpE;;GAEG;AACH,iBAAe,YAAY,uBA2E1B;AAiBD,OAAO,EAAE,YAAY,EAAE,CAAC"}
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env node
2
import { realpathSync } from 'node:fs';
import { pathToFileURL } from 'node:url';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
import { loadConfig } from './config/index.js';
import { getDatabase } from './db/index.js';
import { getSourcesWithStats } from './db/sources.js';
import { search } from './search/index.js';
9
/**
 * Version reported to MCP clients.
 * Must track the published package version (0.1.2); the previous literal
 * '0.1.0' had fallen behind.
 */
const SERVER_VERSION = '0.1.2';
/**
 * Render search hits as a numbered markdown list, one entry per hit.
 * Returns an empty string when there are no hits.
 */
function formatSearchResults(results) {
    return results
        .map((result, i) => {
        const relevancePercent = Math.round(result.relevance * 100);
        return [
            `${i + 1}. **${result.title}**`,
            `   Source: ${result.sourceId}`,
            `   Relevance: ${relevancePercent}%`,
            `   URL: ${result.url}`,
            `   Excerpt: ${result.excerpt}`,
            '',
        ].join('\n');
    })
        .join('\n');
}
/**
 * Render sources and their sync statistics as markdown, one entry per source.
 * Returns an empty string when there are no sources.
 */
function formatSourceList(sources) {
    return sources
        .map((source) => {
        // lastSyncedAt is null for sources that have never been synced.
        const lastSynced = source.lastSyncedAt?.toLocaleString() ?? 'Never';
        return [
            `**${source.name}** (${source.id})`,
            `  Status: ${source.enabled ? 'Enabled' : 'Disabled'}`,
            `  URL: ${source.baseUrl}`,
            `  Locale: ${source.locale}`,
            `  Articles: ${source.articleCount}`,
            `  Chunks: ${source.chunkCount}`,
            `  Last Synced: ${lastSynced}`,
            '',
        ].join('\n');
    })
        .join('\n');
}
/**
 * Create and configure the MCP server.
 *
 * Loads the configuration and opens the database once, then registers two
 * tools that share them:
 *  - `search_knowledge_base`: runs `search()` and returns the hits as markdown text.
 *  - `list_sources`: returns every source with its statistics as markdown text.
 *
 * @returns The configured server; the caller is responsible for connecting a transport.
 */
async function createServer() {
    const config = await loadConfig();
    const db = await getDatabase();
    const server = new McpServer({
        name: 'kb-mcp',
        version: SERVER_VERSION,
    });
    // Register search tool
    server.registerTool('search_knowledge_base', {
        description: 'Search the knowledge base for documentation, guides, and help articles. Returns relevant excerpts with source links.',
        inputSchema: z.object({
            query: z.string().describe('Natural language search query'),
            sources: z.array(z.string()).optional().describe('Filter by source IDs (optional, searches all if omitted)'),
            limit: z.number().optional().describe('Maximum results to return (default: 5, max: 20)'),
        }),
    }, async ({ query, sources, limit }) => {
        const results = await search(db, config, query, { sources, limit });
        return {
            content: [{ type: 'text', text: formatSearchResults(results) || 'No results found.' }],
        };
    });
    // Register list sources tool
    server.registerTool('list_sources', {
        description: 'List all configured knowledge base sources and their sync status',
    }, async () => {
        const sources = getSourcesWithStats(db);
        return {
            content: [{ type: 'text', text: formatSourceList(sources) || 'No sources configured.' }],
        };
    });
    return server;
}
72
/**
 * Build the MCP server and attach it to a stdio transport.
 * Any startup failure is logged to stderr and ends the process with exit code 1.
 */
async function main() {
    try {
        const mcpServer = await createServer();
        const stdio = new StdioServerTransport();
        await mcpServer.connect(stdio);
    }
    catch (startupError) {
        console.error('Failed to start MCP server:', startupError);
        process.exit(1);
    }
}
83
/**
 * Report whether this module is the script Node was started with.
 *
 * Comparing `import.meta.url` with a hand-built `file://${process.argv[1]}`
 * string misses three common cases: launch through a symlink (for example an
 * npm `.bin` shim, where argv[1] is the link but the module URL is the real
 * file), Windows paths, and paths containing characters that are
 * percent-encoded in URLs. Both the literal and the symlink-resolved entry
 * path are therefore converted with `pathToFileURL` before comparing.
 */
function isMainModule() {
    const entry = process.argv[1];
    if (!entry) {
        return false;
    }
    const candidates = [entry];
    try {
        candidates.push(realpathSync(entry));
    }
    catch {
        // Entry path cannot be resolved on disk; fall back to the literal path only.
    }
    return candidates.some((candidate) => pathToFileURL(candidate).href === import.meta.url);
}
if (isMainModule()) {
    // main() handles its own errors and exits the process on failure.
    void main();
}
86
+ export { createServer };
87
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,MAAM,GAAG,MAAM,UAAU,EAAE,CAAC;IAClC,MAAM,EAAE,GAAG,MAAM,WAAW,EAAE,CAAC;IAE/B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,OAAO;KACjB,CAAC,CAAC;IAEH,uBAAuB;IACvB,MAAM,CAAC,YAAY,CACjB,uBAAuB,EACvB;QACE,WAAW,EAAE,sHAAsH;QACnI,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;YACpB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;YAC3D,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,0DAA0D,CAAC;YAC5G,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iDAAiD,CAAC;SACzF,CAAC;KACH,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE;QAClC,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAEpE,MAAM,SAAS,GAAG,OAAO;aACtB,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACjB,MAAM,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;YAC5D,OAAO;gBACL,GAAG,CAAC,GAAG,CAAC,OAAO,MAAM,CAAC,KAAK,IAAI;gBAC/B,cAAc,MAAM,CAAC,QAAQ,EAAE;gBAC/B,iBAAiB,gBAAgB,GAAG;gBACpC,WAAW,MAAM,CAAC,GAAG,EAAE;gBACvB,eAAe,MAAM,CAAC,OAAO,EAAE;gBAC/B,EAAE;aACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACf,CAAC,CAAC;aACD,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,IAAI,mBAAmB,EAAE,CAAC;SACpE,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,6BAA6B;IAC7B,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,WAAW,EAAE,kEAAkE;KAChF,EACD,KAAK,IAAI,EAAE;QACT,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAExC,MAAM,SAAS,GAAG,OAAO;aACtB,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;YACd,MAAM,UAAU,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,EAAE,IAAI,OAAO,CAAC;YACpE,OAAO;gBACL,KAAK,MAAM,CAAC,IAAI
,OAAO,MAAM,CAAC,EAAE,GAAG;gBACnC,aAAa,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,EAAE;gBACtD,UAAU,MAAM,CAAC,OAAO,EAAE;gBAC1B,aAAa,MAAM,CAAC,MAAM,EAAE;gBAC5B,eAAe,MAAM,CAAC,YAAY,EAAE;gBACpC,aAAa,MAAM,CAAC,UAAU,EAAE;gBAChC,kBAAkB,UAAU,EAAE;gBAC9B,EAAE;aACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACf,CAAC,CAAC;aACD,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,IAAI,wBAAwB,EAAE,CAAC;SACvE,CAAC;IACN,CAAC,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,UAAU,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACpD,IAAI,EAAE,CAAC;AACT,CAAC;AAED,OAAO,EAAE,YAAY,EAAE,CAAC"}
@@ -0,0 +1,22 @@
1
+ import type { OllamaConfig } from '../config/schema.js';
2
+ /**
3
+ * Check if Ollama is running and accessible
4
+ */
5
+ export declare function checkOllamaConnection(config: OllamaConfig): Promise<boolean>;
6
+ /**
7
+ * Check if the embedding model is available
8
+ */
9
+ export declare function checkModelAvailable(config: OllamaConfig): Promise<boolean>;
10
+ /**
11
+ * Generate an embedding for a single text using Ollama
12
+ */
13
+ export declare function generateEmbedding(text: string, config: OllamaConfig): Promise<Float32Array>;
14
+ /**
15
+ * Generate embeddings for multiple texts with concurrency control
16
+ */
17
+ export declare function generateEmbeddingsBatch(texts: string[], config: OllamaConfig, concurrency?: number, onProgress?: (completed: number, total: number) => void): Promise<Float32Array[]>;
18
+ /**
19
+ * Estimate token count (rough approximation: 4 characters = 1 token)
20
+ */
21
+ export declare function estimateTokenCount(text: string): number;
22
+ //# sourceMappingURL=embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../../src/search/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAUxD;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,CAUlF;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,CAehF;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,YAAY,GACnB,OAAO,CAAC,YAAY,CAAC,CA+CvB;AA4CD;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,KAAK,EAAE,MAAM,EAAE,EACf,MAAM,EAAE,YAAY,EACpB,WAAW,GAAE,MAAU,EACvB,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACtD,OAAO,CAAC,YAAY,EAAE,CAAC,CAWzB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD"}
@@ -0,0 +1,122 @@
1
/**
 * Probe the Ollama HTTP API by requesting `/api/tags`.
 *
 * @param config - Ollama settings; only `baseUrl` is read.
 * @returns `true` when the request completes with an OK status, `false` on a
 *          non-OK status or on any thrown error. Never rejects.
 */
export async function checkOllamaConnection(config) {
    try {
        const { ok } = await fetch(`${config.baseUrl}/api/tags`, { method: 'GET' });
        return ok;
    }
    catch {
        return false;
    }
}
15
/**
 * Check whether the configured embedding model appears in Ollama's model list.
 *
 * A listed model matches when its name equals `config.model` exactly or starts
 * with `config.model` followed by a colon (a tagged variant such as `:latest`).
 *
 * @param config - Ollama settings; reads `baseUrl` and `model`.
 * @returns `true` when a matching model is listed; `false` on a non-OK
 *          response, a malformed payload, or any thrown error. Never rejects.
 */
export async function checkModelAvailable(config) {
    try {
        const response = await fetch(`${config.baseUrl}/api/tags`, { method: 'GET' });
        if (response.ok) {
            const listing = await response.json();
            const matchesConfigured = (entry) => entry.name === config.model || entry.name.startsWith(`${config.model}:`);
            return listing.models.some(matchesConfigured);
        }
        return false;
    }
    catch {
        return false;
    }
}
33
/**
 * Request an embedding for one piece of text from Ollama's `/api/embeddings`
 * endpoint.
 *
 * @param text - Text to embed; sent as the request `prompt`.
 * @param config - Ollama settings; reads `baseUrl` and `model`.
 * @returns The embedding as a `Float32Array` of length 768.
 * @throws Error when the response status is not OK, when the payload has no
 *         `embedding` array, when the vector is not 768 elements long, or
 *         (with setup hints) when the server cannot be reached. A thrown
 *         value that is not an `Error` is replaced by a generic `Error`.
 */
export async function generateEmbedding(text, config) {
    try {
        const requestBody = JSON.stringify({
            model: config.model,
            prompt: text,
        });
        const response = await fetch(`${config.baseUrl}/api/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: requestBody,
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`Ollama API error (${response.status}): ${errorText}`);
        }
        const payload = await response.json();
        const vector = payload.embedding;
        if (!vector || !Array.isArray(vector)) {
            throw new Error('Invalid embedding response from Ollama');
        }
        // Only 768-dimensional vectors are accepted.
        if (vector.length !== 768) {
            throw new Error(`Expected 768-dimensional embedding, got ${vector.length}. ` +
                `Make sure you're using the nomic-embed-text model.`);
        }
        return new Float32Array(vector);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            throw new Error('Unknown error generating embedding');
        }
        const { message } = error;
        const serverUnreachable = message.includes('fetch failed') || message.includes('ECONNREFUSED');
        if (!serverUnreachable) {
            throw error;
        }
        // Swap the low-level network error for actionable setup instructions.
        throw new Error(`Cannot connect to Ollama at ${config.baseUrl}\n\n` +
            `Make sure Ollama is running:\n` +
            `  ollama serve\n\n` +
            `Or update the baseUrl in your config.`);
    }
}
76
/**
 * Run `processor` over every item with at most `concurrency` calls in flight,
 * returning the results in input order.
 *
 * @param items - Items to process.
 * @param processor - Async (or sync) function applied to each item.
 * @param concurrency - Desired number of parallel workers. Fractions are
 *        rounded down, and anything below 1 (including NaN) is treated as 1 so
 *        that a non-empty input is never silently skipped.
 * @param onProgress - Optional callback invoked as `(completed, total)` after
 *        each item finishes.
 * @returns Results aligned index-for-index with `items`.
 * @throws The first error raised by `processor` or `onProgress`. Once an error
 *         occurs no further items are started; calls already in flight are
 *         left to settle.
 */
async function processQueue(items, processor, concurrency, onProgress) {
    const results = [];
    const total = items.length;
    let completed = 0;
    let nextIndex = 0;
    let failed = false;
    // A worker count of 0 would resolve immediately with nothing processed.
    const requested = Math.floor(concurrency);
    const workerCount = Math.min(requested >= 1 ? requested : 1, total);
    async function worker() {
        // Claiming the index is synchronous, so workers never pick the same item.
        while (!failed && nextIndex < total) {
            const index = nextIndex++;
            try {
                results[index] = await processor(items[index]);
                completed++;
                if (onProgress) {
                    onProgress(completed, total);
                }
            }
            catch (error) {
                // Tell the other workers to stop pulling new items, then let
                // Promise.all surface the failure.
                failed = true;
                throw error;
            }
        }
    }
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
107
/**
 * Embed several texts, delegating scheduling to `processQueue`.
 *
 * @param texts - Texts to embed.
 * @param config - Ollama settings forwarded to `generateEmbedding`.
 * @param concurrency - Maximum parallel requests (default 5).
 * @param onProgress - Optional `(completed, total)` callback.
 * @returns One embedding per input text, in the same order; an empty array
 *          for empty input (no request is made).
 */
export async function generateEmbeddingsBatch(texts, config, concurrency = 5, onProgress) {
    const embedOne = (text) => generateEmbedding(text, config);
    return texts.length === 0
        ? []
        : processQueue(texts, embedOne, concurrency, onProgress);
}
116
/**
 * Roughly estimate how many tokens a text contains, assuming four characters
 * per token and rounding up.
 *
 * @param text - Text to measure (by `length`, i.e. UTF-16 code units).
 * @returns Estimated token count; 0 for an empty string.
 */
export function estimateTokenCount(text) {
    const charsPerToken = 4;
    const estimate = text.length / charsPerToken;
    return Math.ceil(estimate);
}
122
+ //# sourceMappingURL=embeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../../src/search/embeddings.ts"],"names":[],"mappings":"AAUA;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,MAAoB;IAC9D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,CAAC,OAAO,WAAW,EAAE;YACzD,MAAM,EAAE,KAAK;SACd,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC,EAAE,CAAC;IACrB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,MAAoB;IAC5D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,CAAC,OAAO,WAAW,EAAE;YACzD,MAAM,EAAE,KAAK;SACd,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAC5D,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IACjG,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAoB;IAEpB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,CAAC,OAAO,iBAAiB,EAAE;YAC/D,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,IAAI;aACb,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA4B,CAAC;QAEhE,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC5D,CAAC;QAED,mBAAmB;QACnB,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CACb,2CAA2C,IAAI,CAAC,SAAS,CAAC,MAAM,IAAI;gBACpE,oDAAoD,CACrD,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC1C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,IA
AI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;gBACrF,MAAM,IAAI,KAAK,CACb,+BAA+B,MAAM,CAAC,OAAO,MAAM;oBACnD,gCAAgC;oBAChC,oBAAoB;oBACpB,uCAAuC,CACxC,CAAC;YACJ,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;IACxD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CACzB,KAAU,EACV,SAAkC,EAClC,WAAmB,EACnB,UAAuD;IAEvD,MAAM,OAAO,GAAQ,EAAE,CAAC;IACxB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,UAAU,MAAM;QACnB,OAAO,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,SAAS,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;YAE1B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;gBACrC,OAAO,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC;gBAExB,SAAS,EAAE,CAAC;gBACZ,IAAI,UAAU,EAAE,CAAC;oBACf,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;gBACtC,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uCAAuC;gBACvC,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;IAE5F,mCAAmC;IACnC,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,KAAe,EACf,MAAoB,EACpB,cAAsB,CAAC,EACvB,UAAuD;IAEvD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,YAAY,CACjB,KAAK,EACL,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,EAAE,MAAM,CAAC,EACzC,WAAW,EACX,UAAU,CACX,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
@@ -0,0 +1,21 @@
1
+ import type Database from 'better-sqlite3';
2
+ import type { Config } from '../config/schema.js';
3
+ export interface FormattedSearchResult {
4
+ title: string;
5
+ url: string;
6
+ excerpt: string;
7
+ sourceId: string;
8
+ relevance: number;
9
+ }
10
+ /**
11
+ * Search the knowledge base using vector similarity
12
+ */
13
+ export declare function search(db: Database.Database, config: Config, query: string, options?: {
14
+ sources?: string[];
15
+ limit?: number;
16
+ }): Promise<FormattedSearchResult[]>;
17
+ /**
18
+ * Group search results by article to avoid duplicates
19
+ */
20
+ export declare function deduplicateResults(results: FormattedSearchResult[]): FormattedSearchResult[];
21
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAIlD,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,wBAAsB,MAAM,CAC1B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IACP,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CACX,GACL,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAgBlC;AAwBD;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,qBAAqB,EAAE,GAAG,qBAAqB,EAAE,CAe5F"}
@@ -0,0 +1,50 @@
1
+ import { generateEmbedding } from './embeddings.js';
2
+ import { searchSimilarChunks } from '../db/chunks.js';
3
/**
 * Search the knowledge base using vector similarity.
 *
 * @param db - Database handle passed through to `searchSimilarChunks`.
 * @param config - Application config; `config.ollama` is used to embed the query.
 * @param query - Natural-language search query.
 * @param options - Optional filters:
 *        `sources` restricts the search to the given source ids;
 *        `limit` is the maximum number of results (default 5, capped at 20).
 * @returns The matching chunks formatted for display.
 */
export async function search(db, config, query, options = {}) {
    const DEFAULT_LIMIT = 5;
    const MAX_LIMIT = 20;
    // Only a whole number >= 1 is a usable limit. Missing, zero, negative or
    // NaN values fall back to the default instead of reaching the query: a
    // negative LIMIT means "unbounded" in SQLite, and a fractional one is not
    // a valid row count.
    const requested = Math.floor(Number(options.limit));
    const limit = requested >= 1 ? Math.min(requested, MAX_LIMIT) : DEFAULT_LIMIT;
    // Generate embedding for query
    const queryEmbedding = await generateEmbedding(query, config.ollama);
    // Search database
    const results = searchSimilarChunks(db, queryEmbedding, limit, options.sources);
    // Format results
    return results.map(formatSearchResult);
}
15
/**
 * Convert a raw chunk match into the display shape returned by `search`.
 *
 * @param result - Raw match; reads `title`, `url`, `text`, `sourceId` and `distance`.
 * @returns `{ title, url, excerpt, sourceId, relevance }`, where `excerpt` is
 *          the chunk text with whitespace runs collapsed to single spaces and
 *          `relevance` is `1 - distance / 2`, floored at 0.
 */
function formatSearchResult(result) {
    const { title, url, sourceId } = result;
    // Distance is treated as cosine distance in [0, 2] (0 = identical,
    // 2 = opposite), so halving and inverting maps it onto a 0..1 score.
    const similarity = 1 - (result.distance / 2);
    const relevance = Math.max(0, similarity);
    const collapsed = result.text.replace(/\s+/g, ' ');
    return {
        title,
        url,
        excerpt: collapsed.trim(),
        sourceId,
        relevance,
    };
}
34
/**
 * Collapse results that point at the same URL, keeping only the most relevant
 * one for each URL.
 *
 * @param results - Formatted search results, in any order.
 * @returns A new array with one entry per distinct `url`, sorted by
 *          descending `relevance`. The input is not modified.
 */
export function deduplicateResults(results) {
    const bestByUrl = new Map();
    for (const candidate of results) {
        const current = bestByUrl.get(candidate.url);
        const isBetter = !current || candidate.relevance > current.relevance;
        if (isBetter) {
            bestByUrl.set(candidate.url, candidate);
        }
    }
    const unique = [...bestByUrl.values()];
    unique.sort((left, right) => right.relevance - left.relevance);
    return unique;
}
50
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/search/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAqB,MAAM,iBAAiB,CAAC;AAUzE;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAC1B,EAAqB,EACrB,MAAc,EACd,KAAa,EACb,UAGI,EAAE;IAEN,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;IAE/C,+BAA+B;IAC/B,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAErE,kBAAkB;IAClB,MAAM,OAAO,GAAG,mBAAmB,CACjC,EAAE,EACF,cAAc,EACd,KAAK,EACL,OAAO,CAAC,OAAO,CAChB,CAAC;IAEF,iBAAiB;IACjB,OAAO,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,MAAoB;IAC9C,qEAAqE;IACrE,oEAAoE;IACpE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC;IAEzD,6CAA6C;IAC7C,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI;SACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;IAEV,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,OAAO;QACP,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,SAAS;KACV,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAgC;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAiC,CAAC;IAEtD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC;QAEvB,yCAAyC;QACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,SAAS,GAAG,QAAQ,CAAC,SAAS,EAAE,CAAC;YACvD,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;SAC7B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;AAC/C,CAAC"}
@@ -0,0 +1,15 @@
1
/**
 * A single chunk produced by `chunkText`.
 */
export interface TextChunk {
    /** Chunk content; the chunker prepends the `# <title>` heading to it. */
    text: string;
    /** Zero-based position of this chunk in the array returned by the chunker. */
    index: number;
    /** Estimated token count of the chunk, including the tokens reserved for the title heading. */
    tokenCount: number;
}
6
/**
 * Break an article body into overlapping chunks, each prefixed with the title.
 *
 * @param text - The article text to chunk
 * @param title - Article title (prepended to every chunk)
 * @param targetSize - Target chunk size in tokens (default: 500)
 * @param overlap - Number of tokens shared between consecutive chunks (default: 50)
 * @returns The chunks in document order
 */
export declare function chunkText(
    text: string,
    title: string,
    targetSize?: number,
    overlap?: number,
): TextChunk[];
15
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../../src/sync/chunker.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAuBD;;;;;;;GAOG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,UAAU,GAAE,MAAY,EACxB,OAAO,GAAE,MAAW,GACnB,SAAS,EAAE,CAyGb"}
@@ -0,0 +1,117 @@
1
+ import { estimateTokenCount } from '../search/embeddings.js';
2
/**
 * Split text into sentence-like segments (simple, punctuation-based approach).
 *
 * A boundary is either a run of `.`, `!` or `?` followed by whitespace, or a
 * paragraph break made of two consecutive line breaks. Both `\n\n` and
 * `\r\n\r\n` count as paragraph breaks, so CRLF text is segmented the same way
 * as LF text. Each segment keeps its trailing delimiter, so concatenating the
 * result reproduces the input minus any whitespace-only segments.
 *
 * @param {string} text - Text to segment
 * @returns {string[]} Non-blank segments in their original order
 */
function splitIntoSentences(text) {
    // The capture group makes split() keep the delimiters, producing
    // [segment, delimiter, segment, delimiter, ..., segment].
    const parts = text.split(/([.!?]+\s+|(?:\r?\n){2})/);
    const sentences = [];
    // Segments sit at even indices; the delimiter that followed each one (if
    // any) sits at the next odd index.
    for (let i = 0; i < parts.length; i += 2) {
        const sentence = parts[i] + (parts[i + 1] || '');
        if (sentence.trim()) {
            sentences.push(sentence);
        }
    }
    return sentences;
}
21
/**
 * Chunk text into smaller pieces with overlap.
 *
 * Sentences are accumulated until adding the next one would push the chunk
 * past the size budget. The chunk is then emitted and the next one is seeded
 * with the trailing sentences of the previous chunk (up to `overlap` tokens).
 * A sentence that is larger than the whole budget on its own is always split
 * on word boundaries, after first emitting any pending chunk, so it can never
 * be appended whole to an existing chunk.
 *
 * @param text - The text to chunk
 * @param title - Article title (will be prepended to each chunk)
 * @param targetSize - Target chunk size in tokens (default: 500)
 * @param overlap - Number of tokens to overlap between chunks (default: 50)
 * @returns Chunks in document order; a single title-only chunk when `text`
 *          contains no sentences
 */
export function chunkText(text, title, targetSize = 500, overlap = 50) {
    // Every chunk starts with the title as a heading.
    const titlePrefix = `# ${title}\n\n`;
    const sentences = splitIntoSentences(text);
    if (sentences.length === 0) {
        // Empty text: return a single chunk holding just the title.
        const titleOnly = titlePrefix.trim();
        return [{
                text: titleOnly,
                index: 0,
                tokenCount: estimateTokenCount(titleOnly),
            }];
    }
    const chunks = [];
    // Reserve part of the budget for the title that is prepended to each chunk.
    const titleTokens = estimateTokenCount(titlePrefix);
    const effectiveTargetSize = targetSize - titleTokens;
    // Emit one chunk from the given parts; `tokens` excludes the title.
    const pushChunk = (parts, tokens) => {
        chunks.push({
            text: titlePrefix + parts.join(' '),
            index: chunks.length,
            tokenCount: titleTokens + tokens,
        });
    };
    let currentChunk = [];
    let currentTokens = 0;
    for (const sentence of sentences) {
        const sentenceTokens = estimateTokenCount(sentence);
        // A sentence that cannot fit in any chunk is split by words.
        if (sentenceTokens > effectiveTargetSize) {
            // Emit whatever is pending first so the long sentence is never
            // glued onto an existing chunk.
            if (currentChunk.length > 0) {
                pushChunk(currentChunk, currentTokens);
                currentChunk = [];
                currentTokens = 0;
            }
            // filter(Boolean) drops the empty strings split() yields for the
            // sentence's leading/trailing whitespace; counting them as words
            // could otherwise emit a chunk that holds nothing but the title.
            const words = sentence.split(/\s+/).filter(Boolean);
            let wordChunk = [];
            let wordTokens = 0;
            for (const word of words) {
                const wordToken = estimateTokenCount(word + ' ');
                // A single word larger than the budget still gets its own
                // chunk; it cannot be split further here.
                if (wordTokens + wordToken > effectiveTargetSize && wordChunk.length > 0) {
                    pushChunk(wordChunk, wordTokens);
                    wordChunk = [];
                    wordTokens = 0;
                }
                wordChunk.push(word);
                wordTokens += wordToken;
            }
            if (wordChunk.length > 0) {
                pushChunk(wordChunk, wordTokens);
            }
            continue;
        }
        // Would adding this sentence exceed the budget?
        if (currentTokens + sentenceTokens > effectiveTargetSize && currentChunk.length > 0) {
            pushChunk(currentChunk, currentTokens);
            // Seed the next chunk with the last few sentences for context,
            // walking backwards until the overlap budget is used up.
            const overlapSentences = [];
            let overlapTokens = 0;
            for (let j = currentChunk.length - 1; j >= 0; j--) {
                const overlapSentence = currentChunk[j];
                const tokens = estimateTokenCount(overlapSentence);
                if (overlapTokens + tokens > overlap) {
                    break;
                }
                overlapSentences.unshift(overlapSentence);
                overlapTokens += tokens;
            }
            currentChunk = overlapSentences;
            currentTokens = overlapTokens;
        }
        currentChunk.push(sentence);
        currentTokens += sentenceTokens;
    }
    // Emit the final chunk if anything is left.
    if (currentChunk.length > 0) {
        pushChunk(currentChunk, currentTokens);
    }
    return chunks;
}
117
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/sync/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAQ7D;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,+BAA+B;IAC/B,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,kBAAkB,CAAC;SACzB,MAAM,CAAC,CAAC,GAAa,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QACtC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAChB,sCAAsC;YACtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpB,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC,EAAE,EAAE,CAAC,CAAC;IAET,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,KAAa,EACb,aAAqB,GAAG,EACxB,UAAkB,EAAE;IAEpB,gBAAgB;IAChB,MAAM,WAAW,GAAG,KAAK,KAAK,MAAM,CAAC;IAErC,uBAAuB;IACvB,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAE3C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,kDAAkD;QAClD,OAAO,CAAC;gBACN,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE;gBACxB,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,kBAAkB,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;aACnD,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,2BAA2B;IAC3B,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IACpD,MAAM,mBAAmB,GAAG,UAAU,GAAG,WAAW,CAAC;IAErD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,cAAc,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAEpD,0DAA0D;QAC1D,IAAI,cAAc,GAAG,mBAAmB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtE,yCAAyC;YACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,SAAS,GAAa,EAAE,CAAC;YAC7B,IAAI,UAAU,GAAG,CAAC,CAAC;YAEnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;gBACjD,IAAI,UAAU,GAAG,SAAS,GAAG,mBAAmB,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACzE,MAAM,CAAC,IAAI,CAAC;wBACV,IAAI,EAAE,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;wBACvC,KAAK,EAAE,MAAM,CAAC,MAAM;wBACpB,UAAU,EAAE,WAAW,GAAG,UAAU;qBACr
C,CAAC,CAAC;oBACH,SAAS,GAAG,EAAE,CAAC;oBACf,UAAU,GAAG,CAAC,CAAC;gBACjB,CAAC;gBACD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,UAAU,IAAI,SAAS,CAAC;YAC1B,CAAC;YAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;oBACvC,KAAK,EAAE,MAAM,CAAC,MAAM;oBACpB,UAAU,EAAE,WAAW,GAAG,UAAU;iBACrC,CAAC,CAAC;YACL,CAAC;YAED,SAAS;QACX,CAAC;QAED,oDAAoD;QACpD,IAAI,aAAa,GAAG,cAAc,GAAG,mBAAmB,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpF,qBAAqB;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC;gBAC1C,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,UAAU,EAAE,WAAW,GAAG,aAAa;aACxC,CAAC,CAAC;YAEH,+BAA+B;YAC/B,sCAAsC;YACtC,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClD,MAAM,eAAe,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,MAAM,GAAG,kBAAkB,CAAC,eAAe,CAAC,CAAC;gBAEnD,IAAI,aAAa,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;oBACrC,MAAM;gBACR,CAAC;gBAED,gBAAgB,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;gBAC1C,aAAa,IAAI,MAAM,CAAC;YAC1B,CAAC;YAED,YAAY,GAAG,gBAAgB,CAAC;YAChC,aAAa,GAAG,aAAa,CAAC;QAChC,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5B,aAAa,IAAI,cAAc,CAAC;IAClC,CAAC;IAED,+BAA+B;IAC/B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC;YAC1C,KAAK,EAAE,MAAM,CAAC,MAAM;YACpB,UAAU,EAAE,WAAW,GAAG,aAAa;SACxC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,24 @@
1
+ import type Database from 'better-sqlite3';
2
+ import type { Config, Source } from '../config/schema.js';
3
/**
 * Progress report passed to the `onProgress` callbacks of the sync functions.
 */
export interface SyncProgress {
    /** Stage of the sync pipeline this report refers to. */
    phase:
        | 'fetching'
        | 'parsing'
        | 'chunking'
        | 'embedding'
        | 'storing';
    /** Progress counter for the phase (presumably items completed so far; confirm against the implementation). */
    current: number;
    /** Upper bound for `current` (presumably the number of items in the phase; confirm against the implementation). */
    total: number;
    /** Human-readable status text. */
    message: string;
}
9
/**
 * Summary returned after syncing one source.
 */
export interface SyncResult {
    /** Identifies the source that was synced (presumably its id or name; confirm against the implementation). */
    source: string;
    /** Number of articles processed during the sync. */
    articlesProcessed: number;
    /** Number of articles fetched from the source. */
    articlesFetched: number;
    /** Number of chunks created during the sync. */
    chunksCreated: number;
    /** Duration of the sync (unit not visible here, presumably milliseconds; confirm). */
    timeElapsed: number;
}
16
/**
 * Sync a single source.
 *
 * @param db - Open better-sqlite3 database handle
 * @param source - The source to sync
 * @param config - Application configuration
 * @param onProgress - Optional callback that receives progress reports
 * @param fullResync - Optional flag requesting a full resync
 * @returns A promise resolving to the summary of the sync run
 */
export declare function syncSource(
    db: Database.Database,
    source: Source,
    config: Config,
    onProgress?: (progress: SyncProgress) => void,
    fullResync?: boolean,
): Promise<SyncResult>;
20
/**
 * Sync all enabled sources.
 *
 * @param db - Open better-sqlite3 database handle
 * @param sources - The sources to consider
 * @param config - Application configuration
 * @param onProgress - Optional callback that receives a source identifier together with each progress report
 * @param fullResync - Optional flag requesting a full resync
 * @returns A promise resolving to an array of sync summaries
 */
export declare function syncAllSources(
    db: Database.Database,
    sources: Source[],
    config: Config,
    onProgress?: (source: string, progress: SyncProgress) => void,
    fullResync?: boolean,
): Promise<SyncResult[]>;
24
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/sync/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAU1D,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,WAAW,GAAG,SAAS,CAAC;IACrE,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,YAAY,KAAK,IAAI,EAC7C,UAAU,GAAE,OAAe,GAC1B,OAAO,CAAC,UAAU,CAAC,CA6MrB;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,EAAE,MAAM,EAAE,EACjB,MAAM,EAAE,MAAM,EACd,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,KAAK,IAAI,EAC7D,UAAU,GAAE,OAAe,GAC1B,OAAO,CAAC,UAAU,EAAE,CAAC,CAoBvB"}