bluera-knowledge 0.9.26 → 0.9.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/BUGS-FOUND.md +71 -0
  9. package/CHANGELOG.md +76 -0
  10. package/README.md +55 -20
  11. package/bun.lock +35 -1
  12. package/commands/crawl.md +2 -0
  13. package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
  14. package/dist/chunk-2SJHNRXD.js.map +1 -0
  15. package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
  16. package/dist/chunk-OGEY66FZ.js.map +1 -0
  17. package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
  18. package/dist/chunk-RWSXP3PQ.js.map +1 -0
  19. package/dist/index.js +73 -28
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp/server.js +2 -2
  22. package/dist/workers/background-worker-cli.js +2 -2
  23. package/eslint.config.js +1 -1
  24. package/package.json +3 -1
  25. package/src/analysis/ast-parser.test.ts +46 -0
  26. package/src/cli/commands/crawl.test.ts +99 -12
  27. package/src/cli/commands/crawl.ts +76 -24
  28. package/src/cli/commands/store.test.ts +68 -1
  29. package/src/cli/commands/store.ts +9 -3
  30. package/src/crawl/article-converter.ts +36 -1
  31. package/src/crawl/bridge.ts +18 -7
  32. package/src/crawl/intelligent-crawler.ts +45 -4
  33. package/src/db/embeddings.test.ts +16 -0
  34. package/src/db/lance.test.ts +31 -0
  35. package/src/db/lance.ts +8 -0
  36. package/src/logging/index.ts +29 -0
  37. package/src/logging/logger.test.ts +75 -0
  38. package/src/logging/logger.ts +147 -0
  39. package/src/logging/payload.test.ts +152 -0
  40. package/src/logging/payload.ts +121 -0
  41. package/src/mcp/handlers/search.handler.test.ts +28 -9
  42. package/src/mcp/handlers/search.handler.ts +69 -29
  43. package/src/mcp/handlers/store.handler.test.ts +1 -0
  44. package/src/mcp/server.ts +44 -16
  45. package/src/services/chunking.service.ts +23 -0
  46. package/src/services/index.service.test.ts +921 -1
  47. package/src/services/index.service.ts +76 -1
  48. package/src/services/index.ts +20 -2
  49. package/src/services/search.service.test.ts +573 -21
  50. package/src/services/search.service.ts +257 -105
  51. package/src/services/services.test.ts +2 -2
  52. package/src/services/snippet.service.ts +28 -3
  53. package/src/services/store.service.test.ts +28 -0
  54. package/src/services/store.service.ts +4 -0
  55. package/src/services/token.service.test.ts +45 -0
  56. package/src/services/token.service.ts +33 -0
  57. package/src/types/result.test.ts +10 -0
  58. package/tests/integration/cli-consistency.test.ts +1 -4
  59. package/vitest.config.ts +4 -0
  60. package/dist/chunk-5QMHZUC4.js.map +0 -1
  61. package/dist/chunk-BICFAWMN.js.map +0 -1
  62. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  63. package/scripts/readme-version-updater.cjs +0 -18
package/eslint.config.js CHANGED
@@ -42,7 +42,7 @@ export default tseslint.config(
42
42
  },
43
43
  },
44
44
  {
45
- ignores: ['dist/**', 'node_modules/**', '*.config.js', '**/*.test.ts', 'tests/**/*.ts'],
45
+ ignores: ['dist/**', 'node_modules/**', '*.config.js', '*.config.ts', '**/*.test.ts', 'tests/**/*.ts'],
46
46
  },
47
47
  // Test files: Apply custom skip-comment rule only
48
48
  {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bluera-knowledge",
3
- "version": "0.9.26",
3
+ "version": "0.9.31",
4
4
  "description": "CLI tool for managing knowledge stores with semantic search",
5
5
  "type": "module",
6
6
  "bin": {
@@ -92,6 +92,8 @@
92
92
  "hono": "^4.11.1",
93
93
  "node-addon-api": "^8.5.0",
94
94
  "ora": "^9.0.0",
95
+ "pino": "^9.6.0",
96
+ "pino-roll": "^1.3.0",
95
97
  "slurp-ai": "^1.0.6",
96
98
  "tree-sitter": "^0.25.0",
97
99
  "tree-sitter-go": "^0.23.1",
package/src/analysis/ast-parser.test.ts CHANGED
@@ -356,6 +356,52 @@ import type { Props } from "./types";
356
356
  });
357
357
 
358
358
  describe('Edge cases and special scenarios', () => {
359
+ it('handles default exported class', () => {
360
+ const code = 'export default class DefaultClass { method() {} }';
361
+ const nodes = parser.parse(code, 'typescript');
362
+
363
+ expect(nodes).toHaveLength(1);
364
+ expect(nodes[0]?.exported).toBe(true);
365
+ expect(nodes[0]?.name).toBe('DefaultClass');
366
+ });
367
+
368
+ it('handles anonymous default exported class (no id)', () => {
369
+ const code = 'export default class { method() {} }';
370
+ const nodes = parser.parse(code, 'javascript');
371
+
372
+ // Anonymous classes don't have an id, should be skipped
373
+ expect(nodes.filter(n => n.type === 'class')).toHaveLength(0);
374
+ });
375
+
376
+ it('handles class with computed property method (non-identifier key)', () => {
377
+ const code = `class MyClass {
378
+ ['computed']() { return 42; }
379
+ normalMethod() { return 1; }
380
+ }`;
381
+ const nodes = parser.parse(code, 'javascript');
382
+
383
+ // Computed properties have StringLiteral keys, not Identifier
384
+ // Only normalMethod should be captured
385
+ expect(nodes[0]?.methods).toHaveLength(1);
386
+ expect(nodes[0]?.methods?.[0]?.name).toBe('normalMethod');
387
+ });
388
+
389
+ it('handles class with rest parameters in method', () => {
390
+ const code = `class MyClass {
391
+ method(...args) { return args; }
392
+ }`;
393
+ const nodes = parser.parse(code, 'javascript');
394
+
395
+ expect(nodes[0]?.methods?.[0]?.signature).toBe('method(param)');
396
+ });
397
+
398
+ it('handles function with rest parameters', () => {
399
+ const code = 'function spread(...items) { return items; }';
400
+ const nodes = parser.parse(code, 'javascript');
401
+
402
+ expect(nodes[0]?.signature).toBe('spread(param)');
403
+ });
404
+
359
405
  it('handles functions with complex destructured parameters', () => {
360
406
  const code = 'function complex({ a, b }, [c, d]) { return a + b + c + d; }';
361
407
  const nodes = parser.parse(code, 'javascript');
package/src/cli/commands/crawl.test.ts CHANGED
@@ -38,6 +38,7 @@ describe('crawl command execution', () => {
38
38
  store: {
39
39
  getByIdOrName: vi.fn(),
40
40
  list: vi.fn(),
41
+ create: vi.fn(),
41
42
  },
42
43
  lance: {
43
44
  initialize: vi.fn(),
@@ -291,23 +292,110 @@ describe('crawl command execution', () => {
291
292
  });
292
293
  });
293
294
 
294
- describe('error handling', () => {
295
- it('exits with code 3 when store not found', async () => {
295
+ describe('store auto-creation', () => {
296
+ it('auto-creates web store when store does not exist', async () => {
297
+ const createdStore: WebStore = {
298
+ id: createStoreId('new-store-id'),
299
+ name: 'new-store',
300
+ type: 'web',
301
+ url: 'https://example.com',
302
+ depth: 2,
303
+ createdAt: new Date(),
304
+ updatedAt: new Date(),
305
+ };
306
+
296
307
  mockServices.store.getByIdOrName.mockResolvedValue(undefined);
308
+ mockServices.store.create.mockResolvedValue({ success: true, data: createdStore });
309
+ mockServices.lance.initialize.mockResolvedValue(undefined);
310
+ mockServices.embeddings.embed.mockResolvedValue([0.1, 0.2, 0.3]);
311
+ mockServices.lance.addDocuments.mockResolvedValue(undefined);
312
+
313
+ mockCrawler.crawl.mockReturnValue(
314
+ (async function* () {
315
+ yield {
316
+ url: 'https://example.com/page1',
317
+ title: 'Page 1',
318
+ markdown: '# Content',
319
+ depth: 0,
320
+ };
321
+ })()
322
+ );
297
323
 
298
324
  const command = createCrawlCommand(getOptions);
299
325
  const actionHandler = command._actionHandler;
300
326
 
301
- await expect(actionHandler(['https://example.com', 'nonexistent-store'])).rejects.toThrow('process.exit: 3');
327
+ await actionHandler(['https://example.com', 'new-store']);
302
328
 
303
- expect(consoleErrorSpy).toHaveBeenCalledWith(
304
- 'Error: Web store not found: nonexistent-store'
329
+ expect(mockServices.store.create).toHaveBeenCalledWith({
330
+ name: 'new-store',
331
+ type: 'web',
332
+ url: 'https://example.com',
333
+ });
334
+ expect(consoleLogSpy).toHaveBeenCalledWith('Created web store: new-store');
335
+ expect(mockCrawler.crawl).toHaveBeenCalled();
336
+ });
337
+
338
+ it('throws error when store creation fails', async () => {
339
+ mockServices.store.getByIdOrName.mockResolvedValue(undefined);
340
+ mockServices.store.create.mockResolvedValue({ success: false, error: new Error('Name already exists') });
341
+
342
+ const command = createCrawlCommand(getOptions);
343
+ const actionHandler = command._actionHandler;
344
+
345
+ await expect(actionHandler(['https://example.com', 'bad-store'])).rejects.toThrow(
346
+ 'Failed to create store: Name already exists'
305
347
  );
306
- expect(processExitSpy).toHaveBeenCalledWith(3);
307
348
  expect(mockCrawler.crawl).not.toHaveBeenCalled();
308
349
  });
309
350
 
310
- it('exits with code 3 when store is not a web store', async () => {
351
+ it('includes storeCreated in JSON output when store was created', async () => {
352
+ const createdStore: WebStore = {
353
+ id: createStoreId('new-store-id'),
354
+ name: 'new-store',
355
+ type: 'web',
356
+ url: 'https://example.com',
357
+ depth: 2,
358
+ createdAt: new Date(),
359
+ updatedAt: new Date(),
360
+ };
361
+
362
+ mockServices.store.getByIdOrName.mockResolvedValue(undefined);
363
+ mockServices.store.create.mockResolvedValue({ success: true, data: createdStore });
364
+ mockServices.lance.initialize.mockResolvedValue(undefined);
365
+ mockServices.embeddings.embed.mockResolvedValue([0.1, 0.2, 0.3]);
366
+ mockServices.lance.addDocuments.mockResolvedValue(undefined);
367
+
368
+ mockCrawler.crawl.mockReturnValue(
369
+ (async function* () {
370
+ yield {
371
+ url: 'https://example.com/page1',
372
+ title: 'Page 1',
373
+ markdown: '# Content',
374
+ depth: 0,
375
+ };
376
+ })()
377
+ );
378
+
379
+ getOptions = () => ({
380
+ config: undefined,
381
+ dataDir: '/tmp/test',
382
+ quiet: false,
383
+ format: 'json',
384
+ });
385
+
386
+ const command = createCrawlCommand(getOptions);
387
+ const actionHandler = command._actionHandler;
388
+
389
+ await actionHandler(['https://example.com', 'new-store']);
390
+
391
+ expect(consoleLogSpy).toHaveBeenCalledWith(
392
+ expect.stringContaining('"storeCreated": true')
393
+ );
394
+ });
395
+ });
396
+
397
+ describe('error handling', () => {
398
+ it('throws error when store is not a web store', async () => {
311
399
  const mockFileStore = {
312
400
  id: createStoreId('store-1'),
313
401
  name: 'file-store',
@@ -322,10 +410,9 @@ describe('crawl command execution', () => {
322
410
  const command = createCrawlCommand(getOptions);
323
411
  const actionHandler = command._actionHandler;
324
412
 
325
- await expect(actionHandler(['https://example.com', 'file-store'])).rejects.toThrow('process.exit: 3');
326
-
327
- expect(consoleErrorSpy).toHaveBeenCalledWith('Error: Web store not found: file-store');
328
- expect(processExitSpy).toHaveBeenCalledWith(3);
413
+ await expect(actionHandler(['https://example.com', 'file-store'])).rejects.toThrow(
414
+ 'Store "file-store" exists but is not a web store (type: file)'
415
+ );
329
416
  });
330
417
 
331
418
  it('exits with code 6 when crawling fails', async () => {
@@ -933,7 +1020,7 @@ describe('crawl command execution', () => {
933
1020
  await actionHandler(['https://example.com', 'test-store']);
934
1021
 
935
1022
  expect(consoleLogSpy).toHaveBeenCalledWith('Crawling https://example.com');
936
- expect(consoleLogSpy).toHaveBeenCalledWith('Crawled and indexed 1 pages');
1023
+ expect(consoleLogSpy).toHaveBeenCalledWith('Crawled 1 pages, indexed 1 chunks');
937
1024
  } finally {
938
1025
  Object.defineProperty(process.stdout, 'isTTY', { value: originalIsTTY, configurable: true });
939
1026
  }
package/src/cli/commands/crawl.ts CHANGED
@@ -6,6 +6,9 @@ import { IntelligentCrawler, type CrawlProgress } from '../../crawl/intelligent-
6
6
  import { createDocumentId } from '../../types/brands.js';
7
7
  import type { GlobalOptions } from '../program.js';
8
8
  import type { Document } from '../../types/document.js';
9
+ import type { WebStore } from '../../types/store.js';
10
+ import { ChunkingService } from '../../services/chunking.service.js';
11
+ import { classifyWebContentType } from '../../services/index.service.js';
9
12
 
10
13
  export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
11
14
  return new Command('crawl')
@@ -27,10 +30,38 @@ export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
27
30
  const globalOpts = getOptions();
28
31
  const services = await createServices(globalOpts.config, globalOpts.dataDir);
29
32
 
30
- const store = await services.store.getByIdOrName(storeIdOrName);
31
- if (!store || store.type !== 'web') {
32
- console.error(`Error: Web store not found: ${storeIdOrName}`);
33
- process.exit(3);
33
+ // Look up or auto-create web store
34
+ let store: WebStore;
35
+ let storeCreated = false;
36
+ const existingStore = await services.store.getByIdOrName(storeIdOrName);
37
+
38
+ if (!existingStore) {
39
+ // Auto-create web store
40
+ const result = await services.store.create({
41
+ name: storeIdOrName,
42
+ type: 'web',
43
+ url,
44
+ });
45
+ if (!result.success) {
46
+ await destroyServices(services);
47
+ throw new Error(`Failed to create store: ${result.error.message}`);
48
+ }
49
+ // Type narrowing: success check above ensures result.data is Store
50
+ // We know it's a WebStore because we created it with type: 'web'
51
+ const createdStore = result.data;
52
+ if (createdStore.type !== 'web') {
53
+ throw new Error('Unexpected store type after creation');
54
+ }
55
+ store = createdStore;
56
+ storeCreated = true;
57
+ if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
58
+ console.log(`Created web store: ${store.name}`);
59
+ }
60
+ } else if (existingStore.type !== 'web') {
61
+ await destroyServices(services);
62
+ throw new Error(`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`);
63
+ } else {
64
+ store = existingStore;
34
65
  }
35
66
 
36
67
  const maxPages = cmdOptions.maxPages !== undefined ? parseInt(cmdOptions.maxPages) : 50;
@@ -47,7 +78,10 @@ export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
47
78
  }
48
79
 
49
80
  const crawler = new IntelligentCrawler();
81
+ // Use web preset for larger prose-friendly chunks
82
+ const webChunker = ChunkingService.forContentType('web');
50
83
  let pagesIndexed = 0;
84
+ let chunksCreated = 0;
51
85
 
52
86
  // Listen for progress events
53
87
  crawler.on('progress', (progress: CrawlProgress) => {
@@ -78,24 +112,40 @@ export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
78
112
  ...(cmdOptions.simple !== undefined && { simple: cmdOptions.simple }),
79
113
  useHeadless: cmdOptions.headless ?? false,
80
114
  })) {
81
- // Embed and index the content (use extracted if available, otherwise markdown)
82
- const contentToEmbed = result.extracted !== undefined ? result.extracted : result.markdown;
83
- const vector = await services.embeddings.embed(contentToEmbed);
84
-
85
- docs.push({
86
- id: createDocumentId(`${store.id}-${createHash('md5').update(result.url).digest('hex')}`),
87
- content: contentToEmbed,
88
- vector,
89
- metadata: {
90
- type: 'web',
91
- storeId: store.id,
92
- url: result.url,
93
- title: result.title,
94
- extracted: result.extracted !== undefined,
95
- depth: result.depth,
96
- indexedAt: new Date(),
97
- },
98
- });
115
+ // Use extracted content if available, otherwise markdown
116
+ const contentToProcess = result.extracted !== undefined ? result.extracted : result.markdown;
117
+
118
+ // Chunk the content using markdown-aware chunking (web content is converted to markdown)
119
+ const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
120
+ const fileType = classifyWebContentType(result.url, result.title);
121
+ const urlHash = createHash('md5').update(result.url).digest('hex');
122
+
123
+ for (const chunk of chunks) {
124
+ const chunkId = chunks.length > 1
125
+ ? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}`
126
+ : `${store.id}-${urlHash}`;
127
+ const vector = await services.embeddings.embed(chunk.content);
128
+
129
+ docs.push({
130
+ id: createDocumentId(chunkId),
131
+ content: chunk.content,
132
+ vector,
133
+ metadata: {
134
+ type: chunks.length > 1 ? 'chunk' : 'web',
135
+ storeId: store.id,
136
+ url: result.url,
137
+ title: result.title,
138
+ extracted: result.extracted !== undefined,
139
+ depth: result.depth,
140
+ indexedAt: new Date(),
141
+ fileType,
142
+ chunkIndex: chunk.chunkIndex,
143
+ totalChunks: chunk.totalChunks,
144
+ sectionHeader: chunk.sectionHeader,
145
+ },
146
+ });
147
+ chunksCreated++;
148
+ }
99
149
 
100
150
  pagesIndexed++;
101
151
  }
@@ -111,8 +161,10 @@ export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
111
161
  const crawlResult = {
112
162
  success: true,
113
163
  store: store.name,
164
+ storeCreated,
114
165
  url,
115
166
  pagesCrawled: pagesIndexed,
167
+ chunksCreated,
116
168
  mode: cmdOptions.simple === true ? 'simple' : 'intelligent',
117
169
  hadCrawlInstruction: cmdOptions.crawl !== undefined,
118
170
  hadExtractInstruction: cmdOptions.extract !== undefined,
@@ -121,9 +173,9 @@ export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
121
173
  if (globalOpts.format === 'json') {
122
174
  console.log(JSON.stringify(crawlResult, null, 2));
123
175
  } else if (spinner !== undefined) {
124
- spinner.succeed(`Crawled and indexed ${String(pagesIndexed)} pages`);
176
+ spinner.succeed(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
125
177
  } else if (globalOpts.quiet !== true) {
126
- console.log(`Crawled and indexed ${String(pagesIndexed)} pages`);
178
+ console.log(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
127
179
  }
128
180
  } catch (error) {
129
181
  const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
package/src/cli/commands/store.test.ts CHANGED
@@ -935,7 +935,7 @@ describe('store command execution', () => {
935
935
  });
936
936
  });
937
937
 
938
- it('routes source to path for repo stores', async () => {
938
+ it('routes source to path for repo stores with local path', async () => {
939
939
  const mockStore: RepoStore = {
940
940
  id: createStoreId('store-2'),
941
941
  name: 'repo-store',
@@ -967,6 +967,73 @@ describe('store command execution', () => {
967
967
  });
968
968
  });
969
969
 
970
+ it('routes URL source to url for repo stores (Bug #1 fix)', async () => {
971
+ const mockStore: RepoStore = {
972
+ id: createStoreId('store-2'),
973
+ name: 'repo-url-store',
974
+ type: 'repo',
975
+ path: '/cloned/repo/path',
976
+ url: 'https://github.com/user/repo',
977
+ createdAt: new Date(),
978
+ updatedAt: new Date(),
979
+ };
980
+
981
+ mockServices.store.create.mockResolvedValue({
982
+ success: true,
983
+ data: mockStore,
984
+ });
985
+
986
+ const command = createStoreCommand(getOptions);
987
+ const createCommand = command.commands.find(c => c.name() === 'create');
988
+ const actionHandler = createCommand?._actionHandler;
989
+
990
+ createCommand.parseOptions(['--type', 'repo', '--source', 'https://github.com/user/repo']);
991
+ await actionHandler!(['repo-url-store']);
992
+
993
+ // URL should be routed to 'url' parameter, not 'path'
994
+ expect(mockServices.store.create).toHaveBeenCalledWith({
995
+ name: 'repo-url-store',
996
+ type: 'repo',
997
+ path: undefined,
998
+ url: 'https://github.com/user/repo',
999
+ description: undefined,
1000
+ tags: undefined,
1001
+ });
1002
+ });
1003
+
1004
+ it('routes http:// URL source to url for repo stores', async () => {
1005
+ const mockStore: RepoStore = {
1006
+ id: createStoreId('store-2'),
1007
+ name: 'repo-http-store',
1008
+ type: 'repo',
1009
+ path: '/cloned/repo/path',
1010
+ url: 'http://internal-git.example.com/repo',
1011
+ createdAt: new Date(),
1012
+ updatedAt: new Date(),
1013
+ };
1014
+
1015
+ mockServices.store.create.mockResolvedValue({
1016
+ success: true,
1017
+ data: mockStore,
1018
+ });
1019
+
1020
+ const command = createStoreCommand(getOptions);
1021
+ const createCommand = command.commands.find(c => c.name() === 'create');
1022
+ const actionHandler = createCommand?._actionHandler;
1023
+
1024
+ createCommand.parseOptions(['--type', 'repo', '--source', 'http://internal-git.example.com/repo']);
1025
+ await actionHandler!(['repo-http-store']);
1026
+
1027
+ expect(mockServices.store.create).toHaveBeenCalledWith({
1028
+ name: 'repo-http-store',
1029
+ type: 'repo',
1030
+ path: undefined,
1031
+ url: 'http://internal-git.example.com/repo',
1032
+ description: undefined,
1033
+ tags: undefined,
1034
+ });
1035
+ });
1036
+
970
1037
  it('routes source to url for web stores', async () => {
971
1038
  const mockStore: WebStore = {
972
1039
  id: createStoreId('store-3'),
package/src/cli/commands/store.ts CHANGED
@@ -54,12 +54,15 @@ export function createStoreCommand(getOptions: () => GlobalOptions): Command {
54
54
  }) => {
55
55
  const globalOpts = getOptions();
56
56
  const services = await createServices(globalOpts.config, globalOpts.dataDir);
57
+ let exitCode = 0;
57
58
  try {
59
+ // Detect if source is a URL (for repo stores that should clone from remote)
60
+ const isUrl = options.source.startsWith('http://') || options.source.startsWith('https://');
58
61
  const result = await services.store.create({
59
62
  name,
60
63
  type: options.type,
61
- path: options.type !== 'web' ? options.source : undefined,
62
- url: options.type === 'web' ? options.source : undefined,
64
+ path: options.type === 'file' || (options.type === 'repo' && !isUrl) ? options.source : undefined,
65
+ url: options.type === 'web' || (options.type === 'repo' && isUrl) ? options.source : undefined,
63
66
  description: options.description,
64
67
  tags: options.tags?.split(',').map((t) => t.trim()),
65
68
  });
@@ -72,11 +75,14 @@ export function createStoreCommand(getOptions: () => GlobalOptions): Command {
72
75
  }
73
76
  } else {
74
77
  console.error(`Error: ${result.error.message}`);
75
- process.exit(1);
78
+ exitCode = 1;
76
79
  }
77
80
  } finally {
78
81
  await destroyServices(services);
79
82
  }
83
+ if (exitCode !== 0) {
84
+ process.exit(exitCode);
85
+ }
80
86
  });
81
87
 
82
88
  store
package/src/crawl/article-converter.ts CHANGED
@@ -7,6 +7,9 @@ import { extractFromHtml } from '@extractus/article-extractor';
7
7
  import TurndownService from 'turndown';
8
8
  import { gfm } from 'turndown-plugin-gfm';
9
9
  import { preprocessHtmlForCodeBlocks, cleanupMarkdown } from './markdown-utils.js';
10
+ import { createLogger, truncateForLog } from '../logging/index.js';
11
+
12
+ const logger = createLogger('article-converter');
10
13
 
11
14
  export interface ConversionResult {
12
15
  markdown: string;
@@ -28,6 +31,8 @@ export async function convertHtmlToMarkdown(
28
31
  html: string,
29
32
  url: string,
30
33
  ): Promise<ConversionResult> {
34
+ logger.debug({ url, htmlLength: html.length }, 'Starting HTML conversion');
35
+
31
36
  try {
32
37
  // Step 1: Extract main article content
33
38
  let articleHtml: string;
@@ -38,13 +43,25 @@ export async function convertHtmlToMarkdown(
38
43
  if (article !== null && article.content !== undefined && article.content !== '') {
39
44
  articleHtml = article.content;
40
45
  title = article.title !== undefined && article.title !== '' ? article.title : undefined;
46
+ logger.debug({
47
+ url,
48
+ title,
49
+ extractedLength: articleHtml.length,
50
+ usedFullHtml: false,
51
+ }, 'Article content extracted');
41
52
  } else {
42
53
  // Fallback to full HTML if extraction fails
43
54
  articleHtml = html;
55
+ logger.debug({ url, usedFullHtml: true }, 'Article extraction returned empty, using full HTML');
44
56
  }
45
- } catch {
57
+ } catch (extractError) {
46
58
  // Fallback to full HTML if extraction fails
47
59
  articleHtml = html;
60
+ logger.debug({
61
+ url,
62
+ usedFullHtml: true,
63
+ error: extractError instanceof Error ? extractError.message : String(extractError),
64
+ }, 'Article extraction failed, using full HTML');
48
65
  }
49
66
 
50
67
  // Step 2: Preprocess HTML for code blocks
@@ -83,12 +100,30 @@ export async function convertHtmlToMarkdown(
83
100
  // Step 4: Cleanup markdown with comprehensive regex patterns
84
101
  const markdown = cleanupMarkdown(rawMarkdown);
85
102
 
103
+ logger.debug({
104
+ url,
105
+ title,
106
+ rawMarkdownLength: rawMarkdown.length,
107
+ finalMarkdownLength: markdown.length,
108
+ }, 'HTML to markdown conversion complete');
109
+
110
+ // Log markdown preview at trace level
111
+ logger.trace({
112
+ url,
113
+ markdownPreview: truncateForLog(markdown, 1000),
114
+ }, 'Markdown content preview');
115
+
86
116
  return {
87
117
  markdown,
88
118
  ...(title !== undefined && { title }),
89
119
  success: true,
90
120
  };
91
121
  } catch (error) {
122
+ logger.error({
123
+ url,
124
+ error: error instanceof Error ? error.message : String(error),
125
+ }, 'HTML to markdown conversion failed');
126
+
92
127
  return {
93
128
  markdown: '',
94
129
  success: false,
package/src/crawl/bridge.ts CHANGED
@@ -11,6 +11,9 @@ import {
11
11
  validateHeadlessResult,
12
12
  validateParsePythonResult,
13
13
  } from './schemas.js';
14
+ import { createLogger } from '../logging/index.js';
15
+
16
+ const logger = createLogger('python-bridge');
14
17
 
15
18
  // Re-export for backwards compatibility
16
19
  export type { CrawledLink, ParsePythonResult };
@@ -32,24 +35,26 @@ export class PythonBridge {
32
35
  start(): Promise<void> {
33
36
  if (this.process) return Promise.resolve();
34
37
 
38
+ logger.debug('Starting Python bridge process');
39
+
35
40
  this.process = spawn('python3', ['python/crawl_worker.py'], {
36
41
  stdio: ['pipe', 'pipe', 'pipe'],
37
42
  });
38
43
 
39
44
  // Add error handler for process spawn errors
40
45
  this.process.on('error', (err) => {
41
- console.error('Python bridge process error:', err);
46
+ logger.error({ error: err.message, stack: err.stack }, 'Python bridge process error');
42
47
  this.rejectAllPending(new Error(`Process error: ${err.message}`));
43
48
  });
44
49
 
45
50
  // Add exit handler to detect non-zero exits
46
51
  this.process.on('exit', (code, signal) => {
47
52
  if (code !== 0 && code !== null) {
48
- console.error(`Python bridge process exited with code ${String(code)}`);
53
+ logger.error({ code }, 'Python bridge process exited with non-zero code');
49
54
  this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
50
55
  } else if (signal && !this.stoppingIntentionally) {
51
56
  // Only log if we didn't intentionally stop the process
52
- console.error(`Python bridge process killed with signal ${signal}`);
57
+ logger.error({ signal }, 'Python bridge process killed with signal');
53
58
  this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
54
59
  }
55
60
  this.process = null;
@@ -60,7 +65,7 @@ export class PythonBridge {
60
65
  if (this.process.stderr) {
61
66
  const stderrRl = createInterface({ input: this.process.stderr });
62
67
  stderrRl.on('line', (line) => {
63
- console.error('Python bridge stderr:', line);
68
+ logger.warn({ stderr: line }, 'Python bridge stderr output');
64
69
  });
65
70
  }
66
71
 
@@ -106,11 +111,14 @@ export class PythonBridge {
106
111
  } catch (error: unknown) {
107
112
  // Log validation failure with original response for debugging
108
113
  if (error instanceof ZodError) {
109
- console.error('Python bridge response validation failed:', error.issues);
110
- console.error('Original response:', JSON.stringify(response.result));
114
+ logger.error({
115
+ issues: error.issues,
116
+ response: JSON.stringify(response.result),
117
+ }, 'Python bridge response validation failed');
111
118
  pending.reject(new Error(`Invalid response format from Python bridge: ${error.message}`));
112
119
  } else {
113
120
  const errorMessage = error instanceof Error ? error.message : String(error);
121
+ logger.error({ error: errorMessage }, 'Response validation error');
114
122
  pending.reject(new Error(`Response validation error: ${errorMessage}`));
115
123
  }
116
124
  }
@@ -118,7 +126,10 @@ export class PythonBridge {
118
126
  // If neither result nor error, leave pending (will timeout)
119
127
  }
120
128
  } catch (err) {
121
- console.error('Failed to parse JSON response from Python bridge:', err, 'Line:', line);
129
+ logger.error({
130
+ error: err instanceof Error ? err.message : String(err),
131
+ line,
132
+ }, 'Failed to parse JSON response from Python bridge');
122
133
  }
123
134
  });
124
135