bluera-knowledge 0.9.26 → 0.9.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/BUGS-FOUND.md +71 -0
  9. package/CHANGELOG.md +76 -0
  10. package/README.md +55 -20
  11. package/bun.lock +35 -1
  12. package/commands/crawl.md +2 -0
  13. package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
  14. package/dist/chunk-2SJHNRXD.js.map +1 -0
  15. package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
  16. package/dist/chunk-OGEY66FZ.js.map +1 -0
  17. package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
  18. package/dist/chunk-RWSXP3PQ.js.map +1 -0
  19. package/dist/index.js +73 -28
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp/server.js +2 -2
  22. package/dist/workers/background-worker-cli.js +2 -2
  23. package/eslint.config.js +1 -1
  24. package/package.json +3 -1
  25. package/src/analysis/ast-parser.test.ts +46 -0
  26. package/src/cli/commands/crawl.test.ts +99 -12
  27. package/src/cli/commands/crawl.ts +76 -24
  28. package/src/cli/commands/store.test.ts +68 -1
  29. package/src/cli/commands/store.ts +9 -3
  30. package/src/crawl/article-converter.ts +36 -1
  31. package/src/crawl/bridge.ts +18 -7
  32. package/src/crawl/intelligent-crawler.ts +45 -4
  33. package/src/db/embeddings.test.ts +16 -0
  34. package/src/db/lance.test.ts +31 -0
  35. package/src/db/lance.ts +8 -0
  36. package/src/logging/index.ts +29 -0
  37. package/src/logging/logger.test.ts +75 -0
  38. package/src/logging/logger.ts +147 -0
  39. package/src/logging/payload.test.ts +152 -0
  40. package/src/logging/payload.ts +121 -0
  41. package/src/mcp/handlers/search.handler.test.ts +28 -9
  42. package/src/mcp/handlers/search.handler.ts +69 -29
  43. package/src/mcp/handlers/store.handler.test.ts +1 -0
  44. package/src/mcp/server.ts +44 -16
  45. package/src/services/chunking.service.ts +23 -0
  46. package/src/services/index.service.test.ts +921 -1
  47. package/src/services/index.service.ts +76 -1
  48. package/src/services/index.ts +20 -2
  49. package/src/services/search.service.test.ts +573 -21
  50. package/src/services/search.service.ts +257 -105
  51. package/src/services/services.test.ts +2 -2
  52. package/src/services/snippet.service.ts +28 -3
  53. package/src/services/store.service.test.ts +28 -0
  54. package/src/services/store.service.ts +4 -0
  55. package/src/services/token.service.test.ts +45 -0
  56. package/src/services/token.service.ts +33 -0
  57. package/src/types/result.test.ts +10 -0
  58. package/tests/integration/cli-consistency.test.ts +1 -4
  59. package/vitest.config.ts +4 -0
  60. package/dist/chunk-5QMHZUC4.js.map +0 -1
  61. package/dist/chunk-BICFAWMN.js.map +0 -1
  62. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  63. package/scripts/readme-version-updater.cjs +0 -18
package/src/crawl/intelligent-crawler.ts CHANGED
@@ -8,6 +8,9 @@ import axios from 'axios';
  import { ClaudeClient, type CrawlStrategy } from './claude-client.js';
  import { convertHtmlToMarkdown } from './article-converter.js';
  import { PythonBridge, type CrawledLink } from './bridge.js';
+ import { createLogger, summarizePayload } from '../logging/index.js';
+
+ const logger = createLogger('crawler');

  export interface CrawlOptions {
  crawlInstruction?: string; // Natural language: what to crawl
@@ -69,6 +72,13 @@ export class IntelligentCrawler extends EventEmitter {
  this.visited.clear();
  this.stopped = false;

+ logger.info({
+ seedUrl,
+ maxPages,
+ mode: simple ? 'simple' : (crawlInstruction !== undefined && crawlInstruction !== '' ? 'intelligent' : 'simple'),
+ hasExtractInstruction: extractInstruction !== undefined,
+ }, 'Starting crawl');
+
  const startProgress: CrawlProgress = {
  type: 'start',
  pagesVisited: 0,
@@ -86,6 +96,11 @@ export class IntelligentCrawler extends EventEmitter {
  yield* this.crawlSimple(seedUrl, extractInstruction, maxPages, options.useHeadless ?? false);
  }

+ logger.info({
+ seedUrl,
+ pagesVisited: this.visited.size,
+ }, 'Crawl complete');
+
  const completeProgress: CrawlProgress = {
  type: 'complete',
  pagesVisited: this.visited.size,
@@ -220,9 +235,9 @@ export class IntelligentCrawler extends EventEmitter {
  const links = await this.extractLinks(current.url, useHeadless);

  if (links.length === 0) {
- console.warn(`No links found on ${current.url} - page may be a leaf node`);
+ logger.debug({ url: current.url }, 'No links found - page may be a leaf node');
  } else {
- console.log(`Found ${String(links.length)} links on ${current.url}`);
+ logger.debug({ url: current.url, linkCount: links.length }, 'Links extracted from page');
  }

  for (const link of links) {
@@ -283,9 +298,16 @@ export class IntelligentCrawler extends EventEmitter {
  const conversion = await convertHtmlToMarkdown(html, url);

  if (!conversion.success) {
+ logger.error({ url, error: conversion.error }, 'HTML to markdown conversion failed');
  throw new Error(`Failed to convert HTML: ${conversion.error ?? 'Unknown error'}`);
  }

+ logger.debug({
+ url,
+ title: conversion.title,
+ markdownLength: conversion.markdown.length,
+ }, 'Article converted to markdown');
+
  let extracted: string | undefined;

  // Optional: Extract specific information using Claude
@@ -342,13 +364,23 @@ export class IntelligentCrawler extends EventEmitter {
  * Fetch HTML content from a URL
  */
  private async fetchHtml(url: string, useHeadless: boolean = false): Promise<string> {
+ const startTime = Date.now();
+ logger.debug({ url, useHeadless }, 'Fetching HTML');
+
  if (useHeadless) {
  try {
  const result = await this.pythonBridge.fetchHeadless(url);
+ const durationMs = Date.now() - startTime;
+ logger.info({
+ url,
+ useHeadless: true,
+ durationMs,
+ ...summarizePayload(result.html, 'raw-html', url),
+ }, 'Raw HTML fetched');
  return result.html;
  } catch (error) {
  // Fallback to axios if headless fails
- console.warn(`Headless fetch failed for ${url}, falling back to axios:`, error);
+ logger.warn({ url, error: error instanceof Error ? error.message : String(error) }, 'Headless fetch failed, falling back to axios');
  }
  }

@@ -362,8 +394,17 @@ export class IntelligentCrawler extends EventEmitter {
  },
  });

+ const durationMs = Date.now() - startTime;
+ logger.info({
+ url,
+ useHeadless: false,
+ durationMs,
+ ...summarizePayload(response.data, 'raw-html', url),
+ }, 'Raw HTML fetched');
+
  return response.data;
  } catch (error) {
+ logger.error({ url, error: error instanceof Error ? error.message : String(error) }, 'Failed to fetch HTML');
  throw new Error(
  `Failed to fetch ${url}: ${error instanceof Error ? error.message : String(error)}`,
  );
@@ -398,7 +439,7 @@ export class IntelligentCrawler extends EventEmitter {
  } catch (error: unknown) {
  // Log the error for debugging
  const errorMessage = error instanceof Error ? error.message : String(error);
- console.error(`Failed to extract links from ${url}:`, errorMessage);
+ logger.error({ url, error: errorMessage }, 'Failed to extract links');

  // Re-throw the error instead of silently swallowing it
  throw new Error(`Link extraction failed for ${url}: ${errorMessage}`);
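The crawler changes all follow the same logging pattern introduced in this release: one createLogger(module) child logger per file, structured fields instead of string interpolation, and summarizePayload(...) spread into the log object whenever the payload is large. A minimal sketch of that pattern in isolation (the module name, URL handling, and use of the global fetch are illustrative, not taken from the package):

import { createLogger, summarizePayload } from '../logging/index.js';

const logger = createLogger('example-fetcher'); // hypothetical module name

async function fetchAndLog(url: string): Promise<string> {
  const startTime = Date.now();
  const response = await fetch(url); // plain fetch for illustration; the crawler uses headless/axios
  const html = await response.text();
  logger.info({
    url,
    durationMs: Date.now() - startTime,
    ...summarizePayload(html, 'raw-html', url), // adds preview, sizeBytes, and hash fields
  }, 'Raw HTML fetched');
  return html;
}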
package/src/db/embeddings.test.ts CHANGED
@@ -48,6 +48,22 @@ describe('EmbeddingEngine', () => {
  const customEngine = new EmbeddingEngine('Xenova/all-MiniLM-L6-v2', 512);
  expect(customEngine.getDimensions()).toBe(512);
  });
+
+ it('skips initialization when already initialized', async () => {
+ // Engine is already initialized from beforeAll
+ // Calling initialize again should be a no-op
+ await engine.initialize();
+ const embedding = await engine.embed('test');
+ expect(embedding).toHaveLength(384);
+ });
+
+ it('handles large batch with multiple chunks', async () => {
+ // Create enough texts to trigger multiple batch iterations (BATCH_SIZE = 32)
+ const texts = Array.from({ length: 40 }, (_, i) => `Text number ${String(i)}`);
+ const embeddings = await engine.embedBatch(texts);
+ expect(embeddings).toHaveLength(40);
+ expect(embeddings.every((e) => e.length === 384)).toBe(true);
+ }, 60000);
  });

  function cosineSimilarity(a: number[], b: number[]): number {
package/src/db/lance.test.ts CHANGED
@@ -292,6 +292,37 @@ describe('LanceStore', () => {
  });
  });

+ describe('close', () => {
+ it('clears tables and connection', async () => {
+ const closeStoreId = createStoreId('close-test-store');
+ const closeStore = new LanceStore(tempDir);
+ await closeStore.initialize(closeStoreId);
+
+ const doc = {
+ id: createDocumentId('close-doc'),
+ content: 'test',
+ vector: new Array(384).fill(0.1),
+ metadata: {
+ type: 'file' as const,
+ storeId: closeStoreId,
+ indexedAt: new Date(),
+ },
+ };
+
+ await closeStore.addDocuments(closeStoreId, [doc]);
+
+ // Close should not throw
+ expect(() => closeStore.close()).not.toThrow();
+ });
+
+ it('handles close when never initialized', () => {
+ const uninitializedStore = new LanceStore(tempDir);
+
+ // Should not throw even when never initialized
+ expect(() => uninitializedStore.close()).not.toThrow();
+ });
+ });
+
  describe('multiple documents operations', () => {
  it('adds multiple documents at once', async () => {
  const multiStoreId = createStoreId('multi-doc-store');
package/src/db/lance.ts CHANGED
@@ -145,6 +145,14 @@ export class LanceStore {
  }
  }

+ close(): void {
+ this.tables.clear();
+ if (this.connection !== null) {
+ this.connection.close();
+ this.connection = null;
+ }
+ }
+
  private getTableName(storeId: StoreId): string {
  return `documents_${storeId}`;
  }
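The new close() method gives callers an explicit way to release the cached tables and the LanceDB connection. A sketch of how a caller might pair it with the new shutdownLogger() during teardown (the teardown helper and import paths are hypothetical; this diff does not show where the package itself calls close()):

import { LanceStore } from './db/lance.js';
import { shutdownLogger } from './logging/index.js';

// Hypothetical teardown helper, not part of the package.
async function teardown(store: LanceStore): Promise<void> {
  store.close();          // clears cached tables and the connection; safe even if never initialized
  await shutdownLogger(); // flushes the pino file transport before the process exits
}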
package/src/logging/index.ts ADDED
@@ -0,0 +1,29 @@
+ /**
+ * Logging module - pino-based file logging with auto-rotation
+ *
+ * @example
+ * import { createLogger, summarizePayload } from './logging/index.js';
+ *
+ * const logger = createLogger('my-module');
+ * logger.info({ data }, 'Something happened');
+ *
+ * // For large payloads:
+ * logger.info({
+ * ...summarizePayload(html, 'raw-html', url),
+ * }, 'Fetched HTML');
+ */
+
+ export {
+ createLogger,
+ shutdownLogger,
+ getCurrentLogLevel,
+ isLevelEnabled,
+ getLogDirectory,
+ type LogLevel,
+ } from './logger.js';
+
+ export {
+ summarizePayload,
+ truncateForLog,
+ type PayloadSummary,
+ } from './payload.js';
package/src/logging/logger.test.ts ADDED
@@ -0,0 +1,75 @@
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+ import { getCurrentLogLevel, isLevelEnabled, getLogDirectory } from './logger.js';
+
+ describe('logger', () => {
+ const originalEnv = process.env['LOG_LEVEL'];
+
+ beforeEach(() => {
+ delete process.env['LOG_LEVEL'];
+ });
+
+ afterEach(() => {
+ if (originalEnv !== undefined) {
+ process.env['LOG_LEVEL'] = originalEnv;
+ } else {
+ delete process.env['LOG_LEVEL'];
+ }
+ });
+
+ describe('getCurrentLogLevel', () => {
+ it('returns info as default level', () => {
+ expect(getCurrentLogLevel()).toBe('info');
+ });
+
+ it('returns level from environment variable', () => {
+ process.env['LOG_LEVEL'] = 'debug';
+ expect(getCurrentLogLevel()).toBe('debug');
+ });
+
+ it('handles lowercase environment variable', () => {
+ process.env['LOG_LEVEL'] = 'WARN';
+ expect(getCurrentLogLevel()).toBe('warn');
+ });
+
+ it('treats empty string as default', () => {
+ process.env['LOG_LEVEL'] = '';
+ expect(getCurrentLogLevel()).toBe('info');
+ });
+
+ it('throws on invalid log level', () => {
+ process.env['LOG_LEVEL'] = 'invalid';
+ expect(() => getCurrentLogLevel()).toThrow('Invalid LOG_LEVEL: "invalid"');
+ });
+ });
+
+ describe('isLevelEnabled', () => {
+ it('returns true when check level is at or above current level', () => {
+ process.env['LOG_LEVEL'] = 'info';
+ expect(isLevelEnabled('info')).toBe(true);
+ expect(isLevelEnabled('warn')).toBe(true);
+ expect(isLevelEnabled('error')).toBe(true);
+ });
+
+ it('returns false when check level is below current level', () => {
+ process.env['LOG_LEVEL'] = 'warn';
+ expect(isLevelEnabled('debug')).toBe(false);
+ expect(isLevelEnabled('info')).toBe(false);
+ });
+
+ it('enables all levels when set to trace', () => {
+ process.env['LOG_LEVEL'] = 'trace';
+ expect(isLevelEnabled('trace')).toBe(true);
+ expect(isLevelEnabled('debug')).toBe(true);
+ expect(isLevelEnabled('info')).toBe(true);
+ });
+ });
+
+ describe('getLogDirectory', () => {
+ it('returns path under home directory', () => {
+ const logDir = getLogDirectory();
+ expect(logDir).toContain('.bluera');
+ expect(logDir).toContain('bluera-knowledge');
+ expect(logDir).toContain('logs');
+ });
+ });
+ });
package/src/logging/logger.ts ADDED
@@ -0,0 +1,147 @@
+ /**
+ * Core logger factory using pino with file-based rolling logs
+ *
+ * Features:
+ * - File-only output (no console pollution for Claude Code)
+ * - Size-based rotation (10MB, keeps 5 files)
+ * - LOG_LEVEL env var control (trace/debug/info/warn/error/fatal)
+ * - Child loggers per module for context
+ */
+
+ import pino, { type Logger, type LoggerOptions } from 'pino';
+ import { homedir } from 'node:os';
+ import { mkdirSync, existsSync } from 'node:fs';
+ import { join } from 'node:path';
+
+ /** Valid log levels */
+ export type LogLevel = 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal';
+
+ const VALID_LEVELS: readonly LogLevel[] = ['trace', 'debug', 'info', 'warn', 'error', 'fatal'];
+ const VALID_LEVELS_SET: ReadonlySet<string> = new Set(VALID_LEVELS);
+
+ /** Default log directory under user home */
+ function getLogDir(): string {
+ return join(homedir(), '.bluera', 'bluera-knowledge', 'logs');
+ }
+
+ /** Resolve and create log directory - fails fast if cannot create */
+ function ensureLogDir(): string {
+ const logDir = getLogDir();
+ if (!existsSync(logDir)) {
+ mkdirSync(logDir, { recursive: true });
+ }
+ return logDir;
+ }
+
+ /** Check if a string is a valid log level */
+ function isValidLogLevel(level: string): level is LogLevel {
+ return VALID_LEVELS_SET.has(level);
+ }
+
+ /** Get log level from environment - fails fast on invalid value */
+ function getLogLevel(): LogLevel {
+ const level = process.env['LOG_LEVEL']?.toLowerCase();
+
+ if (level === undefined || level === '') {
+ return 'info';
+ }
+
+ if (!isValidLogLevel(level)) {
+ throw new Error(
+ `Invalid LOG_LEVEL: "${level}". Valid values: ${VALID_LEVELS.join(', ')}`
+ );
+ }
+
+ return level;
+ }
+
+ /** Root logger instance - lazily initialized */
+ let rootLogger: Logger | null = null;
+
+ /** Initialize the root logger with pino-roll transport */
+ function initializeLogger(): Logger {
+ if (rootLogger !== null) {
+ return rootLogger;
+ }
+
+ const logDir = ensureLogDir();
+ const logFile = join(logDir, 'app.log');
+ const level = getLogLevel();
+
+ const options: LoggerOptions = {
+ level,
+ timestamp: pino.stdTimeFunctions.isoTime,
+ formatters: {
+ level: (label) => ({ level: label }),
+ },
+ transport: {
+ target: 'pino-roll',
+ options: {
+ file: logFile,
+ size: '10m', // 10MB rotation
+ limit: { count: 5 }, // Keep 5 rotated files
+ mkdir: true,
+ },
+ },
+ };
+
+ rootLogger = pino(options);
+ return rootLogger;
+ }
+
+ /**
+ * Create a named child logger for a specific module
+ *
+ * @param module - Module name (e.g., 'crawler', 'mcp-server', 'search-service')
+ * @returns Logger instance with module context
+ *
+ * @example
+ * const logger = createLogger('crawler');
+ * logger.info({ url }, 'Fetching page');
+ */
+ export function createLogger(module: string): Logger {
+ const root = initializeLogger();
+ return root.child({ module });
+ }
+
+ /**
+ * Get the current log level
+ */
+ export function getCurrentLogLevel(): LogLevel {
+ return getLogLevel();
+ }
+
+ /**
+ * Check if a specific log level is enabled
+ */
+ export function isLevelEnabled(level: LogLevel): boolean {
+ const currentLevel = getLogLevel();
+ const currentIndex = VALID_LEVELS.indexOf(currentLevel);
+ const checkIndex = VALID_LEVELS.indexOf(level);
+ return checkIndex >= currentIndex;
+ }
+
+ /**
+ * Get the log directory path
+ */
+ export function getLogDirectory(): string {
+ return getLogDir();
+ }
+
+ /**
+ * Flush and shutdown the logger - call before process exit
+ */
+ export function shutdownLogger(): Promise<void> {
+ return new Promise((resolve) => {
+ if (rootLogger !== null) {
+ rootLogger.flush();
+ // Give time for async transport to flush
+ setTimeout(() => {
+ rootLogger = null;
+ resolve();
+ }, 100);
+ } else {
+ resolve();
+ }
+ });
+ }
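Taken together, the exported surface is small: createLogger for per-module children, getCurrentLogLevel and isLevelEnabled for level checks, getLogDirectory for the file location, and shutdownLogger for flushing. A short usage sketch built only from those exports (the 'demo' module name is a placeholder):

import {
  createLogger,
  getCurrentLogLevel,
  getLogDirectory,
  isLevelEnabled,
  shutdownLogger,
} from './logging/index.js';

const logger = createLogger('demo'); // writes to ~/.bluera/bluera-knowledge/logs/app.log

logger.info({ level: getCurrentLogLevel() }, 'Logger ready'); // 'info' when LOG_LEVEL is unset

if (isLevelEnabled('debug')) {
  logger.debug({ logDir: getLogDirectory() }, 'Debug output enabled');
}

await shutdownLogger(); // flush the rolling transport before exit; an invalid LOG_LEVEL value throws instead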
package/src/logging/payload.test.ts ADDED
@@ -0,0 +1,152 @@
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+ import { mkdtempSync, rmSync, existsSync, readdirSync, readFileSync } from 'node:fs';
+ import { join } from 'node:path';
+ import { tmpdir } from 'node:os';
+
+ // Mock logger module before importing payload
+ vi.mock('./logger.js', () => ({
+ getLogDirectory: vi.fn(),
+ isLevelEnabled: vi.fn()
+ }));
+
+ import { summarizePayload, truncateForLog } from './payload.js';
+ import { getLogDirectory, isLevelEnabled } from './logger.js';
+
+ const mockGetLogDirectory = getLogDirectory as ReturnType<typeof vi.fn>;
+ const mockIsLevelEnabled = isLevelEnabled as ReturnType<typeof vi.fn>;
+
+ describe('payload utilities', () => {
+ let tempDir: string;
+
+ beforeEach(() => {
+ tempDir = mkdtempSync(join(tmpdir(), 'payload-test-'));
+ mockGetLogDirectory.mockReturnValue(tempDir);
+ mockIsLevelEnabled.mockReturnValue(false);
+ });
+
+ afterEach(() => {
+ rmSync(tempDir, { recursive: true, force: true });
+ vi.clearAllMocks();
+ });
+
+ describe('truncateForLog', () => {
+ it('returns content unchanged when under max length', () => {
+ const content = 'short content';
+ expect(truncateForLog(content, 500)).toBe(content);
+ });
+
+ it('truncates content when over max length', () => {
+ const content = 'a'.repeat(600);
+ const result = truncateForLog(content, 500);
+ expect(result.length).toBe(500 + '... [truncated]'.length);
+ expect(result).toContain('... [truncated]');
+ });
+
+ it('uses default max length of 500', () => {
+ const content = 'a'.repeat(600);
+ const result = truncateForLog(content);
+ expect(result.startsWith('a'.repeat(500))).toBe(true);
+ expect(result).toContain('... [truncated]');
+ });
+
+ it('handles exact max length content', () => {
+ const content = 'a'.repeat(500);
+ expect(truncateForLog(content, 500)).toBe(content);
+ });
+ });
+
+ describe('summarizePayload', () => {
+ it('returns summary with preview, size, and hash', () => {
+ const content = 'test content for summarization';
+ const result = summarizePayload(content, 'test-type', 'test-id');
+
+ expect(result.preview).toBe(content);
+ expect(result.sizeBytes).toBe(Buffer.byteLength(content, 'utf8'));
+ expect(result.hash).toMatch(/^[a-f0-9]{12}$/);
+ expect(result.payloadFile).toBeUndefined();
+ });
+
+ it('truncates preview for large content', () => {
+ const content = 'x'.repeat(1000);
+ const result = summarizePayload(content, 'large', 'large-id');
+
+ expect(result.preview).toContain('... [truncated]');
+ expect(result.preview.length).toBeLessThan(content.length);
+ });
+
+ it('does not dump payload when dumpFull is false', () => {
+ const content = 'x'.repeat(20000); // Above threshold
+ const result = summarizePayload(content, 'type', 'id', false);
+
+ expect(result.payloadFile).toBeUndefined();
+ const payloadDir = join(tempDir, 'payload');
+ expect(existsSync(payloadDir)).toBe(false);
+ });
+
+ it('dumps payload to file when dumpFull is true and above threshold', () => {
+ const content = 'x'.repeat(20000); // Above 10KB threshold
+ const result = summarizePayload(content, 'dump-type', 'dump-id', true);
+
+ expect(result.payloadFile).toBeDefined();
+ expect(result.payloadFile).toContain('dump-type');
+ expect(result.payloadFile).toContain(result.hash);
+
+ const payloadDir = join(tempDir, 'payload');
+ expect(existsSync(payloadDir)).toBe(true);
+
+ const files = readdirSync(payloadDir);
+ expect(files.length).toBe(1);
+ expect(files[0]).toBe(result.payloadFile);
+
+ const fileContent = JSON.parse(readFileSync(join(payloadDir, files[0]), 'utf8'));
+ expect(fileContent.type).toBe('dump-type');
+ expect(fileContent.identifier).toBe('dump-id');
+ expect(fileContent.content).toBe(content);
+ expect(fileContent.sizeBytes).toBe(result.sizeBytes);
+ });
+
+ it('does not dump payload below threshold even with dumpFull true', () => {
+ const content = 'small content'; // Below 10KB threshold
+ const result = summarizePayload(content, 'small-type', 'small-id', true);
+
+ expect(result.payloadFile).toBeUndefined();
+ });
+
+ it('creates payload directory if it does not exist', () => {
+ const content = 'y'.repeat(20000);
+ const payloadDir = join(tempDir, 'payload');
+ expect(existsSync(payloadDir)).toBe(false);
+
+ summarizePayload(content, 'create-dir', 'create-id', true);
+
+ expect(existsSync(payloadDir)).toBe(true);
+ });
+
+ it('sanitizes identifier for filename', () => {
+ const content = 'z'.repeat(20000);
+ const result = summarizePayload(content, 'type', 'https://example.com/path?query=1', true);
+
+ expect(result.payloadFile).toBeDefined();
+ expect(result.payloadFile).not.toContain('://');
+ expect(result.payloadFile).not.toContain('?');
+ });
+
+ it('uses trace level check for dumpFull default', () => {
+ mockIsLevelEnabled.mockReturnValue(true);
+ const content = 'a'.repeat(20000);
+
+ const result = summarizePayload(content, 'trace-type', 'trace-id');
+
+ expect(mockIsLevelEnabled).toHaveBeenCalledWith('trace');
+ expect(result.payloadFile).toBeDefined();
+ });
+
+ it('generates consistent hash for same content', () => {
+ const content = 'consistent content';
+ const result1 = summarizePayload(content, 'type1', 'id1');
+ const result2 = summarizePayload(content, 'type2', 'id2');
+
+ expect(result1.hash).toBe(result2.hash);
+ });
+ });
+ });
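payload.ts itself (+121 lines) is not shown in this diff, but the tests above pin down its contract: truncateForLog caps a string at 500 characters and appends '... [truncated]', and summarizePayload returns a preview, byte size, and 12-character hex hash, optionally dumping payloads larger than roughly 10KB as JSON files under the log directory's payload/ subfolder when dumpFull is true (defaulting to whether trace logging is enabled). A minimal sketch consistent with those tests, not the package's actual implementation (the threshold constant, hash algorithm, and sanitizing regex are assumptions):

import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { getLogDirectory, isLevelEnabled } from './logger.js';

const PREVIEW_MAX = 500;
const DUMP_THRESHOLD_BYTES = 10 * 1024; // assumption: the "10KB threshold" the tests mention

export interface PayloadSummary {
  preview: string;
  sizeBytes: number;
  hash: string;
  payloadFile?: string;
}

export function truncateForLog(content: string, maxLength = PREVIEW_MAX): string {
  return content.length <= maxLength ? content : `${content.slice(0, maxLength)}... [truncated]`;
}

export function summarizePayload(
  content: string,
  type: string,
  identifier: string,
  dumpFull: boolean = isLevelEnabled('trace'),
): PayloadSummary {
  const sizeBytes = Buffer.byteLength(content, 'utf8');
  // assumption: sha256 prefix; any stable 12-hex-char digest satisfies the tests
  const hash = createHash('sha256').update(content).digest('hex').slice(0, 12);
  const summary: PayloadSummary = { preview: truncateForLog(content), sizeBytes, hash };

  if (dumpFull && sizeBytes > DUMP_THRESHOLD_BYTES) {
    const payloadDir = join(getLogDirectory(), 'payload');
    if (!existsSync(payloadDir)) {
      mkdirSync(payloadDir, { recursive: true });
    }
    const safeId = identifier.replace(/[^a-zA-Z0-9._-]/g, '_'); // assumption: strips '://', '?', etc.
    const fileName = `${type}-${safeId}-${hash}.json`;
    writeFileSync(join(payloadDir, fileName), JSON.stringify({ type, identifier, content, sizeBytes }));
    summary.payloadFile = fileName;
  }

  return summary;
}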