bluera-knowledge 0.9.26 → 0.9.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/commit.md +4 -7
- package/.claude/hooks/post-edit-check.sh +21 -24
- package/.claude/skills/atomic-commits/SKILL.md +6 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.env.example +4 -0
- package/.husky/pre-push +12 -2
- package/.versionrc.json +0 -4
- package/BUGS-FOUND.md +71 -0
- package/CHANGELOG.md +76 -0
- package/README.md +55 -20
- package/bun.lock +35 -1
- package/commands/crawl.md +2 -0
- package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
- package/dist/chunk-2SJHNRXD.js.map +1 -0
- package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
- package/dist/chunk-OGEY66FZ.js.map +1 -0
- package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
- package/dist/chunk-RWSXP3PQ.js.map +1 -0
- package/dist/index.js +73 -28
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/eslint.config.js +1 -1
- package/package.json +3 -1
- package/src/analysis/ast-parser.test.ts +46 -0
- package/src/cli/commands/crawl.test.ts +99 -12
- package/src/cli/commands/crawl.ts +76 -24
- package/src/cli/commands/store.test.ts +68 -1
- package/src/cli/commands/store.ts +9 -3
- package/src/crawl/article-converter.ts +36 -1
- package/src/crawl/bridge.ts +18 -7
- package/src/crawl/intelligent-crawler.ts +45 -4
- package/src/db/embeddings.test.ts +16 -0
- package/src/db/lance.test.ts +31 -0
- package/src/db/lance.ts +8 -0
- package/src/logging/index.ts +29 -0
- package/src/logging/logger.test.ts +75 -0
- package/src/logging/logger.ts +147 -0
- package/src/logging/payload.test.ts +152 -0
- package/src/logging/payload.ts +121 -0
- package/src/mcp/handlers/search.handler.test.ts +28 -9
- package/src/mcp/handlers/search.handler.ts +69 -29
- package/src/mcp/handlers/store.handler.test.ts +1 -0
- package/src/mcp/server.ts +44 -16
- package/src/services/chunking.service.ts +23 -0
- package/src/services/index.service.test.ts +921 -1
- package/src/services/index.service.ts +76 -1
- package/src/services/index.ts +20 -2
- package/src/services/search.service.test.ts +573 -21
- package/src/services/search.service.ts +257 -105
- package/src/services/services.test.ts +2 -2
- package/src/services/snippet.service.ts +28 -3
- package/src/services/store.service.test.ts +28 -0
- package/src/services/store.service.ts +4 -0
- package/src/services/token.service.test.ts +45 -0
- package/src/services/token.service.ts +33 -0
- package/src/types/result.test.ts +10 -0
- package/tests/integration/cli-consistency.test.ts +1 -4
- package/vitest.config.ts +4 -0
- package/dist/chunk-5QMHZUC4.js.map +0 -1
- package/dist/chunk-BICFAWMN.js.map +0 -1
- package/dist/chunk-J7J6LXOJ.js.map +0 -1
- package/scripts/readme-version-updater.cjs +0 -18
|
@@ -8,6 +8,9 @@ import axios from 'axios';
|
|
|
8
8
|
import { ClaudeClient, type CrawlStrategy } from './claude-client.js';
|
|
9
9
|
import { convertHtmlToMarkdown } from './article-converter.js';
|
|
10
10
|
import { PythonBridge, type CrawledLink } from './bridge.js';
|
|
11
|
+
import { createLogger, summarizePayload } from '../logging/index.js';
|
|
12
|
+
|
|
13
|
+
const logger = createLogger('crawler');
|
|
11
14
|
|
|
12
15
|
export interface CrawlOptions {
|
|
13
16
|
crawlInstruction?: string; // Natural language: what to crawl
|
|
@@ -69,6 +72,13 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
69
72
|
this.visited.clear();
|
|
70
73
|
this.stopped = false;
|
|
71
74
|
|
|
75
|
+
logger.info({
|
|
76
|
+
seedUrl,
|
|
77
|
+
maxPages,
|
|
78
|
+
mode: simple ? 'simple' : (crawlInstruction !== undefined && crawlInstruction !== '' ? 'intelligent' : 'simple'),
|
|
79
|
+
hasExtractInstruction: extractInstruction !== undefined,
|
|
80
|
+
}, 'Starting crawl');
|
|
81
|
+
|
|
72
82
|
const startProgress: CrawlProgress = {
|
|
73
83
|
type: 'start',
|
|
74
84
|
pagesVisited: 0,
|
|
@@ -86,6 +96,11 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
86
96
|
yield* this.crawlSimple(seedUrl, extractInstruction, maxPages, options.useHeadless ?? false);
|
|
87
97
|
}
|
|
88
98
|
|
|
99
|
+
logger.info({
|
|
100
|
+
seedUrl,
|
|
101
|
+
pagesVisited: this.visited.size,
|
|
102
|
+
}, 'Crawl complete');
|
|
103
|
+
|
|
89
104
|
const completeProgress: CrawlProgress = {
|
|
90
105
|
type: 'complete',
|
|
91
106
|
pagesVisited: this.visited.size,
|
|
@@ -220,9 +235,9 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
220
235
|
const links = await this.extractLinks(current.url, useHeadless);
|
|
221
236
|
|
|
222
237
|
if (links.length === 0) {
|
|
223
|
-
|
|
238
|
+
logger.debug({ url: current.url }, 'No links found - page may be a leaf node');
|
|
224
239
|
} else {
|
|
225
|
-
|
|
240
|
+
logger.debug({ url: current.url, linkCount: links.length }, 'Links extracted from page');
|
|
226
241
|
}
|
|
227
242
|
|
|
228
243
|
for (const link of links) {
|
|
@@ -283,9 +298,16 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
283
298
|
const conversion = await convertHtmlToMarkdown(html, url);
|
|
284
299
|
|
|
285
300
|
if (!conversion.success) {
|
|
301
|
+
logger.error({ url, error: conversion.error }, 'HTML to markdown conversion failed');
|
|
286
302
|
throw new Error(`Failed to convert HTML: ${conversion.error ?? 'Unknown error'}`);
|
|
287
303
|
}
|
|
288
304
|
|
|
305
|
+
logger.debug({
|
|
306
|
+
url,
|
|
307
|
+
title: conversion.title,
|
|
308
|
+
markdownLength: conversion.markdown.length,
|
|
309
|
+
}, 'Article converted to markdown');
|
|
310
|
+
|
|
289
311
|
let extracted: string | undefined;
|
|
290
312
|
|
|
291
313
|
// Optional: Extract specific information using Claude
|
|
@@ -342,13 +364,23 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
342
364
|
* Fetch HTML content from a URL
|
|
343
365
|
*/
|
|
344
366
|
private async fetchHtml(url: string, useHeadless: boolean = false): Promise<string> {
|
|
367
|
+
const startTime = Date.now();
|
|
368
|
+
logger.debug({ url, useHeadless }, 'Fetching HTML');
|
|
369
|
+
|
|
345
370
|
if (useHeadless) {
|
|
346
371
|
try {
|
|
347
372
|
const result = await this.pythonBridge.fetchHeadless(url);
|
|
373
|
+
const durationMs = Date.now() - startTime;
|
|
374
|
+
logger.info({
|
|
375
|
+
url,
|
|
376
|
+
useHeadless: true,
|
|
377
|
+
durationMs,
|
|
378
|
+
...summarizePayload(result.html, 'raw-html', url),
|
|
379
|
+
}, 'Raw HTML fetched');
|
|
348
380
|
return result.html;
|
|
349
381
|
} catch (error) {
|
|
350
382
|
// Fallback to axios if headless fails
|
|
351
|
-
|
|
383
|
+
logger.warn({ url, error: error instanceof Error ? error.message : String(error) }, 'Headless fetch failed, falling back to axios');
|
|
352
384
|
}
|
|
353
385
|
}
|
|
354
386
|
|
|
@@ -362,8 +394,17 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
362
394
|
},
|
|
363
395
|
});
|
|
364
396
|
|
|
397
|
+
const durationMs = Date.now() - startTime;
|
|
398
|
+
logger.info({
|
|
399
|
+
url,
|
|
400
|
+
useHeadless: false,
|
|
401
|
+
durationMs,
|
|
402
|
+
...summarizePayload(response.data, 'raw-html', url),
|
|
403
|
+
}, 'Raw HTML fetched');
|
|
404
|
+
|
|
365
405
|
return response.data;
|
|
366
406
|
} catch (error) {
|
|
407
|
+
logger.error({ url, error: error instanceof Error ? error.message : String(error) }, 'Failed to fetch HTML');
|
|
367
408
|
throw new Error(
|
|
368
409
|
`Failed to fetch ${url}: ${error instanceof Error ? error.message : String(error)}`,
|
|
369
410
|
);
|
|
@@ -398,7 +439,7 @@ export class IntelligentCrawler extends EventEmitter {
|
|
|
398
439
|
} catch (error: unknown) {
|
|
399
440
|
// Log the error for debugging
|
|
400
441
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
401
|
-
|
|
442
|
+
logger.error({ url, error: errorMessage }, 'Failed to extract links');
|
|
402
443
|
|
|
403
444
|
// Re-throw the error instead of silently swallowing it
|
|
404
445
|
throw new Error(`Link extraction failed for ${url}: ${errorMessage}`);
|
|
@@ -48,6 +48,22 @@ describe('EmbeddingEngine', () => {
|
|
|
48
48
|
const customEngine = new EmbeddingEngine('Xenova/all-MiniLM-L6-v2', 512);
|
|
49
49
|
expect(customEngine.getDimensions()).toBe(512);
|
|
50
50
|
});
|
|
51
|
+
|
|
52
|
+
it('skips initialization when already initialized', async () => {
|
|
53
|
+
// Engine is already initialized from beforeAll
|
|
54
|
+
// Calling initialize again should be a no-op
|
|
55
|
+
await engine.initialize();
|
|
56
|
+
const embedding = await engine.embed('test');
|
|
57
|
+
expect(embedding).toHaveLength(384);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it('handles large batch with multiple chunks', async () => {
|
|
61
|
+
// Create enough texts to trigger multiple batch iterations (BATCH_SIZE = 32)
|
|
62
|
+
const texts = Array.from({ length: 40 }, (_, i) => `Text number ${String(i)}`);
|
|
63
|
+
const embeddings = await engine.embedBatch(texts);
|
|
64
|
+
expect(embeddings).toHaveLength(40);
|
|
65
|
+
expect(embeddings.every((e) => e.length === 384)).toBe(true);
|
|
66
|
+
}, 60000);
|
|
51
67
|
});
|
|
52
68
|
|
|
53
69
|
function cosineSimilarity(a: number[], b: number[]): number {
|
package/src/db/lance.test.ts
CHANGED
|
@@ -292,6 +292,37 @@ describe('LanceStore', () => {
|
|
|
292
292
|
});
|
|
293
293
|
});
|
|
294
294
|
|
|
295
|
+
describe('close', () => {
|
|
296
|
+
it('clears tables and connection', async () => {
|
|
297
|
+
const closeStoreId = createStoreId('close-test-store');
|
|
298
|
+
const closeStore = new LanceStore(tempDir);
|
|
299
|
+
await closeStore.initialize(closeStoreId);
|
|
300
|
+
|
|
301
|
+
const doc = {
|
|
302
|
+
id: createDocumentId('close-doc'),
|
|
303
|
+
content: 'test',
|
|
304
|
+
vector: new Array(384).fill(0.1),
|
|
305
|
+
metadata: {
|
|
306
|
+
type: 'file' as const,
|
|
307
|
+
storeId: closeStoreId,
|
|
308
|
+
indexedAt: new Date(),
|
|
309
|
+
},
|
|
310
|
+
};
|
|
311
|
+
|
|
312
|
+
await closeStore.addDocuments(closeStoreId, [doc]);
|
|
313
|
+
|
|
314
|
+
// Close should not throw
|
|
315
|
+
expect(() => closeStore.close()).not.toThrow();
|
|
316
|
+
});
|
|
317
|
+
|
|
318
|
+
it('handles close when never initialized', () => {
|
|
319
|
+
const uninitializedStore = new LanceStore(tempDir);
|
|
320
|
+
|
|
321
|
+
// Should not throw even when never initialized
|
|
322
|
+
expect(() => uninitializedStore.close()).not.toThrow();
|
|
323
|
+
});
|
|
324
|
+
});
|
|
325
|
+
|
|
295
326
|
describe('multiple documents operations', () => {
|
|
296
327
|
it('adds multiple documents at once', async () => {
|
|
297
328
|
const multiStoreId = createStoreId('multi-doc-store');
|
package/src/db/lance.ts
CHANGED
|
@@ -145,6 +145,14 @@ export class LanceStore {
|
|
|
145
145
|
}
|
|
146
146
|
}
|
|
147
147
|
|
|
148
|
+
close(): void {
|
|
149
|
+
this.tables.clear();
|
|
150
|
+
if (this.connection !== null) {
|
|
151
|
+
this.connection.close();
|
|
152
|
+
this.connection = null;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
148
156
|
private getTableName(storeId: StoreId): string {
|
|
149
157
|
return `documents_${storeId}`;
|
|
150
158
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Logging module - pino-based file logging with auto-rotation
 *
 * Barrel file: re-exports the public logging API so callers import from
 * './logging/index.js' instead of the individual implementation files.
 *
 * @example
 * import { createLogger, summarizePayload } from './logging/index.js';
 *
 * const logger = createLogger('my-module');
 * logger.info({ data }, 'Something happened');
 *
 * // For large payloads:
 * logger.info({
 *   ...summarizePayload(html, 'raw-html', url),
 * }, 'Fetched HTML');
 */

// Logger construction and lifecycle helpers.
export {
  createLogger,
  shutdownLogger,
  getCurrentLogLevel,
  isLevelEnabled,
  getLogDirectory,
  type LogLevel,
} from './logger.js';

// Payload summarization / truncation for large log entries.
export {
  summarizePayload,
  truncateForLog,
  type PayloadSummary,
} from './payload.js';
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { getCurrentLogLevel, isLevelEnabled, getLogDirectory } from './logger.js';
|
|
3
|
+
|
|
4
|
+
// Exercises the env-driven helpers of logger.ts (getCurrentLogLevel,
// isLevelEnabled, getLogDirectory). The pino transport itself is never
// touched, so this suite creates no log files.
describe('logger', () => {
  // Captured once at module load so afterEach can restore whatever the
  // surrounding test runner had set.
  const originalEnv = process.env['LOG_LEVEL'];

  beforeEach(() => {
    // Each test starts from "unset" and opts into a specific level.
    delete process.env['LOG_LEVEL'];
  });

  afterEach(() => {
    // Restore the pre-suite value (or keep it unset).
    if (originalEnv !== undefined) {
      process.env['LOG_LEVEL'] = originalEnv;
    } else {
      delete process.env['LOG_LEVEL'];
    }
  });

  describe('getCurrentLogLevel', () => {
    it('returns info as default level', () => {
      expect(getCurrentLogLevel()).toBe('info');
    });

    it('returns level from environment variable', () => {
      process.env['LOG_LEVEL'] = 'debug';
      expect(getCurrentLogLevel()).toBe('debug');
    });

    // NOTE(review): the name says "lowercase" but the input is uppercase —
    // this actually verifies that 'WARN' is normalized to 'warn'.
    it('handles lowercase environment variable', () => {
      process.env['LOG_LEVEL'] = 'WARN';
      expect(getCurrentLogLevel()).toBe('warn');
    });

    it('treats empty string as default', () => {
      process.env['LOG_LEVEL'] = '';
      expect(getCurrentLogLevel()).toBe('info');
    });

    it('throws on invalid log level', () => {
      process.env['LOG_LEVEL'] = 'invalid';
      expect(() => getCurrentLogLevel()).toThrow('Invalid LOG_LEVEL: "invalid"');
    });
  });

  describe('isLevelEnabled', () => {
    it('returns true when check level is at or above current level', () => {
      process.env['LOG_LEVEL'] = 'info';
      expect(isLevelEnabled('info')).toBe(true);
      expect(isLevelEnabled('warn')).toBe(true);
      expect(isLevelEnabled('error')).toBe(true);
    });

    it('returns false when check level is below current level', () => {
      process.env['LOG_LEVEL'] = 'warn';
      expect(isLevelEnabled('debug')).toBe(false);
      expect(isLevelEnabled('info')).toBe(false);
    });

    it('enables all levels when set to trace', () => {
      process.env['LOG_LEVEL'] = 'trace';
      expect(isLevelEnabled('trace')).toBe(true);
      expect(isLevelEnabled('debug')).toBe(true);
      expect(isLevelEnabled('info')).toBe(true);
    });
  });

  describe('getLogDirectory', () => {
    // Only asserts path components, not the absolute prefix, so the test
    // is independent of the current user's home directory.
    it('returns path under home directory', () => {
      const logDir = getLogDirectory();
      expect(logDir).toContain('.bluera');
      expect(logDir).toContain('bluera-knowledge');
      expect(logDir).toContain('logs');
    });
  });
});
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core logger factory using pino with file-based rolling logs
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - File-only output (no console pollution for Claude Code)
|
|
6
|
+
* - Size-based rotation (10MB, keeps 5 files)
|
|
7
|
+
* - LOG_LEVEL env var control (trace/debug/info/warn/error/fatal)
|
|
8
|
+
* - Child loggers per module for context
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import pino, { type Logger, type LoggerOptions } from 'pino';
|
|
12
|
+
import { homedir } from 'node:os';
|
|
13
|
+
import { mkdirSync, existsSync } from 'node:fs';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
|
|
16
|
+
/** Valid log levels */
|
|
17
|
+
export type LogLevel = 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal';
|
|
18
|
+
|
|
19
|
+
const VALID_LEVELS: readonly LogLevel[] = ['trace', 'debug', 'info', 'warn', 'error', 'fatal'];
|
|
20
|
+
const VALID_LEVELS_SET: ReadonlySet<string> = new Set(VALID_LEVELS);
|
|
21
|
+
|
|
22
|
+
/** Default log directory under user home */
|
|
23
|
+
function getLogDir(): string {
|
|
24
|
+
return join(homedir(), '.bluera', 'bluera-knowledge', 'logs');
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Resolve and create log directory - fails fast if cannot create */
|
|
28
|
+
function ensureLogDir(): string {
|
|
29
|
+
const logDir = getLogDir();
|
|
30
|
+
if (!existsSync(logDir)) {
|
|
31
|
+
mkdirSync(logDir, { recursive: true });
|
|
32
|
+
}
|
|
33
|
+
return logDir;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/** Check if a string is a valid log level */
|
|
37
|
+
function isValidLogLevel(level: string): level is LogLevel {
|
|
38
|
+
return VALID_LEVELS_SET.has(level);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/** Get log level from environment - fails fast on invalid value */
|
|
42
|
+
function getLogLevel(): LogLevel {
|
|
43
|
+
const level = process.env['LOG_LEVEL']?.toLowerCase();
|
|
44
|
+
|
|
45
|
+
if (level === undefined || level === '') {
|
|
46
|
+
return 'info';
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (!isValidLogLevel(level)) {
|
|
50
|
+
throw new Error(
|
|
51
|
+
`Invalid LOG_LEVEL: "${level}". Valid values: ${VALID_LEVELS.join(', ')}`
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return level;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** Root logger instance - lazily initialized */
|
|
59
|
+
let rootLogger: Logger | null = null;
|
|
60
|
+
|
|
61
|
+
/** Initialize the root logger with pino-roll transport */
|
|
62
|
+
function initializeLogger(): Logger {
|
|
63
|
+
if (rootLogger !== null) {
|
|
64
|
+
return rootLogger;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const logDir = ensureLogDir();
|
|
68
|
+
const logFile = join(logDir, 'app.log');
|
|
69
|
+
const level = getLogLevel();
|
|
70
|
+
|
|
71
|
+
const options: LoggerOptions = {
|
|
72
|
+
level,
|
|
73
|
+
timestamp: pino.stdTimeFunctions.isoTime,
|
|
74
|
+
formatters: {
|
|
75
|
+
level: (label) => ({ level: label }),
|
|
76
|
+
},
|
|
77
|
+
transport: {
|
|
78
|
+
target: 'pino-roll',
|
|
79
|
+
options: {
|
|
80
|
+
file: logFile,
|
|
81
|
+
size: '10m', // 10MB rotation
|
|
82
|
+
limit: { count: 5 }, // Keep 5 rotated files
|
|
83
|
+
mkdir: true,
|
|
84
|
+
},
|
|
85
|
+
},
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
rootLogger = pino(options);
|
|
89
|
+
return rootLogger;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Create a named child logger for a specific module
|
|
94
|
+
*
|
|
95
|
+
* @param module - Module name (e.g., 'crawler', 'mcp-server', 'search-service')
|
|
96
|
+
* @returns Logger instance with module context
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* const logger = createLogger('crawler');
|
|
100
|
+
* logger.info({ url }, 'Fetching page');
|
|
101
|
+
*/
|
|
102
|
+
export function createLogger(module: string): Logger {
|
|
103
|
+
const root = initializeLogger();
|
|
104
|
+
return root.child({ module });
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Get the current log level
|
|
109
|
+
*/
|
|
110
|
+
export function getCurrentLogLevel(): LogLevel {
|
|
111
|
+
return getLogLevel();
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Check if a specific log level is enabled
|
|
116
|
+
*/
|
|
117
|
+
export function isLevelEnabled(level: LogLevel): boolean {
|
|
118
|
+
const currentLevel = getLogLevel();
|
|
119
|
+
const currentIndex = VALID_LEVELS.indexOf(currentLevel);
|
|
120
|
+
const checkIndex = VALID_LEVELS.indexOf(level);
|
|
121
|
+
return checkIndex >= currentIndex;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Get the log directory path
|
|
126
|
+
*/
|
|
127
|
+
export function getLogDirectory(): string {
|
|
128
|
+
return getLogDir();
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
 * Flush and shutdown the logger - call before process exit
 *
 * Resolves immediately when the logger was never initialized; otherwise
 * flushes buffered records, then clears the root logger so a later
 * createLogger() call rebuilds it.
 *
 * NOTE(review): the fixed 100ms setTimeout is a heuristic grace period for
 * the transport to drain; pino's flush(cb) callback form may be a more
 * reliable completion signal — confirm against the pinned pino version.
 */
export function shutdownLogger(): Promise<void> {
  return new Promise((resolve) => {
    if (rootLogger !== null) {
      rootLogger.flush();
      // Give time for async transport to flush
      setTimeout(() => {
        rootLogger = null;
        resolve();
      }, 100);
    } else {
      resolve();
    }
  });
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { mkdtempSync, rmSync, existsSync, readdirSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
|
|
6
|
+
// Mock logger module before importing payload.
// vi.mock is hoisted by vitest, so payload.js resolves './logger.js' to
// these stubs even though the mock call appears before the imports below.
vi.mock('./logger.js', () => ({
  getLogDirectory: vi.fn(),
  isLevelEnabled: vi.fn()
}));

import { summarizePayload, truncateForLog } from './payload.js';
import { getLogDirectory, isLevelEnabled } from './logger.js';

// Typed handles onto the mocked functions for mockReturnValue / call checks.
const mockGetLogDirectory = getLogDirectory as ReturnType<typeof vi.fn>;
const mockIsLevelEnabled = isLevelEnabled as ReturnType<typeof vi.fn>;

describe('payload utilities', () => {
  // Fresh temp dir per test; the mocked getLogDirectory points payload
  // dumps here so nothing touches the real ~/.bluera tree.
  let tempDir: string;

  beforeEach(() => {
    tempDir = mkdtempSync(join(tmpdir(), 'payload-test-'));
    mockGetLogDirectory.mockReturnValue(tempDir);
    // Default: trace disabled, so summarizePayload does not dump by default.
    mockIsLevelEnabled.mockReturnValue(false);
  });

  afterEach(() => {
    rmSync(tempDir, { recursive: true, force: true });
    vi.clearAllMocks();
  });

  describe('truncateForLog', () => {
    it('returns content unchanged when under max length', () => {
      const content = 'short content';
      expect(truncateForLog(content, 500)).toBe(content);
    });

    it('truncates content when over max length', () => {
      const content = 'a'.repeat(600);
      const result = truncateForLog(content, 500);
      // Result is exactly maxLength characters plus the fixed suffix.
      expect(result.length).toBe(500 + '... [truncated]'.length);
      expect(result).toContain('... [truncated]');
    });

    it('uses default max length of 500', () => {
      const content = 'a'.repeat(600);
      const result = truncateForLog(content);
      expect(result.startsWith('a'.repeat(500))).toBe(true);
      expect(result).toContain('... [truncated]');
    });

    it('handles exact max length content', () => {
      // Boundary: content exactly at the limit is returned untouched.
      const content = 'a'.repeat(500);
      expect(truncateForLog(content, 500)).toBe(content);
    });
  });

  describe('summarizePayload', () => {
    it('returns summary with preview, size, and hash', () => {
      const content = 'test content for summarization';
      const result = summarizePayload(content, 'test-type', 'test-id');

      expect(result.preview).toBe(content);
      expect(result.sizeBytes).toBe(Buffer.byteLength(content, 'utf8'));
      // Hash is expected to be a 12-hex-char digest prefix.
      expect(result.hash).toMatch(/^[a-f0-9]{12}$/);
      expect(result.payloadFile).toBeUndefined();
    });

    it('truncates preview for large content', () => {
      const content = 'x'.repeat(1000);
      const result = summarizePayload(content, 'large', 'large-id');

      expect(result.preview).toContain('... [truncated]');
      expect(result.preview.length).toBeLessThan(content.length);
    });

    it('does not dump payload when dumpFull is false', () => {
      const content = 'x'.repeat(20000); // Above threshold
      const result = summarizePayload(content, 'type', 'id', false);

      expect(result.payloadFile).toBeUndefined();
      const payloadDir = join(tempDir, 'payload');
      expect(existsSync(payloadDir)).toBe(false);
    });

    it('dumps payload to file when dumpFull is true and above threshold', () => {
      const content = 'x'.repeat(20000); // Above 10KB threshold
      const result = summarizePayload(content, 'dump-type', 'dump-id', true);

      expect(result.payloadFile).toBeDefined();
      expect(result.payloadFile).toContain('dump-type');
      expect(result.payloadFile).toContain(result.hash);

      const payloadDir = join(tempDir, 'payload');
      expect(existsSync(payloadDir)).toBe(true);

      const files = readdirSync(payloadDir);
      expect(files.length).toBe(1);
      expect(files[0]).toBe(result.payloadFile);

      // Dump file is JSON with the original content plus metadata.
      // NOTE(review): files[0] is string|undefined under
      // noUncheckedIndexedAccess; the length assertion above guards it.
      const fileContent = JSON.parse(readFileSync(join(payloadDir, files[0]), 'utf8'));
      expect(fileContent.type).toBe('dump-type');
      expect(fileContent.identifier).toBe('dump-id');
      expect(fileContent.content).toBe(content);
      expect(fileContent.sizeBytes).toBe(result.sizeBytes);
    });

    it('does not dump payload below threshold even with dumpFull true', () => {
      const content = 'small content'; // Below 10KB threshold
      const result = summarizePayload(content, 'small-type', 'small-id', true);

      expect(result.payloadFile).toBeUndefined();
    });

    it('creates payload directory if it does not exist', () => {
      const content = 'y'.repeat(20000);
      const payloadDir = join(tempDir, 'payload');
      expect(existsSync(payloadDir)).toBe(false);

      summarizePayload(content, 'create-dir', 'create-id', true);

      expect(existsSync(payloadDir)).toBe(true);
    });

    it('sanitizes identifier for filename', () => {
      // URL-shaped identifiers must not leak path/query characters into
      // the dump filename.
      const content = 'z'.repeat(20000);
      const result = summarizePayload(content, 'type', 'https://example.com/path?query=1', true);

      expect(result.payloadFile).toBeDefined();
      expect(result.payloadFile).not.toContain('://');
      expect(result.payloadFile).not.toContain('?');
    });

    it('uses trace level check for dumpFull default', () => {
      // When dumpFull is omitted, dumping is gated on trace being enabled.
      mockIsLevelEnabled.mockReturnValue(true);
      const content = 'a'.repeat(20000);

      const result = summarizePayload(content, 'trace-type', 'trace-id');

      expect(mockIsLevelEnabled).toHaveBeenCalledWith('trace');
      expect(result.payloadFile).toBeDefined();
    });

    it('generates consistent hash for same content', () => {
      // Hash depends only on content, not on type/identifier.
      const content = 'consistent content';
      const result1 = summarizePayload(content, 'type1', 'id1');
      const result2 = summarizePayload(content, 'type2', 'id2');

      expect(result1.hash).toBe(result2.hash);
    });
  });
});
|