bluera-knowledge 0.9.26 → 0.9.30

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/CHANGELOG.md +69 -0
  9. package/README.md +55 -20
  10. package/bun.lock +35 -1
  11. package/commands/crawl.md +2 -0
  12. package/dist/{chunk-BICFAWMN.js → chunk-DNOIM7BO.js} +73 -8
  13. package/dist/chunk-DNOIM7BO.js.map +1 -0
  14. package/dist/{chunk-5QMHZUC4.js → chunk-NJUMU4X2.js} +462 -105
  15. package/dist/chunk-NJUMU4X2.js.map +1 -0
  16. package/dist/{chunk-J7J6LXOJ.js → chunk-SZNTYLYT.js} +106 -41
  17. package/dist/chunk-SZNTYLYT.js.map +1 -0
  18. package/dist/index.js +65 -25
  19. package/dist/index.js.map +1 -1
  20. package/dist/mcp/server.js +2 -2
  21. package/dist/workers/background-worker-cli.js +2 -2
  22. package/eslint.config.js +1 -1
  23. package/package.json +3 -1
  24. package/src/analysis/ast-parser.test.ts +46 -0
  25. package/src/cli/commands/crawl.test.ts +99 -12
  26. package/src/cli/commands/crawl.ts +76 -24
  27. package/src/crawl/article-converter.ts +36 -1
  28. package/src/crawl/bridge.ts +18 -7
  29. package/src/crawl/intelligent-crawler.ts +45 -4
  30. package/src/db/embeddings.test.ts +16 -0
  31. package/src/logging/index.ts +29 -0
  32. package/src/logging/logger.test.ts +75 -0
  33. package/src/logging/logger.ts +147 -0
  34. package/src/logging/payload.test.ts +152 -0
  35. package/src/logging/payload.ts +121 -0
  36. package/src/mcp/handlers/search.handler.test.ts +28 -9
  37. package/src/mcp/handlers/search.handler.ts +69 -29
  38. package/src/mcp/handlers/store.handler.test.ts +1 -0
  39. package/src/mcp/server.ts +44 -16
  40. package/src/services/chunking.service.ts +23 -0
  41. package/src/services/index.service.test.ts +921 -1
  42. package/src/services/index.service.ts +76 -1
  43. package/src/services/index.ts +10 -1
  44. package/src/services/search.service.test.ts +573 -21
  45. package/src/services/search.service.ts +257 -105
  46. package/src/services/snippet.service.ts +28 -3
  47. package/src/services/token.service.test.ts +45 -0
  48. package/src/services/token.service.ts +33 -0
  49. package/src/types/result.test.ts +10 -0
  50. package/tests/integration/cli-consistency.test.ts +1 -4
  51. package/vitest.config.ts +4 -0
  52. package/dist/chunk-5QMHZUC4.js.map +0 -1
  53. package/dist/chunk-BICFAWMN.js.map +0 -1
  54. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  55. package/scripts/readme-version-updater.cjs +0 -18
package/src/mcp/handlers/search.handler.ts CHANGED
@@ -4,6 +4,10 @@ import { SearchArgsSchema, GetFullContextArgsSchema } from '../schemas/index.js'
4
4
  import type { SearchQuery, DocumentId, StoreId } from '../../types/index.js';
5
5
  import { LRUCache } from '../cache.js';
6
6
  import type { SearchResult } from '../../types/search.js';
7
+ import { createLogger, summarizePayload } from '../../logging/index.js';
8
+ import { estimateTokens, formatTokenCount } from '../../services/token.service.js';
9
+
10
+ const logger = createLogger('mcp-search');
7
11
 
8
12
  // Create result cache for get_full_context
9
13
  // Uses LRU cache to prevent memory leaks (max 1000 items)
@@ -22,6 +26,14 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
22
26
  // Validate arguments with Zod
23
27
  const validated = SearchArgsSchema.parse(args);
24
28
 
29
+ logger.info({
30
+ query: validated.query,
31
+ stores: validated.stores,
32
+ detail: validated.detail,
33
+ limit: validated.limit,
34
+ intent: validated.intent,
35
+ }, 'Search started');
36
+
25
37
  const { services } = context;
26
38
 
27
39
  // Get all stores if none specified, resolve store names to IDs
@@ -63,14 +75,6 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
63
75
  resultCache.set(result.id, result);
64
76
  }
65
77
 
66
- // Calculate estimated tokens
67
- const estimatedTokens = results.results.reduce((sum, r) => {
68
- let tokens = 100; // Base for summary
69
- if (r.context) tokens += 200;
70
- if (r.full) tokens += 800;
71
- return sum + tokens;
72
- }, 0);
73
-
74
78
  // Add repoRoot to results for cloned repos
75
79
  const enhancedResults = await Promise.all(results.results.map(async (r) => {
76
80
  const storeId = r.metadata.storeId;
@@ -89,17 +93,33 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
89
93
  };
90
94
  }));
91
95
 
96
+ const responseJson = JSON.stringify({
97
+ results: enhancedResults,
98
+ totalResults: results.totalResults,
99
+ mode: results.mode,
100
+ timeMs: results.timeMs
101
+ }, null, 2);
102
+
103
+ // Calculate actual token estimate based on response content
104
+ const responseTokens = estimateTokens(responseJson);
105
+
106
+ // Create visible header with token usage
107
+ const header = `Search: "${validated.query}" | Results: ${String(results.totalResults)} | ${formatTokenCount(responseTokens)} tokens | ${String(results.timeMs)}ms\n\n`;
108
+
109
+ // Log the complete MCP response that will be sent to Claude Code
110
+ logger.info({
111
+ query: validated.query,
112
+ totalResults: results.totalResults,
113
+ responseTokens,
114
+ timeMs: results.timeMs,
115
+ ...summarizePayload(responseJson, 'mcp-response', validated.query),
116
+ }, 'Search complete - context sent to Claude Code');
117
+
92
118
  return {
93
119
  content: [
94
120
  {
95
121
  type: 'text',
96
- text: JSON.stringify({
97
- results: enhancedResults,
98
- totalResults: results.totalResults,
99
- estimatedTokens,
100
- mode: results.mode,
101
- timeMs: results.timeMs
102
- }, null, 2)
122
+ text: header + responseJson
103
123
  }
104
124
  ]
105
125
  };
@@ -118,6 +138,8 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
118
138
  // Validate arguments with Zod
119
139
  const validated = GetFullContextArgsSchema.parse(args);
120
140
 
141
+ logger.info({ resultId: validated.resultId }, 'Get full context requested');
142
+
121
143
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
122
144
  const resultId = validated.resultId as DocumentId;
123
145
 
@@ -132,17 +154,26 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
132
154
 
133
155
  // If result already has full context, return it
134
156
  if (cachedResult.full) {
157
+ const responseJson = JSON.stringify({
158
+ id: cachedResult.id,
159
+ score: cachedResult.score,
160
+ summary: cachedResult.summary,
161
+ context: cachedResult.context,
162
+ full: cachedResult.full
163
+ }, null, 2);
164
+
165
+ logger.info({
166
+ resultId,
167
+ cached: true,
168
+ hasFullContext: true,
169
+ ...summarizePayload(responseJson, 'mcp-full-context', resultId),
170
+ }, 'Full context retrieved from cache');
171
+
135
172
  return {
136
173
  content: [
137
174
  {
138
175
  type: 'text',
139
- text: JSON.stringify({
140
- id: cachedResult.id,
141
- score: cachedResult.score,
142
- summary: cachedResult.summary,
143
- context: cachedResult.context,
144
- full: cachedResult.full
145
- }, null, 2)
176
+ text: responseJson
146
177
  }
147
178
  ]
148
179
  };
@@ -192,17 +223,26 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
192
223
  // Update cache with full result
193
224
  resultCache.set(resultId, fullResult);
194
225
 
226
+ const responseJson = JSON.stringify({
227
+ id: fullResult.id,
228
+ score: fullResult.score,
229
+ summary: fullResult.summary,
230
+ context: fullResult.context,
231
+ full: fullResult.full
232
+ }, null, 2);
233
+
234
+ logger.info({
235
+ resultId,
236
+ cached: false,
237
+ hasFullContext: true,
238
+ ...summarizePayload(responseJson, 'mcp-full-context', resultId),
239
+ }, 'Full context retrieved via re-query');
240
+
195
241
  return {
196
242
  content: [
197
243
  {
198
244
  type: 'text',
199
- text: JSON.stringify({
200
- id: fullResult.id,
201
- score: fullResult.score,
202
- summary: fullResult.summary,
203
- context: fullResult.context,
204
- full: fullResult.full
205
- }, null, 2)
245
+ text: responseJson
206
246
  }
207
247
  ]
208
248
  };
package/src/mcp/handlers/store.handler.test.ts CHANGED
@@ -411,5 +411,6 @@ describe('store.handler', () => {
411
411
  const data = JSON.parse(result.content[0].text);
412
412
  expect(data.store.type).toBe('file');
413
413
  });
414
+
414
415
  });
415
416
  });
package/src/mcp/server.ts CHANGED
@@ -9,6 +9,9 @@ import { tools } from './handlers/index.js';
9
9
  import { handleExecute } from './handlers/execute.handler.js';
10
10
  import { ExecuteArgsSchema } from './schemas/index.js';
11
11
  import type { MCPServerOptions } from './types.js';
12
+ import { createLogger } from '../logging/index.js';
13
+
14
+ const logger = createLogger('mcp-server');
12
15
 
13
16
  // eslint-disable-next-line @typescript-eslint/no-deprecated
14
17
  export function createMCPServer(options: MCPServerOptions): Server {
@@ -106,6 +109,9 @@ export function createMCPServer(options: MCPServerOptions): Server {
106
109
  // Handle tool calls
107
110
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
108
111
  const { name, arguments: args } = request.params;
112
+ const startTime = Date.now();
113
+
114
+ logger.info({ tool: name, args: JSON.stringify(args) }, 'Tool invoked');
109
115
 
110
116
  // Create services once (needed by all handlers)
111
117
  const services = await createServices(
@@ -115,34 +121,56 @@ export function createMCPServer(options: MCPServerOptions): Server {
115
121
  );
116
122
  const context = { services, options };
117
123
 
118
- // Handle execute meta-tool
119
- if (name === 'execute') {
120
- const validated = ExecuteArgsSchema.parse(args ?? {});
121
- return handleExecute(validated, context);
122
- }
124
+ try {
125
+ let result;
123
126
 
124
- // Find handler in registry for native tools (search, get_full_context)
125
- const tool = tools.find(t => t.name === name);
126
- if (tool === undefined) {
127
- throw new Error(`Unknown tool: ${name}`);
128
- }
127
+ // Handle execute meta-tool
128
+ if (name === 'execute') {
129
+ const validated = ExecuteArgsSchema.parse(args ?? {});
130
+ result = await handleExecute(validated, context);
131
+ } else {
132
+ // Find handler in registry for native tools (search, get_full_context)
133
+ const tool = tools.find(t => t.name === name);
134
+ if (tool === undefined) {
135
+ throw new Error(`Unknown tool: ${name}`);
136
+ }
129
137
 
130
- // Validate arguments with Zod
131
- const validated = tool.schema.parse(args ?? {});
138
+ // Validate arguments with Zod
139
+ const validated = tool.schema.parse(args ?? {});
132
140
 
133
- // Execute handler with context
134
- return tool.handler(validated, context);
141
+ // Execute handler with context
142
+ result = await tool.handler(validated, context);
143
+ }
144
+
145
+ const durationMs = Date.now() - startTime;
146
+ logger.info({ tool: name, durationMs }, 'Tool completed');
147
+
148
+ return result;
149
+ } catch (error) {
150
+ const durationMs = Date.now() - startTime;
151
+ logger.error({
152
+ tool: name,
153
+ durationMs,
154
+ error: error instanceof Error ? error.message : String(error),
155
+ }, 'Tool execution failed');
156
+ throw error;
157
+ }
135
158
  });
136
159
 
137
160
  return server;
138
161
  }
139
162
 
140
163
  export async function runMCPServer(options: MCPServerOptions): Promise<void> {
164
+ logger.info({
165
+ dataDir: options.dataDir,
166
+ projectRoot: options.projectRoot,
167
+ }, 'MCP server starting');
168
+
141
169
  const server = createMCPServer(options);
142
170
  const transport = new StdioServerTransport();
143
171
  await server.connect(transport);
144
172
 
145
- console.error('Bluera Knowledge MCP server running on stdio');
173
+ logger.info('MCP server connected to stdio transport');
146
174
  }
147
175
 
148
176
  // Run the server only when this file is executed directly (not imported by CLI)
@@ -156,7 +184,7 @@ if (isMCPServerEntry) {
156
184
  config: process.env['CONFIG_PATH'],
157
185
  projectRoot: process.env['PROJECT_ROOT'] ?? process.env['PWD']
158
186
  }).catch((error: unknown) => {
159
- console.error('Failed to start MCP server:', error);
187
+ logger.error({ error: error instanceof Error ? error.message : String(error) }, 'Failed to start MCP server');
160
188
  process.exit(1);
161
189
  });
162
190
  }
package/src/services/chunking.service.ts CHANGED
@@ -17,6 +17,19 @@ export interface Chunk {
17
17
  docSummary?: string | undefined;
18
18
  }
19
19
 
20
+ /**
21
+ * Preset configurations for different content types.
22
+ * Code uses smaller chunks for precise symbol matching.
23
+ * Web/docs use larger chunks to preserve prose context.
24
+ */
25
+ const CHUNK_PRESETS = {
26
+ code: { chunkSize: 768, chunkOverlap: 100 },
27
+ web: { chunkSize: 1200, chunkOverlap: 200 },
28
+ docs: { chunkSize: 1200, chunkOverlap: 200 },
29
+ } as const;
30
+
31
+ export type ContentType = keyof typeof CHUNK_PRESETS;
32
+
20
33
  export class ChunkingService {
21
34
  private readonly chunkSize: number;
22
35
  private readonly chunkOverlap: number;
@@ -26,6 +39,16 @@ export class ChunkingService {
26
39
  this.chunkOverlap = config.chunkOverlap;
27
40
  }
28
41
 
42
+ /**
43
+ * Create a ChunkingService with preset configuration for a content type.
44
+ * - 'code': Smaller chunks (768/100) for precise code symbol matching
45
+ * - 'web': Larger chunks (1200/200) for web prose content
46
+ * - 'docs': Larger chunks (1200/200) for documentation
47
+ */
48
+ static forContentType(type: ContentType): ChunkingService {
49
+ return new ChunkingService(CHUNK_PRESETS[type]);
50
+ }
51
+
29
52
  /**
30
53
  * Chunk text content. Uses semantic chunking for Markdown and code files,
31
54
  * falling back to sliding window for other content.