npm - bluera-knowledge - Versions diffs - 0.9.26 → 0.9.31 - Mend

bluera-knowledge 0.9.26 → 0.9.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/.claude/commands/commit.md +4 -7
package/.claude/hooks/post-edit-check.sh +21 -24
package/.claude/skills/atomic-commits/SKILL.md +6 -0
package/.claude-plugin/plugin.json +1 -1
package/.env.example +4 -0
package/.husky/pre-push +12 -2
package/.versionrc.json +0 -4
package/BUGS-FOUND.md +71 -0
package/CHANGELOG.md +76 -0
package/README.md +55 -20
package/bun.lock +35 -1
package/commands/crawl.md +2 -0
package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
package/dist/chunk-2SJHNRXD.js.map +1 -0
package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
package/dist/chunk-OGEY66FZ.js.map +1 -0
package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
package/dist/chunk-RWSXP3PQ.js.map +1 -0
package/dist/index.js +73 -28
package/dist/index.js.map +1 -1
package/dist/mcp/server.js +2 -2
package/dist/workers/background-worker-cli.js +2 -2
package/eslint.config.js +1 -1
package/package.json +3 -1
package/src/analysis/ast-parser.test.ts +46 -0
package/src/cli/commands/crawl.test.ts +99 -12
package/src/cli/commands/crawl.ts +76 -24
package/src/cli/commands/store.test.ts +68 -1
package/src/cli/commands/store.ts +9 -3
package/src/crawl/article-converter.ts +36 -1
package/src/crawl/bridge.ts +18 -7
package/src/crawl/intelligent-crawler.ts +45 -4
package/src/db/embeddings.test.ts +16 -0
package/src/db/lance.test.ts +31 -0
package/src/db/lance.ts +8 -0
package/src/logging/index.ts +29 -0
package/src/logging/logger.test.ts +75 -0
package/src/logging/logger.ts +147 -0
package/src/logging/payload.test.ts +152 -0
package/src/logging/payload.ts +121 -0
package/src/mcp/handlers/search.handler.test.ts +28 -9
package/src/mcp/handlers/search.handler.ts +69 -29
package/src/mcp/handlers/store.handler.test.ts +1 -0
package/src/mcp/server.ts +44 -16
package/src/services/chunking.service.ts +23 -0
package/src/services/index.service.test.ts +921 -1
package/src/services/index.service.ts +76 -1
package/src/services/index.ts +20 -2
package/src/services/search.service.test.ts +573 -21
package/src/services/search.service.ts +257 -105
package/src/services/services.test.ts +2 -2
package/src/services/snippet.service.ts +28 -3
package/src/services/store.service.test.ts +28 -0
package/src/services/store.service.ts +4 -0
package/src/services/token.service.test.ts +45 -0
package/src/services/token.service.ts +33 -0
package/src/types/result.test.ts +10 -0
package/tests/integration/cli-consistency.test.ts +1 -4
package/vitest.config.ts +4 -0
package/dist/chunk-5QMHZUC4.js.map +0 -1
package/dist/chunk-BICFAWMN.js.map +0 -1
package/dist/chunk-J7J6LXOJ.js.map +0 -1
package/scripts/readme-version-updater.cjs +0 -18

package/src/logging/payload.ts ADDED Viewed

@@ -0,0 +1,121 @@
+/**
+ * Large payload handling utilities for logging
+ *
+ * Handles large content (raw HTML, MCP responses) by:
+ * - Truncating to preview in log entries
+ * - Optionally dumping full content to separate files at trace level
+ */
+import { writeFileSync, mkdirSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { createHash } from 'node:crypto';
+import { getLogDirectory, isLevelEnabled } from './logger.js';
+/** Maximum number of characters of content kept in a log preview (default cut-off used by truncateForLog) */
+const MAX_PREVIEW_LENGTH = 500;
+/**
+ * Size threshold, in bytes, for dumping a payload to its own file.
+ * The comparison in summarizePayload is strict (`>`), so a payload must be
+ * larger than 10,000 bytes (~10KB) before it is written out.
+ */
+const PAYLOAD_DUMP_THRESHOLD = 10_000;
+/**
+ * Summary of a large payload for logging.
+ *
+ * Returned by summarizePayload; the documented usage is to spread it into a
+ * log entry instead of logging the full content.
+ */
+export interface PayloadSummary {
+  /** Truncated preview of content (produced by truncateForLog) */
+  preview: string;
+  /** Size in bytes of the content when encoded as UTF-8 */
+  sizeBytes: number;
+  /** Short hash for identification: first 12 hex characters of the content's MD5 digest */
+  hash: string;
+  /**
+   * Name of the dump file (file name only, not a full path) when the full
+   * content was written to the payload directory; absent when no dump was made.
+   * Dumping defaults to the trace-level check but can be forced by the caller.
+   */
+  payloadFile?: string;
+}
+/**
+ * Get the payload dump directory, creating it if it does not exist yet.
+ *
+ * The directory is the `payload` subfolder of whatever getLogDirectory()
+ * returns. Creation is synchronous and recursive, so missing parent
+ * directories are created as well.
+ *
+ * @returns Path of the payload dump directory
+ */
+function getPayloadDir(): string {
+  const dir = join(getLogDirectory(), 'payload');
+  if (!existsSync(dir)) {
+    mkdirSync(dir, { recursive: true });
+  }
+  return dir;
+}
+/**
+ * Generate a safe filename fragment from an identifier.
+ *
+ * Every character other than ASCII letters, digits and '-' is replaced with
+ * '_', then the result is cut to at most 50 characters. The mapping is lossy:
+ * different identifiers can collapse to the same fragment.
+ *
+ * @param identifier - Arbitrary text (e.g., URL, query) to sanitize
+ * @returns Sanitized fragment of at most 50 characters
+ */
+function safeFilename(identifier: string): string {
+  return identifier
+    .replace(/[^a-zA-Z0-9-]/g, '_')
+    .substring(0, 50);
+}
+/**
+ * Summarize a large payload for logging
+ *
+ * Creates a summary with:
+ * - Truncated preview (first 500 chars, followed by a truncation marker when cut)
+ * - Size in bytes (length of the content encoded as UTF-8)
+ * - Short MD5 hash for identification (first 12 hex characters of the digest)
+ * - Optional full dump to file at trace level
+ *
+ * The full dump happens only when `dumpFull` is true AND the content is
+ * strictly larger than PAYLOAD_DUMP_THRESHOLD bytes. The dump is a
+ * pretty-printed JSON file written synchronously into the payload directory;
+ * its name is built from a timestamp, `type`, the sanitized `identifier`
+ * and the short hash.
+ *
+ * NOTE(review): nothing here catches errors from getPayloadDir() or
+ * writeFileSync(), so a failed dump propagates to whoever called this
+ * logging helper - confirm that is intended.
+ *
+ * NOTE(review): `type` is interpolated into the file name unsanitized (only
+ * `identifier` goes through safeFilename) - presumably callers pass fixed
+ * literals; verify against callers.
+ *
+ * @param content - The full content to summarize
+ * @param type - Type identifier (e.g., 'raw-html', 'mcp-response')
+ * @param identifier - Unique identifier (e.g., URL, query)
+ * @param dumpFull - Whether to dump full content to file (default: trace level check,
+ *   evaluated on each call where the argument is omitted)
+ * @returns PayloadSummary for inclusion in log entry; `payloadFile` is set only when a dump was written
+ *
+ * @example
+ * logger.info({
+ *   url,
+ *   ...summarizePayload(html, 'raw-html', url),
+ * }, 'Fetched HTML');
+ */
+export function summarizePayload(
+  content: string,
+  type: string,
+  identifier: string,
+  dumpFull: boolean = isLevelEnabled('trace')
+): PayloadSummary {
+  const sizeBytes = Buffer.byteLength(content, 'utf8');
+  const hash = createHash('md5').update(content).digest('hex').substring(0, 12);
+  const preview = truncateForLog(content, MAX_PREVIEW_LENGTH);
+  const baseSummary = { preview, sizeBytes, hash };
+  // Dump full payload to file only if enabled and strictly above the byte threshold
+  if (dumpFull && sizeBytes > PAYLOAD_DUMP_THRESHOLD) {
+    const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); // ':' and '.' become '-' for use in the file name
+    const safeId = safeFilename(identifier);
+    const filename = `${timestamp}-${type}-${safeId}-${hash}.json`;
+    const filepath = join(getPayloadDir(), filename);
+    writeFileSync(
+      filepath,
+      JSON.stringify(
+        {
+          timestamp: new Date().toISOString(), // taken separately from the file-name timestamp, so the two may differ slightly
+          type,
+          identifier,
+          sizeBytes,
+          content,
+        },
+        null,
+        2
+      )
+    );
+    return { ...baseSummary, payloadFile: filename }; // file name only, not the full path
+  }
+  return baseSummary;
+}
+/**
+ * Truncate content for logging with ellipsis indicator
+ *
+ * Content whose length is at most `maxLength` is returned unchanged.
+ * Longer content is cut to its first `maxLength` characters and the marker
+ * '... [truncated]' is appended after the cut, so the returned string can be
+ * longer than `maxLength`.
+ *
+ * Length is measured with String.length / substring, i.e. in UTF-16 code units.
+ *
+ * @param content - Content to truncate
+ * @param maxLength - Maximum number of content characters to keep (default: 500)
+ * @returns The original string, or its first `maxLength` characters plus '... [truncated]'
+ */
+export function truncateForLog(content: string, maxLength: number = MAX_PREVIEW_LENGTH): string {
+  if (content.length <= maxLength) {
+    return content;
+  }
+  return content.substring(0, maxLength) + '... [truncated]';
+}

package/src/mcp/handlers/search.handler.test.ts CHANGED Viewed

@@ -3,6 +3,20 @@ import { handleSearch, handleGetFullContext, resultCache } from './search.handle
 import type { HandlerContext } from '../types.js';
 import type { ServiceContainer } from '../../services/index.js';
+/**
+ * Extract JSON from search response that includes a header line.
+ * Format: "Search: ... | Results: ... | ~X tokens | Xms\n\n{json}"
+ *
+ * The text is split on blank lines ("\n\n"): the first part is the header,
+ * and all remaining parts are re-joined with "\n\n" before parsing, so a JSON
+ * body that itself contains blank lines is kept intact. When there is no
+ * body, an empty object is parsed instead. Malformed JSON makes JSON.parse
+ * throw, which fails the calling test.
+ *
+ * @param text - Raw text content of the search tool response
+ * @returns The header line and the parsed JSON body
+ */
+function parseSearchResponse(text: string): { header: string; json: Record<string, unknown> } {
+  const parts = text.split('\n\n');
+  const header = parts[0] ?? '';
+  const jsonStr = parts.slice(1).join('\n\n');
+  return {
+    header,
+    json: JSON.parse(jsonStr || '{}')
+  };
+}
 describe('Search Handlers', () => {
   let mockContext: HandlerContext;
   let mockServices: ServiceContainer;
@@ -70,7 +84,10 @@ describe('Search Handlers', () => {
         })
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
+      const { header, json: response } = parseSearchResponse(result.content[0]?.text ?? '');
+      expect(header).toContain('Search: "test query"');
+      expect(header).toContain('Results: 1');
+      expect(header).toContain('tokens');
       expect(response.results).toHaveLength(1);
       expect(response.totalResults).toBe(1);
     });
@@ -129,14 +146,15 @@ describe('Search Handlers', () => {
       expect(cached?.id).toBe('doc1');
     });
-    it('should calculate estimated tokens', async () => {
+    it('should show token count in header', async () => {
       const result = await handleSearch(
         { query: 'test', detail: 'minimal', limit: 10 },
         mockContext
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
-      expect(response.estimatedTokens).toBeGreaterThan(0);
+      const { header } = parseSearchResponse(result.content[0]?.text ?? '');
+      // Header should contain token count (either "~X tokens" or "~X.Xk tokens")
+      expect(header).toMatch(/~\d+\.?\d*k? tokens/);
     });
     it('should add repoRoot for repo stores', async () => {
@@ -156,7 +174,7 @@ describe('Search Handlers', () => {
         mockContext
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
+      const { json: response } = parseSearchResponse(result.content[0]?.text ?? '');
       expect(response.results[0]?.summary.repoRoot).toBe('/repos/test');
     });
@@ -166,7 +184,7 @@ describe('Search Handlers', () => {
         mockContext
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
+      const { json: response } = parseSearchResponse(result.content[0]?.text ?? '');
       expect(response.results[0]?.summary.repoRoot).toBeUndefined();
     });
@@ -176,7 +194,7 @@ describe('Search Handlers', () => {
         mockContext
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
+      const { json: response } = parseSearchResponse(result.content[0]?.text ?? '');
       expect(response.results[0]?.summary.storeName).toBe('Test Store');
     });
@@ -186,11 +204,12 @@ describe('Search Handlers', () => {
         mockContext
       );
-      const response = JSON.parse(result.content[0]?.text ?? '{}');
+      const { header, json: response } = parseSearchResponse(result.content[0]?.text ?? '');
       expect(response).toHaveProperty('totalResults', 1);
-      expect(response).toHaveProperty('estimatedTokens');
       expect(response).toHaveProperty('mode', 'hybrid');
       expect(response).toHaveProperty('timeMs', 50);
+      // Token count is now in header, not in JSON
+      expect(header).toContain('tokens');
     });
   });

package/src/mcp/handlers/search.handler.ts CHANGED Viewed

@@ -4,6 +4,10 @@ import { SearchArgsSchema, GetFullContextArgsSchema } from '../schemas/index.js'
 import type { SearchQuery, DocumentId, StoreId } from '../../types/index.js';
 import { LRUCache } from '../cache.js';
 import type { SearchResult } from '../../types/search.js';
+import { createLogger, summarizePayload } from '../../logging/index.js';
+import { estimateTokens, formatTokenCount } from '../../services/token.service.js';
+const logger = createLogger('mcp-search');
 // Create result cache for get_full_context
 // Uses LRU cache to prevent memory leaks (max 1000 items)
@@ -22,6 +26,14 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
   // Validate arguments with Zod
   const validated = SearchArgsSchema.parse(args);
+  logger.info({
+    query: validated.query,
+    stores: validated.stores,
+    detail: validated.detail,
+    limit: validated.limit,
+    intent: validated.intent,
+  }, 'Search started');
   const { services } = context;
   // Get all stores if none specified, resolve store names to IDs
@@ -63,14 +75,6 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
     resultCache.set(result.id, result);
   }
-  // Calculate estimated tokens
-  const estimatedTokens = results.results.reduce((sum, r) => {
-    let tokens = 100; // Base for summary
-    if (r.context) tokens += 200;
-    if (r.full) tokens += 800;
-    return sum + tokens;
-  }, 0);
   // Add repoRoot to results for cloned repos
   const enhancedResults = await Promise.all(results.results.map(async (r) => {
     const storeId = r.metadata.storeId;
@@ -89,17 +93,33 @@ export const handleSearch: ToolHandler<SearchArgs> = async (
     };
   }));
+  const responseJson = JSON.stringify({
+    results: enhancedResults,
+    totalResults: results.totalResults,
+    mode: results.mode,
+    timeMs: results.timeMs
+  }, null, 2);
+  // Calculate actual token estimate based on response content
+  const responseTokens = estimateTokens(responseJson);
+  // Create visible header with token usage
+  const header = `Search: "${validated.query}" | Results: ${String(results.totalResults)} | ${formatTokenCount(responseTokens)} tokens | ${String(results.timeMs)}ms\n\n`;
+  // Log the complete MCP response that will be sent to Claude Code
+  logger.info({
+    query: validated.query,
+    totalResults: results.totalResults,
+    responseTokens,
+    timeMs: results.timeMs,
+    ...summarizePayload(responseJson, 'mcp-response', validated.query),
+  }, 'Search complete - context sent to Claude Code');
   return {
     content: [
       {
         type: 'text',
-        text: JSON.stringify({
-          results: enhancedResults,
-          totalResults: results.totalResults,
-          estimatedTokens,
-          mode: results.mode,
-          timeMs: results.timeMs
-        }, null, 2)
+        text: header + responseJson
       }
     ]
   };
@@ -118,6 +138,8 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
   // Validate arguments with Zod
   const validated = GetFullContextArgsSchema.parse(args);
+  logger.info({ resultId: validated.resultId }, 'Get full context requested');
   // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
   const resultId = validated.resultId as DocumentId;
@@ -132,17 +154,26 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
   // If result already has full context, return it
   if (cachedResult.full) {
+    const responseJson = JSON.stringify({
+      id: cachedResult.id,
+      score: cachedResult.score,
+      summary: cachedResult.summary,
+      context: cachedResult.context,
+      full: cachedResult.full
+    }, null, 2);
+    logger.info({
+      resultId,
+      cached: true,
+      hasFullContext: true,
+      ...summarizePayload(responseJson, 'mcp-full-context', resultId),
+    }, 'Full context retrieved from cache');
     return {
       content: [
         {
           type: 'text',
-          text: JSON.stringify({
-            id: cachedResult.id,
-            score: cachedResult.score,
-            summary: cachedResult.summary,
-            context: cachedResult.context,
-            full: cachedResult.full
-          }, null, 2)
+          text: responseJson
         }
       ]
     };
@@ -192,17 +223,26 @@ export const handleGetFullContext: ToolHandler<GetFullContextArgs> = async (
   // Update cache with full result
   resultCache.set(resultId, fullResult);
+  const responseJson = JSON.stringify({
+    id: fullResult.id,
+    score: fullResult.score,
+    summary: fullResult.summary,
+    context: fullResult.context,
+    full: fullResult.full
+  }, null, 2);
+  logger.info({
+    resultId,
+    cached: false,
+    hasFullContext: true,
+    ...summarizePayload(responseJson, 'mcp-full-context', resultId),
+  }, 'Full context retrieved via re-query');
   return {
     content: [
       {
         type: 'text',
-        text: JSON.stringify({
-          id: fullResult.id,
-          score: fullResult.score,
-          summary: fullResult.summary,
-          context: fullResult.context,
-          full: fullResult.full
-        }, null, 2)
+        text: responseJson
       }
     ]
   };

package/src/mcp/handlers/store.handler.test.ts CHANGED Viewed

@@ -411,5 +411,6 @@ describe('store.handler', () => {
       const data = JSON.parse(result.content[0].text);
       expect(data.store.type).toBe('file');
     });
   });
 });

package/src/mcp/server.ts CHANGED Viewed

@@ -9,6 +9,9 @@ import { tools } from './handlers/index.js';
 import { handleExecute } from './handlers/execute.handler.js';
 import { ExecuteArgsSchema } from './schemas/index.js';
 import type { MCPServerOptions } from './types.js';
+import { createLogger } from '../logging/index.js';
+const logger = createLogger('mcp-server');
 // eslint-disable-next-line @typescript-eslint/no-deprecated
 export function createMCPServer(options: MCPServerOptions): Server {
@@ -106,6 +109,9 @@ export function createMCPServer(options: MCPServerOptions): Server {
   // Handle tool calls
   server.setRequestHandler(CallToolRequestSchema, async (request) => {
     const { name, arguments: args } = request.params;
+    const startTime = Date.now();
+    logger.info({ tool: name, args: JSON.stringify(args) }, 'Tool invoked');
     // Create services once (needed by all handlers)
     const services = await createServices(
@@ -115,34 +121,56 @@ export function createMCPServer(options: MCPServerOptions): Server {
     );
     const context = { services, options };
-    // Handle execute meta-tool
-    if (name === 'execute') {
-      const validated = ExecuteArgsSchema.parse(args ?? {});
-      return handleExecute(validated, context);
-    }
+    try {
+      let result;
-    // Find handler in registry for native tools (search, get_full_context)
-    const tool = tools.find(t => t.name === name);
-    if (tool === undefined) {
-      throw new Error(`Unknown tool: ${name}`);
-    }
+      // Handle execute meta-tool
+      if (name === 'execute') {
+        const validated = ExecuteArgsSchema.parse(args ?? {});
+        result = await handleExecute(validated, context);
+      } else {
+        // Find handler in registry for native tools (search, get_full_context)
+        const tool = tools.find(t => t.name === name);
+        if (tool === undefined) {
+          throw new Error(`Unknown tool: ${name}`);
+        }
-    // Validate arguments with Zod
-    const validated = tool.schema.parse(args ?? {});
+        // Validate arguments with Zod
+        const validated = tool.schema.parse(args ?? {});
-    // Execute handler with context
-    return tool.handler(validated, context);
+        // Execute handler with context
+        result = await tool.handler(validated, context);
+      }
+      const durationMs = Date.now() - startTime;
+      logger.info({ tool: name, durationMs }, 'Tool completed');
+      return result;
+    } catch (error) {
+      const durationMs = Date.now() - startTime;
+      logger.error({
+        tool: name,
+        durationMs,
+        error: error instanceof Error ? error.message : String(error),
+      }, 'Tool execution failed');
+      throw error;
+    }
   });
   return server;
 }
 export async function runMCPServer(options: MCPServerOptions): Promise<void> {
+  logger.info({
+    dataDir: options.dataDir,
+    projectRoot: options.projectRoot,
+  }, 'MCP server starting');
   const server = createMCPServer(options);
   const transport = new StdioServerTransport();
   await server.connect(transport);
-  console.error('Bluera Knowledge MCP server running on stdio');
+  logger.info('MCP server connected to stdio transport');
 }
 // Run the server only when this file is executed directly (not imported by CLI)
@@ -156,7 +184,7 @@ if (isMCPServerEntry) {
     config: process.env['CONFIG_PATH'],
     projectRoot: process.env['PROJECT_ROOT'] ?? process.env['PWD']
   }).catch((error: unknown) => {
-    console.error('Failed to start MCP server:', error);
+    logger.error({ error: error instanceof Error ? error.message : String(error) }, 'Failed to start MCP server');
     process.exit(1);
   });
 }

package/src/services/chunking.service.ts CHANGED Viewed

@@ -17,6 +17,19 @@ export interface Chunk {
   docSummary?: string | undefined;
 }
+/**
+ * Preset configurations for different content types.
+ * Code uses smaller chunks for precise symbol matching.
+ * Web/docs use larger chunks to preserve prose context.
+ *
+ * Each preset supplies the `chunkSize` and `chunkOverlap` values passed to
+ * the ChunkingService constructor. 'web' and 'docs' currently share the same
+ * numbers but are kept as separate keys.
+ * NOTE(review): the unit of these numbers (characters vs. tokens) is not
+ * visible in this hunk - confirm against the chunking implementation.
+ */
+const CHUNK_PRESETS = {
+  code: { chunkSize: 768, chunkOverlap: 100 },
+  web: { chunkSize: 1200, chunkOverlap: 200 },
+  docs: { chunkSize: 1200, chunkOverlap: 200 },
+} as const;
+export type ContentType = keyof typeof CHUNK_PRESETS; // 'code' | 'web' | 'docs'
 export class ChunkingService {
   private readonly chunkSize: number;
   private readonly chunkOverlap: number;
@@ -26,6 +39,16 @@ export class ChunkingService {
     this.chunkOverlap = config.chunkOverlap;
   }
+  /**
+   * Create a ChunkingService with preset configuration for a content type.
+   * - 'code': Smaller chunks (768/100) for precise code symbol matching
+   * - 'web': Larger chunks (1200/200) for web prose content
+   * - 'docs': Larger chunks (1200/200) for documentation
+   *
+   * The numbers are chunkSize/chunkOverlap and come straight from CHUNK_PRESETS.
+   *
+   * @param type - Content type key selecting the preset
+   * @returns A new ChunkingService constructed with that preset
+   */
+  static forContentType(type: ContentType): ChunkingService {
+    return new ChunkingService(CHUNK_PRESETS[type]);
+  }
   /**
    * Chunk text content. Uses semantic chunking for Markdown and code files,
    * falling back to sliding window for other content.