bluera-knowledge 0.9.26 → 0.9.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/BUGS-FOUND.md +71 -0
  9. package/CHANGELOG.md +76 -0
  10. package/README.md +55 -20
  11. package/bun.lock +35 -1
  12. package/commands/crawl.md +2 -0
  13. package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
  14. package/dist/chunk-2SJHNRXD.js.map +1 -0
  15. package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
  16. package/dist/chunk-OGEY66FZ.js.map +1 -0
  17. package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
  18. package/dist/chunk-RWSXP3PQ.js.map +1 -0
  19. package/dist/index.js +73 -28
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp/server.js +2 -2
  22. package/dist/workers/background-worker-cli.js +2 -2
  23. package/eslint.config.js +1 -1
  24. package/package.json +3 -1
  25. package/src/analysis/ast-parser.test.ts +46 -0
  26. package/src/cli/commands/crawl.test.ts +99 -12
  27. package/src/cli/commands/crawl.ts +76 -24
  28. package/src/cli/commands/store.test.ts +68 -1
  29. package/src/cli/commands/store.ts +9 -3
  30. package/src/crawl/article-converter.ts +36 -1
  31. package/src/crawl/bridge.ts +18 -7
  32. package/src/crawl/intelligent-crawler.ts +45 -4
  33. package/src/db/embeddings.test.ts +16 -0
  34. package/src/db/lance.test.ts +31 -0
  35. package/src/db/lance.ts +8 -0
  36. package/src/logging/index.ts +29 -0
  37. package/src/logging/logger.test.ts +75 -0
  38. package/src/logging/logger.ts +147 -0
  39. package/src/logging/payload.test.ts +152 -0
  40. package/src/logging/payload.ts +121 -0
  41. package/src/mcp/handlers/search.handler.test.ts +28 -9
  42. package/src/mcp/handlers/search.handler.ts +69 -29
  43. package/src/mcp/handlers/store.handler.test.ts +1 -0
  44. package/src/mcp/server.ts +44 -16
  45. package/src/services/chunking.service.ts +23 -0
  46. package/src/services/index.service.test.ts +921 -1
  47. package/src/services/index.service.ts +76 -1
  48. package/src/services/index.ts +20 -2
  49. package/src/services/search.service.test.ts +573 -21
  50. package/src/services/search.service.ts +257 -105
  51. package/src/services/services.test.ts +2 -2
  52. package/src/services/snippet.service.ts +28 -3
  53. package/src/services/store.service.test.ts +28 -0
  54. package/src/services/store.service.ts +4 -0
  55. package/src/services/token.service.test.ts +45 -0
  56. package/src/services/token.service.ts +33 -0
  57. package/src/types/result.test.ts +10 -0
  58. package/tests/integration/cli-consistency.test.ts +1 -4
  59. package/vitest.config.ts +4 -0
  60. package/dist/chunk-5QMHZUC4.js.map +0 -1
  61. package/dist/chunk-BICFAWMN.js.map +0 -1
  62. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  63. package/scripts/readme-version-updater.cjs +0 -18
@@ -1,3 +1,142 @@
+ // src/logging/logger.ts
+ import pino from "pino";
+ import { homedir } from "os";
+ import { mkdirSync, existsSync } from "fs";
+ import { join } from "path";
+ var VALID_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal"];
+ var VALID_LEVELS_SET = new Set(VALID_LEVELS);
+ function getLogDir() {
+ return join(homedir(), ".bluera", "bluera-knowledge", "logs");
+ }
+ function ensureLogDir() {
+ const logDir = getLogDir();
+ if (!existsSync(logDir)) {
+ mkdirSync(logDir, { recursive: true });
+ }
+ return logDir;
+ }
+ function isValidLogLevel(level) {
+ return VALID_LEVELS_SET.has(level);
+ }
+ function getLogLevel() {
+ const level = process.env["LOG_LEVEL"]?.toLowerCase();
+ if (level === void 0 || level === "") {
+ return "info";
+ }
+ if (!isValidLogLevel(level)) {
+ throw new Error(
+ `Invalid LOG_LEVEL: "${level}". Valid values: ${VALID_LEVELS.join(", ")}`
+ );
+ }
+ return level;
+ }
+ var rootLogger = null;
+ function initializeLogger() {
+ if (rootLogger !== null) {
+ return rootLogger;
+ }
+ const logDir = ensureLogDir();
+ const logFile = join(logDir, "app.log");
+ const level = getLogLevel();
+ const options = {
+ level,
+ timestamp: pino.stdTimeFunctions.isoTime,
+ formatters: {
+ level: (label) => ({ level: label })
+ },
+ transport: {
+ target: "pino-roll",
+ options: {
+ file: logFile,
+ size: "10m",
+ // 10MB rotation
+ limit: { count: 5 },
+ // Keep 5 rotated files
+ mkdir: true
+ }
+ }
+ };
+ rootLogger = pino(options);
+ return rootLogger;
+ }
+ function createLogger(module) {
+ const root = initializeLogger();
+ return root.child({ module });
+ }
+ function isLevelEnabled(level) {
+ const currentLevel = getLogLevel();
+ const currentIndex = VALID_LEVELS.indexOf(currentLevel);
+ const checkIndex = VALID_LEVELS.indexOf(level);
+ return checkIndex >= currentIndex;
+ }
+ function getLogDirectory() {
+ return getLogDir();
+ }
+ function shutdownLogger() {
+ return new Promise((resolve3) => {
+ if (rootLogger !== null) {
+ rootLogger.flush();
+ setTimeout(() => {
+ rootLogger = null;
+ resolve3();
+ }, 100);
+ } else {
+ resolve3();
+ }
+ });
+ }
+
+ // src/logging/payload.ts
+ import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync2 } from "fs";
+ import { join as join2 } from "path";
+ import { createHash } from "crypto";
+ var MAX_PREVIEW_LENGTH = 500;
+ var PAYLOAD_DUMP_THRESHOLD = 1e4;
+ function getPayloadDir() {
+ const dir = join2(getLogDirectory(), "payload");
+ if (!existsSync2(dir)) {
+ mkdirSync2(dir, { recursive: true });
+ }
+ return dir;
+ }
+ function safeFilename(identifier) {
+ return identifier.replace(/[^a-zA-Z0-9-]/g, "_").substring(0, 50);
+ }
+ function summarizePayload(content, type, identifier, dumpFull = isLevelEnabled("trace")) {
+ const sizeBytes = Buffer.byteLength(content, "utf8");
+ const hash = createHash("md5").update(content).digest("hex").substring(0, 12);
+ const preview = truncateForLog(content, MAX_PREVIEW_LENGTH);
+ const baseSummary = { preview, sizeBytes, hash };
+ if (dumpFull && sizeBytes > PAYLOAD_DUMP_THRESHOLD) {
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+ const safeId = safeFilename(identifier);
+ const filename = `${timestamp}-${type}-${safeId}-${hash}.json`;
+ const filepath = join2(getPayloadDir(), filename);
+ writeFileSync(
+ filepath,
+ JSON.stringify(
+ {
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ type,
+ identifier,
+ sizeBytes,
+ content
+ },
+ null,
+ 2
+ )
+ );
+ return { ...baseSummary, payloadFile: filename };
+ }
+ return baseSummary;
+ }
+ function truncateForLog(content, maxLength = MAX_PREVIEW_LENGTH) {
+ if (content.length <= maxLength) {
+ return content;
+ }
+ return content.substring(0, maxLength) + "... [truncated]";
+ }
+
  // src/services/job.service.ts
  import fs from "fs";
  import path from "path";
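The new `src/logging` module above wires pino into a rotating file log under `~/.bluera/bluera-knowledge/logs/`. A minimal usage sketch, assuming the helpers are reachable from the package entry point (the import specifier here is illustrative; `createLogger` and `summarizePayload` do appear in this chunk's export list, and shutdown is handled by `destroyServices`):

```ts
// Sketch only: import path is illustrative.
import { createLogger, summarizePayload } from "bluera-knowledge";

const log = createLogger("example"); // pino child logger tagged { module: "example" }

// Writes to ~/.bluera/bluera-knowledge/logs/app.log via pino-roll
// (rotated at 10 MB, 5 files kept), level taken from LOG_LEVEL (default "info").
log.info({ step: "demo" }, "structured message");

// Large payloads are logged as { preview, sizeBytes, hash }; with LOG_LEVEL=trace,
// bodies over ~10 KB are additionally dumped to logs/payload/<timestamp>-....json.
const body = JSON.stringify({ some: "large crawl response" });
log.debug(summarizePayload(body, "crawl-response", "https://example.com/docs"), "fetched page");
```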
@@ -200,7 +339,7 @@ var JobService = class {
  // src/services/config.service.ts
  import { readFile, writeFile, mkdir } from "fs/promises";
  import { dirname as dirname2, resolve } from "path";
- import { homedir } from "os";
+ import { homedir as homedir2 } from "os";

  // src/types/config.ts
  var DEFAULT_CONFIG = {
@@ -239,8 +378,8 @@ var DEFAULT_CONFIG = {
  };

  // src/services/project-root.service.ts
- import { existsSync, statSync, realpathSync } from "fs";
- import { dirname, join, normalize, sep } from "path";
+ import { existsSync as existsSync3, statSync, realpathSync } from "fs";
+ import { dirname, join as join3, normalize, sep } from "path";
  var ProjectRootService = class {
  /**
  * Resolve project root directory using hierarchical detection.
@@ -270,8 +409,8 @@ var ProjectRootService = class {
  let currentPath = normalize(startPath);
  const root = normalize(sep);
  while (currentPath !== root) {
- const gitPath = join(currentPath, ".git");
- if (existsSync(gitPath)) {
+ const gitPath = join3(currentPath, ".git");
+ if (existsSync3(gitPath)) {
  try {
  const stats = statSync(gitPath);
  if (stats.isDirectory() || stats.isFile()) {
@@ -317,7 +456,7 @@ var ConfigService = class {
  configPath;
  dataDir;
  config = null;
- constructor(configPath = `${homedir()}/.bluera/bluera-knowledge/config.json`, dataDir, projectRoot) {
+ constructor(configPath = `${homedir2()}/.bluera/bluera-knowledge/config.json`, dataDir, projectRoot) {
  this.configPath = configPath;
  if (dataDir !== void 0 && dataDir !== "") {
  this.dataDir = dataDir;
@@ -348,7 +487,7 @@ var ConfigService = class {
  }
  expandPath(path3, baseDir) {
  if (path3.startsWith("~")) {
- return path3.replace("~", homedir());
+ return path3.replace("~", homedir2());
  }
  if (!path3.startsWith("/")) {
  return resolve(baseDir, path3);
@@ -359,7 +498,7 @@ var ConfigService = class {

  // src/services/store.service.ts
  import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir3, stat } from "fs/promises";
- import { join as join2, resolve as resolve2 } from "path";
+ import { join as join4, resolve as resolve2 } from "path";
  import { randomUUID as randomUUID2 } from "crypto";

  // src/types/brands.ts
@@ -430,6 +569,9 @@ var StoreService = class {
  await this.loadRegistry();
  }
  async create(input) {
+ if (!input.name || input.name.trim() === "") {
+ return err(new Error("Store name cannot be empty"));
+ }
  const existing = await this.getByName(input.name);
  if (existing !== void 0) {
  return err(new Error(`Store with name "${input.name}" already exists`));
@@ -467,7 +609,7 @@ var StoreService = class {
  case "repo": {
  let repoPath = input.path;
  if (input.url !== void 0) {
- const cloneDir = join2(this.dataDir, "repos", id);
+ const cloneDir = join4(this.dataDir, "repos", id);
  const result = await cloneRepository({
  url: input.url,
  targetDir: cloneDir,
@@ -563,7 +705,7 @@ var StoreService = class {
  return ok(void 0);
  }
  async loadRegistry() {
- const registryPath = join2(this.dataDir, "stores.json");
+ const registryPath = join4(this.dataDir, "stores.json");
  try {
  const content = await readFile2(registryPath, "utf-8");
  const data = JSON.parse(content);
@@ -580,7 +722,7 @@ var StoreService = class {
  }
  }
  async saveRegistry() {
- const registryPath = join2(this.dataDir, "stores.json");
+ const registryPath = join4(this.dataDir, "stores.json");
  await writeFile2(registryPath, JSON.stringify(this.registry, null, 2));
  }
  };
@@ -659,6 +801,7 @@ var CodeUnitService = class {
  };

  // src/services/search.service.ts
+ var logger = createLogger("search-service");
  var INTENT_FILE_BOOSTS = {
  "how-to": {
  "documentation-primary": 1.3,
@@ -729,79 +872,92 @@ var FRAMEWORK_PATTERNS = [
  { pattern: /\btypescript\b/i, terms: ["typescript", "ts"] },
  { pattern: /\bjwt\b/i, terms: ["jwt", "jsonwebtoken", "json-web-token"] }
  ];
- function classifyQueryIntent(query) {
+ var HOW_TO_PATTERNS = [
+ /how (do|can|should|would) (i|you|we)/i,
+ /how to\b/i,
+ /what('s| is) the (best |right |correct )?(way|approach) to/i,
+ /i (need|want|have) to/i,
+ /show me how/i,
+ /\bwhat's the syntax\b/i,
+ /\bhow do i (use|create|make|set up|configure|implement|add|get)\b/i,
+ /\bi'm (trying|building|creating|making)\b/i
+ ];
+ var IMPLEMENTATION_PATTERNS = [
+ /how (does|is) .* (implemented|work internally)/i,
+ /\binternal(ly)?\b/i,
+ /\bsource code\b/i,
+ /\bunder the hood\b/i,
+ /\bimplementation (of|details?)\b/i
+ ];
+ var COMPARISON_PATTERNS = [
+ /\b(vs\.?|versus)\b/i,
+ /\bdifference(s)? between\b/i,
+ /\bcompare\b/i,
+ /\bshould (i|we) use .* or\b/i,
+ /\bwhat's the difference\b/i,
+ /\bwhich (one|is better)\b/i,
+ /\bwhen (should|to) use\b/i
+ ];
+ var DEBUGGING_PATTERNS = [
+ /\b(error|bug|issue|problem|crash|fail|broken|wrong)\b/i,
+ /\bdoesn't (work|compile|run)\b/i,
+ /\bisn't (working|updating|rendering)\b/i,
+ /\bwhy (is|does|doesn't|isn't)\b/i,
+ /\bwhat('s| is) (wrong|happening|going on)\b/i,
+ /\bwhat am i doing wrong\b/i,
+ /\bnot (working|updating|showing)\b/i,
+ /\bhow do i (fix|debug|solve|resolve)\b/i
+ ];
+ var CONCEPTUAL_PATTERNS = [
+ /\bwhat (is|are)\b/i,
+ /\bexplain\b/i,
+ /\bwhat does .* (mean|do)\b/i,
+ /\bhow does .* work\b/i,
+ /\bwhat('s| is) the (purpose|point|idea)\b/i
+ ];
+ function classifyQueryIntents(query) {
  const q = query.toLowerCase();
- const howToPatterns = [
- /how (do|can|should|would) (i|you|we)/i,
- /how to\b/i,
- /what('s| is) the (best |right |correct )?(way|approach) to/i,
- /i (need|want|have) to/i,
- /show me how/i,
- /\bwhat's the syntax\b/i,
- /\bhow do i (use|create|make|set up|configure|implement|add|get)\b/i,
- /\bi'm (trying|building|creating|making)\b/i
- ];
- const implementationPatterns = [
- /how (does|is) .* (implemented|work internally)/i,
- /\binternal(ly)?\b/i,
- /\bsource code\b/i,
- /\bunder the hood\b/i,
- /\bimplementation (of|details?)\b/i
- ];
- const comparisonPatterns = [
- /\b(vs\.?|versus)\b/i,
- /\bdifference(s)? between\b/i,
- /\bcompare\b/i,
- /\bshould (i|we) use .* or\b/i,
- /\bwhat's the difference\b/i,
- /\bwhich (one|is better)\b/i,
- /\bwhen (should|to) use\b/i
- ];
- const debuggingPatterns = [
- /\b(error|bug|issue|problem|crash|fail|broken|wrong)\b/i,
- /\bdoesn't (work|compile|run)\b/i,
- /\bisn't (working|updating|rendering)\b/i,
- /\bwhy (is|does|doesn't|isn't)\b/i,
- /\bwhat('s| is) (wrong|happening|going on)\b/i,
- /\bwhat am i doing wrong\b/i,
- /\bnot (working|updating|showing)\b/i,
- /\bhow do i (fix|debug|solve|resolve)\b/i
- ];
- const conceptualPatterns = [
- /\bwhat (is|are)\b/i,
- /\bexplain\b/i,
- /\bwhat does .* (mean|do)\b/i,
- /\bhow does .* work\b/i,
- /\bwhat('s| is) the (purpose|point|idea)\b/i
- ];
- if (implementationPatterns.some((p) => p.test(q))) {
- return "implementation";
- }
- if (debuggingPatterns.some((p) => p.test(q))) {
- return "debugging";
- }
- if (comparisonPatterns.some((p) => p.test(q))) {
- return "comparison";
- }
- if (howToPatterns.some((p) => p.test(q))) {
- return "how-to";
- }
- if (conceptualPatterns.some((p) => p.test(q))) {
- return "conceptual";
- }
- return "how-to";
+ const intents = [];
+ if (IMPLEMENTATION_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "implementation", confidence: 0.9 });
+ }
+ if (DEBUGGING_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "debugging", confidence: 0.85 });
+ }
+ if (COMPARISON_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "comparison", confidence: 0.8 });
+ }
+ if (HOW_TO_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "how-to", confidence: 0.75 });
+ }
+ if (CONCEPTUAL_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "conceptual", confidence: 0.7 });
+ }
+ if (intents.length === 0) {
+ intents.push({ intent: "how-to", confidence: 0.5 });
+ }
+ return intents.sort((a, b) => b.confidence - a.confidence);
+ }
+ function getPrimaryIntent(intents) {
+ return intents[0]?.intent ?? "how-to";
+ }
+ var RRF_PRESETS = {
+ code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
+ web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
+ };
+ function detectContentType(results) {
+ const webCount = results.filter((r) => "url" in r.metadata).length;
+ return webCount > results.length / 2 ? "web" : "code";
  }
  var SearchService = class {
  lanceStore;
  embeddingEngine;
- rrfConfig;
  codeUnitService;
  codeGraphService;
  graphCache;
- constructor(lanceStore, embeddingEngine, rrfConfig = { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 }, codeGraphService) {
+ constructor(lanceStore, embeddingEngine, codeGraphService) {
  this.lanceStore = lanceStore;
  this.embeddingEngine = embeddingEngine;
- this.rrfConfig = rrfConfig;
  this.codeUnitService = new CodeUnitService();
  this.codeGraphService = codeGraphService;
  this.graphCache = /* @__PURE__ */ new Map();
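Intent detection now returns every matching intent with a confidence weight instead of a single label, and the RRF constants come from `RRF_PRESETS`, chosen per result set (`web` when most results carry a `url` in their metadata, otherwise `code`) rather than from a per-instance `rrfConfig`. A standalone paraphrase of the classification logic, using only a subset of the patterns above (these helpers are internal to the bundle, not public API):

```ts
// Paraphrase of the bundled intent classifier, for illustration only.
type Intent = "implementation" | "debugging" | "comparison" | "how-to" | "conceptual";

const RULES: Array<{ intent: Intent; confidence: number; pattern: RegExp }> = [
  { intent: "implementation", confidence: 0.9, pattern: /\bunder the hood\b|\bsource code\b/i },
  { intent: "debugging", confidence: 0.85, pattern: /\b(error|bug|crash|fail|broken)\b/i },
  { intent: "comparison", confidence: 0.8, pattern: /\b(vs\.?|versus)\b|\bdifference(s)? between\b/i },
  { intent: "how-to", confidence: 0.75, pattern: /how to\b|show me how/i },
  { intent: "conceptual", confidence: 0.7, pattern: /\bwhat (is|are)\b|\bexplain\b/i },
];

function classify(query: string): Array<{ intent: Intent; confidence: number }> {
  const hits = RULES
    .filter((r) => r.pattern.test(query.toLowerCase()))
    .map(({ intent, confidence }) => ({ intent, confidence }));
  // Mirror the bundled fallback: an unmatched query is treated as low-confidence "how-to".
  return hits.length > 0
    ? hits.sort((a, b) => b.confidence - a.confidence)
    : [{ intent: "how-to", confidence: 0.5 }];
}

// "error when comparing X vs Y" matches both debugging (0.85) and comparison (0.8);
// downstream, every matched intent contributes to the file-type boost, weighted by confidence.
console.log(classify("error when comparing X vs Y"));
```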
@@ -825,6 +981,17 @@ var SearchService = class {
  const limit = query.limit ?? 10;
  const stores = query.stores ?? [];
  const detail = query.detail ?? "minimal";
+ const intents = classifyQueryIntents(query.query);
+ const primaryIntent = getPrimaryIntent(intents);
+ logger.debug({
+ query: query.query,
+ mode,
+ limit,
+ stores,
+ detail,
+ intent: primaryIntent,
+ intents
+ }, "Search query received");
  let allResults = [];
  const fetchLimit = limit * 3;
  if (mode === "vector") {
@@ -847,13 +1014,22 @@ var SearchService = class {
  const graph = graphs.get(r.metadata.storeId) ?? null;
  return this.addProgressiveContext(r, query.query, detail, graph);
  });
+ const timeMs = Date.now() - startTime;
+ logger.info({
+ query: query.query,
+ mode,
+ resultCount: enhancedResults.length,
+ dedupedFrom: allResults.length,
+ intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
+ timeMs
+ }, "Search complete");
  return {
  query: query.query,
  mode,
  stores,
  results: enhancedResults,
  totalResults: enhancedResults.length,
- timeMs: Date.now() - startTime
+ timeMs
  };
  }
  /**
@@ -871,7 +1047,9 @@ var SearchService = class {
  } else {
  const existingTermCount = this.countQueryTerms(existing.content, queryTerms);
  const newTermCount = this.countQueryTerms(result.content, queryTerms);
- if (newTermCount > existingTermCount || newTermCount === existingTermCount && result.score > existing.score) {
+ const existingRelevance = existingTermCount * existing.score;
+ const newRelevance = newTermCount * result.score;
+ if (newRelevance > existingRelevance) {
  bySource.set(sourceKey, result);
  }
  }
@@ -913,7 +1091,7 @@ var SearchService = class {
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }
  async hybridSearch(query, stores, limit, threshold) {
- const intent = classifyQueryIntent(query);
+ const intents = classifyQueryIntents(query);
  const [vectorResults, ftsResults] = await Promise.all([
  this.vectorSearch(query, stores, limit * 2, threshold),
  this.ftsSearch(query, stores, limit * 2)
@@ -932,7 +1110,8 @@ var SearchService = class {
  }
  });
  const rrfScores = [];
- const { k, vectorWeight, ftsWeight } = this.rrfConfig;
+ const contentType = detectContentType([...allDocs.values()]);
+ const { k, vectorWeight, ftsWeight } = RRF_PRESETS[contentType];
  for (const [id, result] of allDocs) {
  const vectorRank = vectorRanks.get(id) ?? Infinity;
  const ftsRank = ftsRanks.get(id) ?? Infinity;
@@ -941,14 +1120,18 @@ var SearchService = class {
  const fileTypeBoost = this.getFileTypeBoost(
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  result.metadata["fileType"],
- intent
+ intents
  );
  const frameworkBoost = this.getFrameworkContextBoost(query, result);
+ const urlKeywordBoost = this.getUrlKeywordBoost(query, result);
+ const pathKeywordBoost = this.getPathKeywordBoost(query, result);
  const metadata = {
  vectorRRF,
  ftsRRF,
  fileTypeBoost,
- frameworkBoost
+ frameworkBoost,
+ urlKeywordBoost,
+ pathKeywordBoost
  };
  if (vectorRank !== Infinity) {
  metadata.vectorRank = vectorRank;
@@ -958,7 +1141,7 @@ var SearchService = class {
  }
  rrfScores.push({
  id,
- score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost,
+ score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
  result,
  metadata
  });
@@ -1003,7 +1186,7 @@ var SearchService = class {
  * Phase 4: Strengthened boosts for better documentation ranking.
  * Phase 1: Intent-based adjustments for context-aware ranking.
  */
- getFileTypeBoost(fileType, intent) {
+ getFileTypeBoost(fileType, intents) {
  let baseBoost;
  switch (fileType) {
  case "documentation-primary":
@@ -1030,9 +1213,106 @@ var SearchService = class {
  default:
  baseBoost = 1;
  }
- const intentBoosts = INTENT_FILE_BOOSTS[intent];
- const intentMultiplier = intentBoosts[fileType ?? "other"] ?? 1;
- return baseBoost * intentMultiplier;
+ let weightedMultiplier = 0;
+ let totalConfidence = 0;
+ for (const { intent, confidence } of intents) {
+ const intentBoosts = INTENT_FILE_BOOSTS[intent];
+ const multiplier = intentBoosts[fileType ?? "other"] ?? 1;
+ weightedMultiplier += multiplier * confidence;
+ totalConfidence += confidence;
+ }
+ const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
+ return baseBoost * blendedMultiplier;
+ }
+ /**
+ * Get a score multiplier based on URL keyword matching.
+ * Boosts results where URL path contains significant query keywords.
+ * This helps queries like "troubleshooting" rank /troubleshooting pages first.
+ */
+ getUrlKeywordBoost(query, result) {
+ const url = result.metadata.url;
+ if (url === void 0 || url === "") return 1;
+ const urlPath = url.toLowerCase().replace(/[^a-z0-9]+/g, " ");
+ const stopWords = /* @__PURE__ */ new Set([
+ "how",
+ "to",
+ "the",
+ "a",
+ "an",
+ "is",
+ "are",
+ "what",
+ "why",
+ "when",
+ "where",
+ "can",
+ "do",
+ "does",
+ "i",
+ "my",
+ "your",
+ "it",
+ "in",
+ "on",
+ "for",
+ "with",
+ "this",
+ "that",
+ "get",
+ "use",
+ "using"
+ ]);
+ const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
+ if (queryTerms.length === 0) return 1;
+ const matchingTerms = queryTerms.filter((term) => urlPath.includes(term));
+ if (matchingTerms.length === 0) return 1;
+ const matchRatio = matchingTerms.length / queryTerms.length;
+ return 1 + 1 * matchRatio;
+ }
+ /**
+ * Get a score multiplier based on file path keyword matching.
+ * Boosts results where file path contains significant query keywords.
+ * This helps queries like "dispatcher" rank async_dispatcher.py higher.
+ */
+ getPathKeywordBoost(query, result) {
+ const path3 = result.metadata.path;
+ if (path3 === void 0 || path3 === "") return 1;
+ const pathSegments = path3.toLowerCase().replace(/[^a-z0-9]+/g, " ");
+ const stopWords = /* @__PURE__ */ new Set([
+ "how",
+ "to",
+ "the",
+ "a",
+ "an",
+ "is",
+ "are",
+ "what",
+ "why",
+ "when",
+ "where",
+ "can",
+ "do",
+ "does",
+ "i",
+ "my",
+ "your",
+ "it",
+ "in",
+ "on",
+ "for",
+ "with",
+ "this",
+ "that",
+ "get",
+ "use",
+ "using"
+ ]);
+ const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
+ if (queryTerms.length === 0) return 1;
+ const matchingTerms = queryTerms.filter((term) => pathSegments.includes(term));
+ if (matchingTerms.length === 0) return 1;
+ const matchRatio = matchingTerms.length / queryTerms.length;
+ return 1 + 1 * matchRatio;
  }
  /**
  * Get a score multiplier based on framework context.
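The two new keyword boosts above share the same arithmetic: strip stop words and short tokens from the query, count how many remaining terms appear in the URL or file path, and multiply the score by 1 + matchRatio, capping the boost at 2x. A small worked example of that calculation (standalone illustration, not the package API):

```ts
// Terms are lowercased, stop words and tokens of <= 2 chars are dropped, and the
// multiplier is 1 + (matching terms / kept terms).
const query = "troubleshooting websocket reconnect"; // no stop words in this example
const url = "https://example.com/docs/troubleshooting/websocket";

const kept = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
const haystack = url.toLowerCase().replace(/[^a-z0-9]+/g, " ");
const matching = kept.filter((t) => haystack.includes(t));
const boost = matching.length === 0 ? 1 : 1 + matching.length / kept.length;
console.log(boost); // 1 + 2/3 ≈ 1.67, multiplied into the RRF score with the other boosts
```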
@@ -1331,17 +1611,31 @@ var SearchService = class {

  // src/services/index.service.ts
  import { readFile as readFile3, readdir } from "fs/promises";
- import { join as join3, extname, basename } from "path";
- import { createHash } from "crypto";
+ import { join as join5, extname, basename } from "path";
+ import { createHash as createHash2 } from "crypto";

  // src/services/chunking.service.ts
- var ChunkingService = class {
+ var CHUNK_PRESETS = {
+ code: { chunkSize: 768, chunkOverlap: 100 },
+ web: { chunkSize: 1200, chunkOverlap: 200 },
+ docs: { chunkSize: 1200, chunkOverlap: 200 }
+ };
+ var ChunkingService = class _ChunkingService {
  chunkSize;
  chunkOverlap;
  constructor(config) {
  this.chunkSize = config.chunkSize;
  this.chunkOverlap = config.chunkOverlap;
  }
+ /**
+ * Create a ChunkingService with preset configuration for a content type.
+ * - 'code': Smaller chunks (768/100) for precise code symbol matching
+ * - 'web': Larger chunks (1200/200) for web prose content
+ * - 'docs': Larger chunks (1200/200) for documentation
+ */
+ static forContentType(type) {
+ return new _ChunkingService(CHUNK_PRESETS[type]);
+ }
  /**
  * Chunk text content. Uses semantic chunking for Markdown and code files,
  * falling back to sliding window for other content.
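`ChunkingService` now ships content-type presets alongside its explicit constructor and is added to this chunk's exports. A brief usage sketch, with the import specifier being illustrative:

```ts
// Sketch only: import path is an assumption.
import { ChunkingService } from "bluera-knowledge";

// Presets from the diff above: code = 768/100, web and docs = 1200/200.
const codeChunker = ChunkingService.forContentType("code");
const webChunker = ChunkingService.forContentType("web");

// chunk(content, filePath) uses semantic chunking for Markdown/code and a sliding
// window otherwise (per the doc comment above); the chunk shape is not shown in this diff.
const chunks = codeChunker.chunk("export const answer = 42;\n", "answer.ts");
console.log(chunks);
```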
@@ -1596,6 +1890,7 @@ var ChunkingService = class {
  };

  // src/services/index.service.ts
+ var logger2 = createLogger("index-service");
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
  ".txt",
  ".md",
@@ -1640,12 +1935,22 @@ var IndexService = class {
  this.codeGraphService = options.codeGraphService;
  }
  async indexStore(store, onProgress) {
+ logger2.info({
+ storeId: store.id,
+ storeName: store.name,
+ storeType: store.type
+ }, "Starting store indexing");
  try {
  if (store.type === "file" || store.type === "repo") {
  return await this.indexFileStore(store, onProgress);
  }
+ logger2.error({ storeId: store.id, storeType: store.type }, "Unsupported store type for indexing");
  return err(new Error(`Indexing not supported for store type: ${store.type}`));
  } catch (error) {
+ logger2.error({
+ storeId: store.id,
+ error: error instanceof Error ? error.message : String(error)
+ }, "Store indexing failed");
  return err(error instanceof Error ? error : new Error(String(error)));
  }
  }
@@ -1654,6 +1959,11 @@ var IndexService = class {
  const files = await this.scanDirectory(store.path);
  const documents = [];
  let filesProcessed = 0;
+ logger2.debug({
+ storeId: store.id,
+ path: store.path,
+ fileCount: files.length
+ }, "Files scanned for indexing");
  const sourceFiles = [];
  onProgress?.({
  type: "start",
@@ -1663,7 +1973,7 @@
  });
  for (const filePath of files) {
  const content = await readFile3(filePath, "utf-8");
- const fileHash = createHash("md5").update(content).digest("hex");
+ const fileHash = createHash2("md5").update(content).digest("hex");
  const chunks = this.chunker.chunk(content, filePath);
  const ext = extname(filePath).toLowerCase();
  const fileName = basename(filePath).toLowerCase();
@@ -1717,17 +2027,26 @@ var IndexService = class {
  total: files.length,
  message: "Indexing complete"
  });
+ const timeMs = Date.now() - startTime;
+ logger2.info({
+ storeId: store.id,
+ storeName: store.name,
+ documentsIndexed: filesProcessed,
+ chunksCreated: documents.length,
+ sourceFilesForGraph: sourceFiles.length,
+ timeMs
+ }, "Store indexing complete");
  return ok({
  documentsIndexed: filesProcessed,
  chunksCreated: documents.length,
- timeMs: Date.now() - startTime
+ timeMs
  });
  }
  async scanDirectory(dir) {
  const files = [];
  const entries = await readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
- const fullPath = join3(dir, entry.name);
+ const fullPath = join5(dir, entry.name);
  if (entry.isDirectory()) {
  if (!["node_modules", ".git", "dist", "build"].includes(entry.name)) {
  files.push(...await this.scanDirectory(fullPath));
@@ -1798,10 +2117,33 @@ var IndexService = class {
  return false;
  }
  };
+ function classifyWebContentType(url, title) {
+ const urlLower = url.toLowerCase();
+ const titleLower = (title ?? "").toLowerCase();
+ if (/\/api[-/]?(ref|reference|docs?)?\//i.test(urlLower) || /api\s*(reference|documentation)/i.test(titleLower)) {
+ return "documentation-primary";
+ }
+ if (/\/(getting[-_]?started|quickstart|tutorial|setup)\b/i.test(urlLower) || /(getting started|quickstart|tutorial)/i.test(titleLower)) {
+ return "documentation-primary";
+ }
+ if (/\/(docs?|documentation|reference|learn|manual|guide)/i.test(urlLower)) {
+ return "documentation";
+ }
+ if (/\/(examples?|demos?|samples?|cookbook)/i.test(urlLower)) {
+ return "example";
+ }
+ if (/changelog|release[-_]?notes/i.test(urlLower)) {
+ return "changelog";
+ }
+ if (/\/blog\//i.test(urlLower)) {
+ return "other";
+ }
+ return "documentation";
+ }

  // src/services/code-graph.service.ts
  import { readFile as readFile4, writeFile as writeFile3, mkdir as mkdir4 } from "fs/promises";
- import { join as join4, dirname as dirname3 } from "path";
+ import { join as join6, dirname as dirname3 } from "path";

  // src/analysis/code-graph.ts
  var CodeGraph = class {
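`classifyWebContentType` (added above and now exported) maps crawled URLs, and optionally page titles, onto the same file-type labels the ranking boosts use. Expected labels for a few illustrative URLs, derived from the regexes in this diff (import specifier illustrative):

```ts
// Sketch only: classifyWebContentType is in this chunk's export list.
import { classifyWebContentType } from "bluera-knowledge";

classifyWebContentType("https://example.com/api/reference/");         // "documentation-primary"
classifyWebContentType("https://example.com/guides/getting-started"); // "documentation-primary"
classifyWebContentType("https://example.com/examples/auth");          // "example"
classifyWebContentType("https://example.com/blog/new-release");       // "other"
classifyWebContentType("https://example.com/pricing");                // "documentation" (fallback)
```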
@@ -3118,7 +3460,7 @@ var CodeGraphService = class {
  this.graphCache.clear();
  }
  getGraphPath(storeId) {
- return join4(this.dataDir, "graphs", `${storeId}.json`);
+ return join6(this.dataDir, "graphs", `${storeId}.json`);
  }
  /**
  * Type guard for SerializedGraph structure.
@@ -3254,6 +3596,13 @@ var LanceStore = class {
  this.tables.delete(tableName);
  }
  }
+ close() {
+ this.tables.clear();
+ if (this.connection !== null) {
+ this.connection.close();
+ this.connection = null;
+ }
+ }
  getTableName(storeId) {
  return `documents_${storeId}`;
  }
@@ -3273,9 +3622,9 @@ var LanceStore = class {

  // src/db/embeddings.ts
  import { pipeline, env } from "@huggingface/transformers";
- import { homedir as homedir2 } from "os";
- import { join as join5 } from "path";
- env.cacheDir = join5(homedir2(), ".cache", "huggingface-transformers");
+ import { homedir as homedir3 } from "os";
+ import { join as join7 } from "path";
+ env.cacheDir = join7(homedir3(), ".cache", "huggingface-transformers");
  var EmbeddingEngine = class {
  extractor = null;
  modelName;
@@ -3398,25 +3747,27 @@ function validateParsePythonResult(data) {
  }

  // src/crawl/bridge.ts
+ var logger3 = createLogger("python-bridge");
  var PythonBridge = class {
  process = null;
  pending = /* @__PURE__ */ new Map();
  stoppingIntentionally = false;
  start() {
  if (this.process) return Promise.resolve();
+ logger3.debug("Starting Python bridge process");
  this.process = spawn2("python3", ["python/crawl_worker.py"], {
  stdio: ["pipe", "pipe", "pipe"]
  });
  this.process.on("error", (err2) => {
- console.error("Python bridge process error:", err2);
+ logger3.error({ error: err2.message, stack: err2.stack }, "Python bridge process error");
  this.rejectAllPending(new Error(`Process error: ${err2.message}`));
  });
  this.process.on("exit", (code, signal) => {
  if (code !== 0 && code !== null) {
- console.error(`Python bridge process exited with code ${String(code)}`);
+ logger3.error({ code }, "Python bridge process exited with non-zero code");
  this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
  } else if (signal && !this.stoppingIntentionally) {
- console.error(`Python bridge process killed with signal ${signal}`);
+ logger3.error({ signal }, "Python bridge process killed with signal");
  this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
  }
  this.process = null;
@@ -3425,7 +3776,7 @@ var PythonBridge = class {
  if (this.process.stderr) {
  const stderrRl = createInterface({ input: this.process.stderr });
  stderrRl.on("line", (line) => {
- console.error("Python bridge stderr:", line);
+ logger3.warn({ stderr: line }, "Python bridge stderr output");
  });
  }
  if (this.process.stdout === null) {
@@ -3460,18 +3811,24 @@ var PythonBridge = class {
  pending.resolve(validated);
  } catch (error) {
  if (error instanceof ZodError) {
- console.error("Python bridge response validation failed:", error.issues);
- console.error("Original response:", JSON.stringify(response.result));
+ logger3.error({
+ issues: error.issues,
+ response: JSON.stringify(response.result)
+ }, "Python bridge response validation failed");
  pending.reject(new Error(`Invalid response format from Python bridge: ${error.message}`));
  } else {
  const errorMessage = error instanceof Error ? error.message : String(error);
+ logger3.error({ error: errorMessage }, "Response validation error");
  pending.reject(new Error(`Response validation error: ${errorMessage}`));
  }
  }
  }
  }
  } catch (err2) {
- console.error("Failed to parse JSON response from Python bridge:", err2, "Line:", line);
+ logger3.error({
+ error: err2 instanceof Error ? err2.message : String(err2),
+ line
+ }, "Failed to parse JSON response from Python bridge");
  }
  });
  return Promise.resolve();
@@ -3570,7 +3927,9 @@ var PythonBridge = class {
  };

  // src/services/index.ts
+ var logger4 = createLogger("services");
  async function createServices(configPath, dataDir, projectRoot) {
+ logger4.info({ configPath, dataDir, projectRoot }, "Initializing services");
  const config = new ConfigService(configPath, dataDir, projectRoot);
  const appConfig = await config.load();
  const resolvedDataDir = config.resolveDataDir();
@@ -3585,8 +3944,9 @@ async function createServices(configPath, dataDir, projectRoot) {
  const pythonBridge = new PythonBridge();
  await pythonBridge.start();
  const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
- const search = new SearchService(lance, embeddings, void 0, codeGraph);
+ const search = new SearchService(lance, embeddings, codeGraph);
  const index = new IndexService(lance, embeddings, { codeGraphService: codeGraph });
+ logger4.info({ dataDir: resolvedDataDir }, "Services initialized successfully");
  return {
  config,
  store,
@@ -3599,7 +3959,18 @@ async function createServices(configPath, dataDir, projectRoot) {
  };
  }
  async function destroyServices(services) {
- await services.pythonBridge.stop();
+ logger4.info("Shutting down services");
+ try {
+ services.lance.close();
+ } catch (e) {
+ logger4.error({ error: e }, "Error closing LanceStore");
+ }
+ try {
+ await services.pythonBridge.stop();
+ } catch (e) {
+ logger4.error({ error: e }, "Error stopping Python bridge");
+ }
+ await shutdownLogger();
  }

  export {
@@ -3608,10 +3979,15 @@ export {
  ok,
  err,
  extractRepoName,
+ createLogger,
+ summarizePayload,
+ truncateForLog,
+ ChunkingService,
+ classifyWebContentType,
  ASTParser,
  PythonBridge,
  JobService,
  createServices,
  destroyServices
  };
- //# sourceMappingURL=chunk-5QMHZUC4.js.map
+ //# sourceMappingURL=chunk-RWSXP3PQ.js.map
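Taken together, teardown is the main behavioural change at the service layer: `destroyServices` previously only stopped the Python bridge and now also closes the LanceDB connection and flushes the logger. A rough lifecycle sketch (import path and argument defaults are assumptions):

```ts
// Sketch of the revised lifecycle, not a definitive usage guide.
import { createServices, destroyServices } from "bluera-knowledge";

// createServices wires config, stores, LanceDB, embeddings, the Python bridge,
// code graph, search and index services, and now logs its progress.
const services = await createServices();

try {
  // ... use services.search / services.index / services.store ...
} finally {
  // New teardown order: close the LanceDB connection, stop the Python bridge,
  // then flush and release the logger via shutdownLogger().
  await destroyServices(services);
}
```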