bluera-knowledge 0.9.26 → 0.9.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/CHANGELOG.md +69 -0
  9. package/README.md +55 -20
  10. package/bun.lock +35 -1
  11. package/commands/crawl.md +2 -0
  12. package/dist/{chunk-BICFAWMN.js → chunk-DNOIM7BO.js} +73 -8
  13. package/dist/chunk-DNOIM7BO.js.map +1 -0
  14. package/dist/{chunk-5QMHZUC4.js → chunk-NJUMU4X2.js} +462 -105
  15. package/dist/chunk-NJUMU4X2.js.map +1 -0
  16. package/dist/{chunk-J7J6LXOJ.js → chunk-SZNTYLYT.js} +106 -41
  17. package/dist/chunk-SZNTYLYT.js.map +1 -0
  18. package/dist/index.js +65 -25
  19. package/dist/index.js.map +1 -1
  20. package/dist/mcp/server.js +2 -2
  21. package/dist/workers/background-worker-cli.js +2 -2
  22. package/eslint.config.js +1 -1
  23. package/package.json +3 -1
  24. package/src/analysis/ast-parser.test.ts +46 -0
  25. package/src/cli/commands/crawl.test.ts +99 -12
  26. package/src/cli/commands/crawl.ts +76 -24
  27. package/src/crawl/article-converter.ts +36 -1
  28. package/src/crawl/bridge.ts +18 -7
  29. package/src/crawl/intelligent-crawler.ts +45 -4
  30. package/src/db/embeddings.test.ts +16 -0
  31. package/src/logging/index.ts +29 -0
  32. package/src/logging/logger.test.ts +75 -0
  33. package/src/logging/logger.ts +147 -0
  34. package/src/logging/payload.test.ts +152 -0
  35. package/src/logging/payload.ts +121 -0
  36. package/src/mcp/handlers/search.handler.test.ts +28 -9
  37. package/src/mcp/handlers/search.handler.ts +69 -29
  38. package/src/mcp/handlers/store.handler.test.ts +1 -0
  39. package/src/mcp/server.ts +44 -16
  40. package/src/services/chunking.service.ts +23 -0
  41. package/src/services/index.service.test.ts +921 -1
  42. package/src/services/index.service.ts +76 -1
  43. package/src/services/index.ts +10 -1
  44. package/src/services/search.service.test.ts +573 -21
  45. package/src/services/search.service.ts +257 -105
  46. package/src/services/snippet.service.ts +28 -3
  47. package/src/services/token.service.test.ts +45 -0
  48. package/src/services/token.service.ts +33 -0
  49. package/src/types/result.test.ts +10 -0
  50. package/tests/integration/cli-consistency.test.ts +1 -4
  51. package/vitest.config.ts +4 -0
  52. package/dist/chunk-5QMHZUC4.js.map +0 -1
  53. package/dist/chunk-BICFAWMN.js.map +0 -1
  54. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  55. package/scripts/readme-version-updater.cjs +0 -18
@@ -1,3 +1,142 @@
1
+ // src/logging/logger.ts
2
+ import pino from "pino";
3
+ import { homedir } from "os";
4
+ import { mkdirSync, existsSync } from "fs";
5
+ import { join } from "path";
6
// Accepted LOG_LEVEL values. The array order is significant: isLevelEnabled()
// compares positions in this list to decide whether a level is active.
const VALID_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal"];
// Set view of the same values, used for membership checks.
const VALID_LEVELS_SET = new Set(VALID_LEVELS);
8
/**
 * Build the path of the log directory below the current user's home.
 * Only computes the path; nothing is created on disk.
 * @returns {string} `<home>/.bluera/bluera-knowledge/logs`
 */
function getLogDir() {
  const segments = [".bluera", "bluera-knowledge", "logs"];
  return join(homedir(), ...segments);
}
11
/**
 * Make sure the log directory exists and return its path.
 *
 * `mkdirSync` with `recursive: true` does not fail when the directory is
 * already present, so no separate existence check is performed. This avoids
 * the check-then-create race and surfaces an error immediately if the path
 * is occupied by something that is not a directory.
 * @returns {string} the log directory path
 */
function ensureLogDir() {
  const logDir = getLogDir();
  mkdirSync(logDir, { recursive: true });
  return logDir;
}
18
/**
 * Tell whether a value is one of the supported log level names.
 * @param {unknown} level - candidate value
 * @returns {boolean} true when `level` is a member of VALID_LEVELS_SET
 */
function isValidLogLevel(level) {
  const recognised = VALID_LEVELS_SET.has(level);
  return recognised;
}
21
/**
 * Read the log level from the LOG_LEVEL environment variable.
 *
 * The value is trimmed and lower-cased before validation so that stray
 * whitespace (e.g. from a hand-edited .env file) does not cause a failure.
 * @returns {string} the configured level, or "info" when LOG_LEVEL is unset
 *   or blank
 * @throws {Error} when LOG_LEVEL is set to a value outside VALID_LEVELS
 */
function getLogLevel() {
  const level = process.env["LOG_LEVEL"]?.trim().toLowerCase();
  if (level === void 0 || level === "") {
    return "info";
  }
  if (!isValidLogLevel(level)) {
    throw new Error(
      `Invalid LOG_LEVEL: "${level}". Valid values: ${VALID_LEVELS.join(", ")}`
    );
  }
  return level;
}
33
// Module-wide root logger; stays null until initializeLogger() first runs and
// is reset to null by shutdownLogger().
let rootLogger = null;
/**
 * Return the shared root logger, creating it on the first call.
 *
 * On creation the log directory is ensured, the level is read from the
 * environment, and output is routed through the "pino-roll" transport
 * writing to `app.log` inside the log directory.
 * @returns the pino root logger instance
 */
function initializeLogger() {
  if (rootLogger === null) {
    const logFile = join(ensureLogDir(), "app.log");
    rootLogger = pino({
      level: getLogLevel(),
      timestamp: pino.stdTimeFunctions.isoTime,
      formatters: {
        // Emit the level label itself in the `level` field.
        level: (label) => ({ level: label })
      },
      transport: {
        target: "pino-roll",
        options: {
          file: logFile,
          // 10MB rotation
          size: "10m",
          // Keep 5 rotated files
          limit: { count: 5 },
          mkdir: true
        }
      }
    });
  }
  return rootLogger;
}
62
/**
 * Create a child of the root logger tagged with a module name.
 * The root logger is initialized on demand.
 * @param {string} module - value stored under the `module` binding of the child
 * @returns the child logger
 */
function createLogger(module) {
  return initializeLogger().child({ module });
}
66
/**
 * Check whether messages at `level` pass the currently configured level.
 *
 * Both levels are compared by their position in VALID_LEVELS; the configured
 * level is re-read from the environment on every call.
 * @param {string} level - level to test
 * @returns {boolean} true when `level` sits at or after the configured level
 *   in VALID_LEVELS (a level not in the list yields false)
 */
function isLevelEnabled(level) {
  const thresholdIndex = VALID_LEVELS.indexOf(getLogLevel());
  const requestedIndex = VALID_LEVELS.indexOf(level);
  return !(requestedIndex < thresholdIndex);
}
72
/**
 * Accessor for the log directory path, delegating to getLogDir().
 * @returns {string} the log directory path
 */
function getLogDirectory() {
  const directory = getLogDir();
  return directory;
}
75
/**
 * Flush the root logger and drop the shared reference.
 *
 * When a logger exists, `flush()` is called and the promise settles 100 ms
 * later, after `rootLogger` has been reset to null. When no logger exists
 * the promise resolves immediately.
 * @returns {Promise<void>}
 */
function shutdownLogger() {
  const active = rootLogger;
  if (active === null) {
    return Promise.resolve();
  }
  return new Promise((done) => {
    active.flush();
    // Fixed grace period before the logger reference is released.
    setTimeout(() => {
      rootLogger = null;
      done();
    }, 100);
  });
}
88
+
89
+ // src/logging/payload.ts
90
+ import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync2 } from "fs";
91
+ import { join as join2 } from "path";
92
+ import { createHash } from "crypto";
93
// Maximum number of characters kept in a payload preview.
const MAX_PREVIEW_LENGTH = 500;
// Payloads whose UTF-8 byte size exceeds this value are eligible for a full dump.
const PAYLOAD_DUMP_THRESHOLD = 10_000;
95
/**
 * Return the directory used for full payload dumps, creating it if needed.
 *
 * The directory is the "payload" folder inside the log directory. Recursive
 * `mkdirSync` is a no-op for an existing directory, so it is called
 * unconditionally instead of racing an existence check.
 * @returns {string} the payload directory path
 */
function getPayloadDir() {
  const dir = join2(getLogDirectory(), "payload");
  mkdirSync2(dir, { recursive: true });
  return dir;
}
102
/**
 * Turn an arbitrary identifier into a filename-safe fragment.
 * Every character other than ASCII letters, digits and "-" becomes "_",
 * and the result is cut to at most 50 characters.
 * @param {string} identifier - raw identifier (e.g. a URL)
 * @returns {string} sanitized fragment
 */
function safeFilename(identifier) {
  const sanitized = identifier.replace(/[^a-zA-Z0-9-]/g, "_");
  return sanitized.slice(0, 50);
}
105
/**
 * Summarize a large text payload for logging and optionally dump it to disk.
 *
 * The summary always contains a truncated preview, the UTF-8 byte size and a
 * 12-character MD5 prefix (used only as a content fingerprint). When
 * `dumpFull` is true and the payload exceeds PAYLOAD_DUMP_THRESHOLD bytes,
 * the full content is written as JSON into the payload directory and the
 * file name is added to the summary.
 *
 * @param {string} content - payload text
 * @param {string} type - payload category; sanitized before use in the file name
 * @param {string} identifier - payload identifier; sanitized before use in the file name
 * @param {boolean} [dumpFull] - whether dumping is allowed; defaults to
 *   whether the "trace" level is enabled
 * @returns {{preview: string, sizeBytes: number, hash: string, payloadFile?: string}}
 */
function summarizePayload(content, type, identifier, dumpFull = isLevelEnabled("trace")) {
  const sizeBytes = Buffer.byteLength(content, "utf8");
  const hash = createHash("md5").update(content).digest("hex").substring(0, 12);
  const preview = truncateForLog(content, MAX_PREVIEW_LENGTH);
  const baseSummary = { preview, sizeBytes, hash };
  if (!dumpFull || sizeBytes <= PAYLOAD_DUMP_THRESHOLD) {
    return baseSummary;
  }
  // One timestamp for both the file name and the record, so they always agree.
  const timestamp = new Date().toISOString();
  const fileStamp = timestamp.replace(/[:.]/g, "-");
  // `type` is sanitized like `identifier` so it cannot introduce path
  // separators or other characters that are invalid in a file name.
  const safeType = safeFilename(String(type));
  const safeId = safeFilename(identifier);
  const filename = `${fileStamp}-${safeType}-${safeId}-${hash}.json`;
  const filepath = join2(getPayloadDir(), filename);
  writeFileSync(
    filepath,
    JSON.stringify({ timestamp, type, identifier, sizeBytes, content }, null, 2)
  );
  return { ...baseSummary, payloadFile: filename };
}
133
/**
 * Shorten text for log output.
 *
 * Content no longer than `maxLength` UTF-16 code units is returned unchanged.
 * Longer content is cut and suffixed with "... [truncated]". If the cut would
 * fall between the two halves of a surrogate pair, the dangling high
 * surrogate is dropped so the preview never ends in a broken character.
 *
 * @param {string} content - text to shorten
 * @param {number} [maxLength] - maximum number of code units to keep
 * @returns {string} the original or truncated text
 */
function truncateForLog(content, maxLength = MAX_PREVIEW_LENGTH) {
  if (content.length <= maxLength) {
    return content;
  }
  let end = maxLength;
  const lastUnit = content.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range; keeping one without its
  // partner would leave an unpaired surrogate in the output.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return `${content.substring(0, end)}... [truncated]`;
}
139
+
1
140
  // src/services/job.service.ts
2
141
  import fs from "fs";
3
142
  import path from "path";
@@ -200,7 +339,7 @@ var JobService = class {
200
339
  // src/services/config.service.ts
201
340
  import { readFile, writeFile, mkdir } from "fs/promises";
202
341
  import { dirname as dirname2, resolve } from "path";
203
- import { homedir } from "os";
342
+ import { homedir as homedir2 } from "os";
204
343
 
205
344
  // src/types/config.ts
206
345
  var DEFAULT_CONFIG = {
@@ -239,8 +378,8 @@ var DEFAULT_CONFIG = {
239
378
  };
240
379
 
241
380
  // src/services/project-root.service.ts
242
- import { existsSync, statSync, realpathSync } from "fs";
243
- import { dirname, join, normalize, sep } from "path";
381
+ import { existsSync as existsSync3, statSync, realpathSync } from "fs";
382
+ import { dirname, join as join3, normalize, sep } from "path";
244
383
  var ProjectRootService = class {
245
384
  /**
246
385
  * Resolve project root directory using hierarchical detection.
@@ -270,8 +409,8 @@ var ProjectRootService = class {
270
409
  let currentPath = normalize(startPath);
271
410
  const root = normalize(sep);
272
411
  while (currentPath !== root) {
273
- const gitPath = join(currentPath, ".git");
274
- if (existsSync(gitPath)) {
412
+ const gitPath = join3(currentPath, ".git");
413
+ if (existsSync3(gitPath)) {
275
414
  try {
276
415
  const stats = statSync(gitPath);
277
416
  if (stats.isDirectory() || stats.isFile()) {
@@ -317,7 +456,7 @@ var ConfigService = class {
317
456
  configPath;
318
457
  dataDir;
319
458
  config = null;
320
- constructor(configPath = `${homedir()}/.bluera/bluera-knowledge/config.json`, dataDir, projectRoot) {
459
+ constructor(configPath = `${homedir2()}/.bluera/bluera-knowledge/config.json`, dataDir, projectRoot) {
321
460
  this.configPath = configPath;
322
461
  if (dataDir !== void 0 && dataDir !== "") {
323
462
  this.dataDir = dataDir;
@@ -348,7 +487,7 @@ var ConfigService = class {
348
487
  }
349
488
  expandPath(path3, baseDir) {
350
489
  if (path3.startsWith("~")) {
351
- return path3.replace("~", homedir());
490
+ return path3.replace("~", homedir2());
352
491
  }
353
492
  if (!path3.startsWith("/")) {
354
493
  return resolve(baseDir, path3);
@@ -359,7 +498,7 @@ var ConfigService = class {
359
498
 
360
499
  // src/services/store.service.ts
361
500
  import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir3, stat } from "fs/promises";
362
- import { join as join2, resolve as resolve2 } from "path";
501
+ import { join as join4, resolve as resolve2 } from "path";
363
502
  import { randomUUID as randomUUID2 } from "crypto";
364
503
 
365
504
  // src/types/brands.ts
@@ -467,7 +606,7 @@ var StoreService = class {
467
606
  case "repo": {
468
607
  let repoPath = input.path;
469
608
  if (input.url !== void 0) {
470
- const cloneDir = join2(this.dataDir, "repos", id);
609
+ const cloneDir = join4(this.dataDir, "repos", id);
471
610
  const result = await cloneRepository({
472
611
  url: input.url,
473
612
  targetDir: cloneDir,
@@ -563,7 +702,7 @@ var StoreService = class {
563
702
  return ok(void 0);
564
703
  }
565
704
  async loadRegistry() {
566
- const registryPath = join2(this.dataDir, "stores.json");
705
+ const registryPath = join4(this.dataDir, "stores.json");
567
706
  try {
568
707
  const content = await readFile2(registryPath, "utf-8");
569
708
  const data = JSON.parse(content);
@@ -580,7 +719,7 @@ var StoreService = class {
580
719
  }
581
720
  }
582
721
  async saveRegistry() {
583
- const registryPath = join2(this.dataDir, "stores.json");
722
+ const registryPath = join4(this.dataDir, "stores.json");
584
723
  await writeFile2(registryPath, JSON.stringify(this.registry, null, 2));
585
724
  }
586
725
  };
@@ -659,6 +798,7 @@ var CodeUnitService = class {
659
798
  };
660
799
 
661
800
  // src/services/search.service.ts
801
+ var logger = createLogger("search-service");
662
802
  var INTENT_FILE_BOOSTS = {
663
803
  "how-to": {
664
804
  "documentation-primary": 1.3,
@@ -729,79 +869,92 @@ var FRAMEWORK_PATTERNS = [
729
869
  { pattern: /\btypescript\b/i, terms: ["typescript", "ts"] },
730
870
  { pattern: /\bjwt\b/i, terms: ["jwt", "jsonwebtoken", "json-web-token"] }
731
871
  ];
732
- function classifyQueryIntent(query) {
872
// Phrases that signal the user wants instructions for doing something.
const HOW_TO_PATTERNS = [
  /how (do|can|should|would) (i|you|we)/i,
  /how to\b/i,
  /what('s| is) the (best |right |correct )?(way|approach) to/i,
  /i (need|want|have) to/i,
  /show me how/i,
  /\bwhat's the syntax\b/i,
  /\bhow do i (use|create|make|set up|configure|implement|add|get)\b/i,
  /\bi'm (trying|building|creating|making)\b/i
];
// Phrases that ask about internals or source-level behaviour.
const IMPLEMENTATION_PATTERNS = [
  /how (does|is) .* (implemented|work internally)/i,
  /\binternal(ly)?\b/i,
  /\bsource code\b/i,
  /\bunder the hood\b/i,
  /\bimplementation (of|details?)\b/i
];
// Phrases that compare alternatives.
const COMPARISON_PATTERNS = [
  /\b(vs\.?|versus)\b/i,
  /\bdifference(s)? between\b/i,
  /\bcompare\b/i,
  /\bshould (i|we) use .* or\b/i,
  /\bwhat's the difference\b/i,
  /\bwhich (one|is better)\b/i,
  /\bwhen (should|to) use\b/i
];
// Phrases that describe something going wrong.
const DEBUGGING_PATTERNS = [
  /\b(error|bug|issue|problem|crash|fail|broken|wrong)\b/i,
  /\bdoesn't (work|compile|run)\b/i,
  /\bisn't (working|updating|rendering)\b/i,
  /\bwhy (is|does|doesn't|isn't)\b/i,
  /\bwhat('s| is) (wrong|happening|going on)\b/i,
  /\bwhat am i doing wrong\b/i,
  /\bnot (working|updating|showing)\b/i,
  /\bhow do i (fix|debug|solve|resolve)\b/i
];
// Phrases that ask for an explanation of a concept.
const CONCEPTUAL_PATTERNS = [
  /\bwhat (is|are)\b/i,
  /\bexplain\b/i,
  /\bwhat does .* (mean|do)\b/i,
  /\bhow does .* work\b/i,
  /\bwhat('s| is) the (purpose|point|idea)\b/i
];
/**
 * Classify a search query into every intent whose patterns it matches.
 *
 * Each intent family carries a fixed confidence. A query that matches no
 * family is reported as "how-to" with confidence 0.5.
 *
 * @param {string} query - raw user query
 * @returns {{intent: string, confidence: number}[]} matched intents, highest
 *   confidence first (never empty)
 */
function classifyQueryIntents(query) {
  const q = query.toLowerCase();
  // [intent, confidence, patterns], listed from highest to lowest confidence.
  const rules = [
    ["implementation", 0.9, IMPLEMENTATION_PATTERNS],
    ["debugging", 0.85, DEBUGGING_PATTERNS],
    ["comparison", 0.8, COMPARISON_PATTERNS],
    ["how-to", 0.75, HOW_TO_PATTERNS],
    ["conceptual", 0.7, CONCEPTUAL_PATTERNS]
  ];
  const intents = [];
  for (const [intent, confidence, patterns] of rules) {
    if (patterns.some((p) => p.test(q))) {
      intents.push({ intent, confidence });
    }
  }
  if (intents.length === 0) {
    intents.push({ intent: "how-to", confidence: 0.5 });
  }
  return intents.sort((a, b) => b.confidence - a.confidence);
}
938
/**
 * Pick the intent name of the first entry in a classified intent list.
 * @param {{intent: string, confidence: number}[]} intents - classified intents
 * @returns {string} the first entry's intent, or "how-to" when there is none
 */
function getPrimaryIntent(intents) {
  const [top] = intents;
  return top?.intent ?? "how-to";
}
941
// Rank-fusion parameters per content type: the constant `k` plus the
// weights given to the vector and full-text rankings.
const RRF_PRESETS = {
  code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
  web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
};
945
/**
 * Decide whether a result set is predominantly web content or code.
 * A result counts as web content when its metadata has a `url` key.
 * @param {{metadata: object}[]} results - search results to inspect
 * @returns {"web"|"code"} "web" when more than half of the results are web
 *   results, otherwise "code" (including for an empty list)
 */
function detectContentType(results) {
  let webCount = 0;
  for (const { metadata } of results) {
    if ("url" in metadata) {
      webCount += 1;
    }
  }
  if (webCount > results.length / 2) {
    return "web";
  }
  return "code";
}
794
949
  var SearchService = class {
795
950
  lanceStore;
796
951
  embeddingEngine;
797
- rrfConfig;
798
952
  codeUnitService;
799
953
  codeGraphService;
800
954
  graphCache;
801
- constructor(lanceStore, embeddingEngine, rrfConfig = { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 }, codeGraphService) {
955
+ constructor(lanceStore, embeddingEngine, codeGraphService) {
802
956
  this.lanceStore = lanceStore;
803
957
  this.embeddingEngine = embeddingEngine;
804
- this.rrfConfig = rrfConfig;
805
958
  this.codeUnitService = new CodeUnitService();
806
959
  this.codeGraphService = codeGraphService;
807
960
  this.graphCache = /* @__PURE__ */ new Map();
@@ -825,6 +978,17 @@ var SearchService = class {
825
978
  const limit = query.limit ?? 10;
826
979
  const stores = query.stores ?? [];
827
980
  const detail = query.detail ?? "minimal";
981
+ const intents = classifyQueryIntents(query.query);
982
+ const primaryIntent = getPrimaryIntent(intents);
983
+ logger.debug({
984
+ query: query.query,
985
+ mode,
986
+ limit,
987
+ stores,
988
+ detail,
989
+ intent: primaryIntent,
990
+ intents
991
+ }, "Search query received");
828
992
  let allResults = [];
829
993
  const fetchLimit = limit * 3;
830
994
  if (mode === "vector") {
@@ -847,13 +1011,22 @@ var SearchService = class {
847
1011
  const graph = graphs.get(r.metadata.storeId) ?? null;
848
1012
  return this.addProgressiveContext(r, query.query, detail, graph);
849
1013
  });
1014
+ const timeMs = Date.now() - startTime;
1015
+ logger.info({
1016
+ query: query.query,
1017
+ mode,
1018
+ resultCount: enhancedResults.length,
1019
+ dedupedFrom: allResults.length,
1020
+ intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
1021
+ timeMs
1022
+ }, "Search complete");
850
1023
  return {
851
1024
  query: query.query,
852
1025
  mode,
853
1026
  stores,
854
1027
  results: enhancedResults,
855
1028
  totalResults: enhancedResults.length,
856
- timeMs: Date.now() - startTime
1029
+ timeMs
857
1030
  };
858
1031
  }
859
1032
  /**
@@ -871,7 +1044,9 @@ var SearchService = class {
871
1044
  } else {
872
1045
  const existingTermCount = this.countQueryTerms(existing.content, queryTerms);
873
1046
  const newTermCount = this.countQueryTerms(result.content, queryTerms);
874
- if (newTermCount > existingTermCount || newTermCount === existingTermCount && result.score > existing.score) {
1047
+ const existingRelevance = existingTermCount * existing.score;
1048
+ const newRelevance = newTermCount * result.score;
1049
+ if (newRelevance > existingRelevance) {
875
1050
  bySource.set(sourceKey, result);
876
1051
  }
877
1052
  }
@@ -913,7 +1088,7 @@ var SearchService = class {
913
1088
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
914
1089
  }
915
1090
  async hybridSearch(query, stores, limit, threshold) {
916
- const intent = classifyQueryIntent(query);
1091
+ const intents = classifyQueryIntents(query);
917
1092
  const [vectorResults, ftsResults] = await Promise.all([
918
1093
  this.vectorSearch(query, stores, limit * 2, threshold),
919
1094
  this.ftsSearch(query, stores, limit * 2)
@@ -932,7 +1107,8 @@ var SearchService = class {
932
1107
  }
933
1108
  });
934
1109
  const rrfScores = [];
935
- const { k, vectorWeight, ftsWeight } = this.rrfConfig;
1110
+ const contentType = detectContentType([...allDocs.values()]);
1111
+ const { k, vectorWeight, ftsWeight } = RRF_PRESETS[contentType];
936
1112
  for (const [id, result] of allDocs) {
937
1113
  const vectorRank = vectorRanks.get(id) ?? Infinity;
938
1114
  const ftsRank = ftsRanks.get(id) ?? Infinity;
@@ -941,14 +1117,18 @@ var SearchService = class {
941
1117
  const fileTypeBoost = this.getFileTypeBoost(
942
1118
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
943
1119
  result.metadata["fileType"],
944
- intent
1120
+ intents
945
1121
  );
946
1122
  const frameworkBoost = this.getFrameworkContextBoost(query, result);
1123
+ const urlKeywordBoost = this.getUrlKeywordBoost(query, result);
1124
+ const pathKeywordBoost = this.getPathKeywordBoost(query, result);
947
1125
  const metadata = {
948
1126
  vectorRRF,
949
1127
  ftsRRF,
950
1128
  fileTypeBoost,
951
- frameworkBoost
1129
+ frameworkBoost,
1130
+ urlKeywordBoost,
1131
+ pathKeywordBoost
952
1132
  };
953
1133
  if (vectorRank !== Infinity) {
954
1134
  metadata.vectorRank = vectorRank;
@@ -958,7 +1138,7 @@ var SearchService = class {
958
1138
  }
959
1139
  rrfScores.push({
960
1140
  id,
961
- score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost,
1141
+ score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
962
1142
  result,
963
1143
  metadata
964
1144
  });
@@ -1003,7 +1183,7 @@ var SearchService = class {
1003
1183
  * Phase 4: Strengthened boosts for better documentation ranking.
1004
1184
  * Phase 1: Intent-based adjustments for context-aware ranking.
1005
1185
  */
1006
- getFileTypeBoost(fileType, intent) {
1186
+ getFileTypeBoost(fileType, intents) {
1007
1187
  let baseBoost;
1008
1188
  switch (fileType) {
1009
1189
  case "documentation-primary":
@@ -1030,9 +1210,106 @@ var SearchService = class {
1030
1210
  default:
1031
1211
  baseBoost = 1;
1032
1212
  }
1033
- const intentBoosts = INTENT_FILE_BOOSTS[intent];
1034
- const intentMultiplier = intentBoosts[fileType ?? "other"] ?? 1;
1035
- return baseBoost * intentMultiplier;
1213
+ let weightedMultiplier = 0;
1214
+ let totalConfidence = 0;
1215
+ for (const { intent, confidence } of intents) {
1216
+ const intentBoosts = INTENT_FILE_BOOSTS[intent];
1217
+ const multiplier = intentBoosts[fileType ?? "other"] ?? 1;
1218
+ weightedMultiplier += multiplier * confidence;
1219
+ totalConfidence += confidence;
1220
+ }
1221
+ const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
1222
+ return baseBoost * blendedMultiplier;
1223
+ }
1224
+ /**
1225
+ * Get a score multiplier based on URL keyword matching.
1226
+ * Boosts results where URL path contains significant query keywords.
1227
+ * This helps queries like "troubleshooting" rank /troubleshooting pages first.
1228
+ */
1229
+ getUrlKeywordBoost(query, result) {
1230
+ const url = result.metadata.url;
1231
+ if (url === void 0 || url === "") return 1;
1232
+ const urlPath = url.toLowerCase().replace(/[^a-z0-9]+/g, " ");
1233
+ const stopWords = /* @__PURE__ */ new Set([
1234
+ "how",
1235
+ "to",
1236
+ "the",
1237
+ "a",
1238
+ "an",
1239
+ "is",
1240
+ "are",
1241
+ "what",
1242
+ "why",
1243
+ "when",
1244
+ "where",
1245
+ "can",
1246
+ "do",
1247
+ "does",
1248
+ "i",
1249
+ "my",
1250
+ "your",
1251
+ "it",
1252
+ "in",
1253
+ "on",
1254
+ "for",
1255
+ "with",
1256
+ "this",
1257
+ "that",
1258
+ "get",
1259
+ "use",
1260
+ "using"
1261
+ ]);
1262
+ const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
1263
+ if (queryTerms.length === 0) return 1;
1264
+ const matchingTerms = queryTerms.filter((term) => urlPath.includes(term));
1265
+ if (matchingTerms.length === 0) return 1;
1266
+ const matchRatio = matchingTerms.length / queryTerms.length;
1267
+ return 1 + 1 * matchRatio;
1268
+ }
1269
+ /**
1270
+ * Get a score multiplier based on file path keyword matching.
1271
+ * Boosts results where file path contains significant query keywords.
1272
+ * This helps queries like "dispatcher" rank async_dispatcher.py higher.
1273
+ */
1274
+ getPathKeywordBoost(query, result) {
1275
+ const path3 = result.metadata.path;
1276
+ if (path3 === void 0 || path3 === "") return 1;
1277
+ const pathSegments = path3.toLowerCase().replace(/[^a-z0-9]+/g, " ");
1278
+ const stopWords = /* @__PURE__ */ new Set([
1279
+ "how",
1280
+ "to",
1281
+ "the",
1282
+ "a",
1283
+ "an",
1284
+ "is",
1285
+ "are",
1286
+ "what",
1287
+ "why",
1288
+ "when",
1289
+ "where",
1290
+ "can",
1291
+ "do",
1292
+ "does",
1293
+ "i",
1294
+ "my",
1295
+ "your",
1296
+ "it",
1297
+ "in",
1298
+ "on",
1299
+ "for",
1300
+ "with",
1301
+ "this",
1302
+ "that",
1303
+ "get",
1304
+ "use",
1305
+ "using"
1306
+ ]);
1307
+ const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
1308
+ if (queryTerms.length === 0) return 1;
1309
+ const matchingTerms = queryTerms.filter((term) => pathSegments.includes(term));
1310
+ if (matchingTerms.length === 0) return 1;
1311
+ const matchRatio = matchingTerms.length / queryTerms.length;
1312
+ return 1 + 1 * matchRatio;
1036
1313
  }
1037
1314
  /**
1038
1315
  * Get a score multiplier based on framework context.
@@ -1331,17 +1608,31 @@ var SearchService = class {
1331
1608
 
1332
1609
  // src/services/index.service.ts
1333
1610
  import { readFile as readFile3, readdir } from "fs/promises";
1334
- import { join as join3, extname, basename } from "path";
1335
- import { createHash } from "crypto";
1611
+ import { join as join5, extname, basename } from "path";
1612
+ import { createHash as createHash2 } from "crypto";
1336
1613
 
1337
1614
  // src/services/chunking.service.ts
1338
- var ChunkingService = class {
1615
// Chunk size / overlap presets keyed by content type.
const CHUNK_PRESETS = {
  code: { chunkSize: 768, chunkOverlap: 100 },
  web: { chunkSize: 1200, chunkOverlap: 200 },
  docs: { chunkSize: 1200, chunkOverlap: 200 }
};
1620
+ var ChunkingService = class _ChunkingService {
1339
1621
  chunkSize;
1340
1622
  chunkOverlap;
1341
1623
  constructor(config) {
1342
1624
  this.chunkSize = config.chunkSize;
1343
1625
  this.chunkOverlap = config.chunkOverlap;
1344
1626
  }
1627
+ /**
1628
+ * Create a ChunkingService with preset configuration for a content type.
1629
+ * - 'code': Smaller chunks (768/100) for precise code symbol matching
1630
+ * - 'web': Larger chunks (1200/200) for web prose content
1631
+ * - 'docs': Larger chunks (1200/200) for documentation
1632
+ */
1633
+ static forContentType(type) {
1634
+ return new _ChunkingService(CHUNK_PRESETS[type]);
1635
+ }
1345
1636
  /**
1346
1637
  * Chunk text content. Uses semantic chunking for Markdown and code files,
1347
1638
  * falling back to sliding window for other content.
@@ -1596,6 +1887,7 @@ var ChunkingService = class {
1596
1887
  };
1597
1888
 
1598
1889
  // src/services/index.service.ts
1890
+ var logger2 = createLogger("index-service");
1599
1891
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
1600
1892
  ".txt",
1601
1893
  ".md",
@@ -1640,12 +1932,22 @@ var IndexService = class {
1640
1932
  this.codeGraphService = options.codeGraphService;
1641
1933
  }
1642
1934
  async indexStore(store, onProgress) {
1935
+ logger2.info({
1936
+ storeId: store.id,
1937
+ storeName: store.name,
1938
+ storeType: store.type
1939
+ }, "Starting store indexing");
1643
1940
  try {
1644
1941
  if (store.type === "file" || store.type === "repo") {
1645
1942
  return await this.indexFileStore(store, onProgress);
1646
1943
  }
1944
+ logger2.error({ storeId: store.id, storeType: store.type }, "Unsupported store type for indexing");
1647
1945
  return err(new Error(`Indexing not supported for store type: ${store.type}`));
1648
1946
  } catch (error) {
1947
+ logger2.error({
1948
+ storeId: store.id,
1949
+ error: error instanceof Error ? error.message : String(error)
1950
+ }, "Store indexing failed");
1649
1951
  return err(error instanceof Error ? error : new Error(String(error)));
1650
1952
  }
1651
1953
  }
@@ -1654,6 +1956,11 @@ var IndexService = class {
1654
1956
  const files = await this.scanDirectory(store.path);
1655
1957
  const documents = [];
1656
1958
  let filesProcessed = 0;
1959
+ logger2.debug({
1960
+ storeId: store.id,
1961
+ path: store.path,
1962
+ fileCount: files.length
1963
+ }, "Files scanned for indexing");
1657
1964
  const sourceFiles = [];
1658
1965
  onProgress?.({
1659
1966
  type: "start",
@@ -1663,7 +1970,7 @@ var IndexService = class {
1663
1970
  });
1664
1971
  for (const filePath of files) {
1665
1972
  const content = await readFile3(filePath, "utf-8");
1666
- const fileHash = createHash("md5").update(content).digest("hex");
1973
+ const fileHash = createHash2("md5").update(content).digest("hex");
1667
1974
  const chunks = this.chunker.chunk(content, filePath);
1668
1975
  const ext = extname(filePath).toLowerCase();
1669
1976
  const fileName = basename(filePath).toLowerCase();
@@ -1717,17 +2024,26 @@ var IndexService = class {
1717
2024
  total: files.length,
1718
2025
  message: "Indexing complete"
1719
2026
  });
2027
+ const timeMs = Date.now() - startTime;
2028
+ logger2.info({
2029
+ storeId: store.id,
2030
+ storeName: store.name,
2031
+ documentsIndexed: filesProcessed,
2032
+ chunksCreated: documents.length,
2033
+ sourceFilesForGraph: sourceFiles.length,
2034
+ timeMs
2035
+ }, "Store indexing complete");
1720
2036
  return ok({
1721
2037
  documentsIndexed: filesProcessed,
1722
2038
  chunksCreated: documents.length,
1723
- timeMs: Date.now() - startTime
2039
+ timeMs
1724
2040
  });
1725
2041
  }
1726
2042
  async scanDirectory(dir) {
1727
2043
  const files = [];
1728
2044
  const entries = await readdir(dir, { withFileTypes: true });
1729
2045
  for (const entry of entries) {
1730
- const fullPath = join3(dir, entry.name);
2046
+ const fullPath = join5(dir, entry.name);
1731
2047
  if (entry.isDirectory()) {
1732
2048
  if (!["node_modules", ".git", "dist", "build"].includes(entry.name)) {
1733
2049
  files.push(...await this.scanDirectory(fullPath));
@@ -1798,10 +2114,33 @@ var IndexService = class {
1798
2114
  return false;
1799
2115
  }
1800
2116
  };
2117
+ function classifyWebContentType(url, title) {
2118
+ const urlLower = url.toLowerCase();
2119
+ const titleLower = (title ?? "").toLowerCase();
2120
+ if (/\/api[-/]?(ref|reference|docs?)?\//i.test(urlLower) || /api\s*(reference|documentation)/i.test(titleLower)) {
2121
+ return "documentation-primary";
2122
+ }
2123
+ if (/\/(getting[-_]?started|quickstart|tutorial|setup)\b/i.test(urlLower) || /(getting started|quickstart|tutorial)/i.test(titleLower)) {
2124
+ return "documentation-primary";
2125
+ }
2126
+ if (/\/(docs?|documentation|reference|learn|manual|guide)/i.test(urlLower)) {
2127
+ return "documentation";
2128
+ }
2129
+ if (/\/(examples?|demos?|samples?|cookbook)/i.test(urlLower)) {
2130
+ return "example";
2131
+ }
2132
+ if (/changelog|release[-_]?notes/i.test(urlLower)) {
2133
+ return "changelog";
2134
+ }
2135
+ if (/\/blog\//i.test(urlLower)) {
2136
+ return "other";
2137
+ }
2138
+ return "documentation";
2139
+ }
1801
2140
 
1802
2141
  // src/services/code-graph.service.ts
1803
2142
  import { readFile as readFile4, writeFile as writeFile3, mkdir as mkdir4 } from "fs/promises";
1804
- import { join as join4, dirname as dirname3 } from "path";
2143
+ import { join as join6, dirname as dirname3 } from "path";
1805
2144
 
1806
2145
  // src/analysis/code-graph.ts
1807
2146
  var CodeGraph = class {
@@ -3118,7 +3457,7 @@ var CodeGraphService = class {
3118
3457
  this.graphCache.clear();
3119
3458
  }
3120
3459
  getGraphPath(storeId) {
3121
- return join4(this.dataDir, "graphs", `${storeId}.json`);
3460
+ return join6(this.dataDir, "graphs", `${storeId}.json`);
3122
3461
  }
3123
3462
  /**
3124
3463
  * Type guard for SerializedGraph structure.
@@ -3273,9 +3612,9 @@ var LanceStore = class {
3273
3612
 
3274
3613
  // src/db/embeddings.ts
3275
3614
  import { pipeline, env } from "@huggingface/transformers";
3276
- import { homedir as homedir2 } from "os";
3277
- import { join as join5 } from "path";
3278
- env.cacheDir = join5(homedir2(), ".cache", "huggingface-transformers");
3615
+ import { homedir as homedir3 } from "os";
3616
+ import { join as join7 } from "path";
3617
+ env.cacheDir = join7(homedir3(), ".cache", "huggingface-transformers");
3279
3618
  var EmbeddingEngine = class {
3280
3619
  extractor = null;
3281
3620
  modelName;
@@ -3398,25 +3737,27 @@ function validateParsePythonResult(data) {
3398
3737
  }
3399
3738
 
3400
3739
  // src/crawl/bridge.ts
3740
+ var logger3 = createLogger("python-bridge");
3401
3741
  var PythonBridge = class {
3402
3742
  process = null;
3403
3743
  pending = /* @__PURE__ */ new Map();
3404
3744
  stoppingIntentionally = false;
3405
3745
  start() {
3406
3746
  if (this.process) return Promise.resolve();
3747
+ logger3.debug("Starting Python bridge process");
3407
3748
  this.process = spawn2("python3", ["python/crawl_worker.py"], {
3408
3749
  stdio: ["pipe", "pipe", "pipe"]
3409
3750
  });
3410
3751
  this.process.on("error", (err2) => {
3411
- console.error("Python bridge process error:", err2);
3752
+ logger3.error({ error: err2.message, stack: err2.stack }, "Python bridge process error");
3412
3753
  this.rejectAllPending(new Error(`Process error: ${err2.message}`));
3413
3754
  });
3414
3755
  this.process.on("exit", (code, signal) => {
3415
3756
  if (code !== 0 && code !== null) {
3416
- console.error(`Python bridge process exited with code ${String(code)}`);
3757
+ logger3.error({ code }, "Python bridge process exited with non-zero code");
3417
3758
  this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
3418
3759
  } else if (signal && !this.stoppingIntentionally) {
3419
- console.error(`Python bridge process killed with signal ${signal}`);
3760
+ logger3.error({ signal }, "Python bridge process killed with signal");
3420
3761
  this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
3421
3762
  }
3422
3763
  this.process = null;
@@ -3425,7 +3766,7 @@ var PythonBridge = class {
3425
3766
  if (this.process.stderr) {
3426
3767
  const stderrRl = createInterface({ input: this.process.stderr });
3427
3768
  stderrRl.on("line", (line) => {
3428
- console.error("Python bridge stderr:", line);
3769
+ logger3.warn({ stderr: line }, "Python bridge stderr output");
3429
3770
  });
3430
3771
  }
3431
3772
  if (this.process.stdout === null) {
@@ -3460,18 +3801,24 @@ var PythonBridge = class {
3460
3801
  pending.resolve(validated);
3461
3802
  } catch (error) {
3462
3803
  if (error instanceof ZodError) {
3463
- console.error("Python bridge response validation failed:", error.issues);
3464
- console.error("Original response:", JSON.stringify(response.result));
3804
+ logger3.error({
3805
+ issues: error.issues,
3806
+ response: JSON.stringify(response.result)
3807
+ }, "Python bridge response validation failed");
3465
3808
  pending.reject(new Error(`Invalid response format from Python bridge: ${error.message}`));
3466
3809
  } else {
3467
3810
  const errorMessage = error instanceof Error ? error.message : String(error);
3811
+ logger3.error({ error: errorMessage }, "Response validation error");
3468
3812
  pending.reject(new Error(`Response validation error: ${errorMessage}`));
3469
3813
  }
3470
3814
  }
3471
3815
  }
3472
3816
  }
3473
3817
  } catch (err2) {
3474
- console.error("Failed to parse JSON response from Python bridge:", err2, "Line:", line);
3818
+ logger3.error({
3819
+ error: err2 instanceof Error ? err2.message : String(err2),
3820
+ line
3821
+ }, "Failed to parse JSON response from Python bridge");
3475
3822
  }
3476
3823
  });
3477
3824
  return Promise.resolve();
@@ -3570,7 +3917,9 @@ var PythonBridge = class {
3570
3917
  };
3571
3918
 
3572
3919
  // src/services/index.ts
3920
+ var logger4 = createLogger("services");
3573
3921
  async function createServices(configPath, dataDir, projectRoot) {
3922
+ logger4.info({ configPath, dataDir, projectRoot }, "Initializing services");
3574
3923
  const config = new ConfigService(configPath, dataDir, projectRoot);
3575
3924
  const appConfig = await config.load();
3576
3925
  const resolvedDataDir = config.resolveDataDir();
@@ -3585,8 +3934,9 @@ async function createServices(configPath, dataDir, projectRoot) {
3585
3934
  const pythonBridge = new PythonBridge();
3586
3935
  await pythonBridge.start();
3587
3936
  const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
3588
- const search = new SearchService(lance, embeddings, void 0, codeGraph);
3937
+ const search = new SearchService(lance, embeddings, codeGraph);
3589
3938
  const index = new IndexService(lance, embeddings, { codeGraphService: codeGraph });
3939
+ logger4.info({ dataDir: resolvedDataDir }, "Services initialized successfully");
3590
3940
  return {
3591
3941
  config,
3592
3942
  store,
@@ -3599,7 +3949,9 @@ async function createServices(configPath, dataDir, projectRoot) {
3599
3949
  };
3600
3950
  }
3601
3951
  async function destroyServices(services) {
3952
+ logger4.info("Shutting down services");
3602
3953
  await services.pythonBridge.stop();
3954
+ await shutdownLogger();
3603
3955
  }
3604
3956
 
3605
3957
  export {
@@ -3608,10 +3960,15 @@ export {
3608
3960
  ok,
3609
3961
  err,
3610
3962
  extractRepoName,
3963
+ createLogger,
3964
+ summarizePayload,
3965
+ truncateForLog,
3966
+ ChunkingService,
3967
+ classifyWebContentType,
3611
3968
  ASTParser,
3612
3969
  PythonBridge,
3613
3970
  JobService,
3614
3971
  createServices,
3615
3972
  destroyServices
3616
3973
  };
3617
- //# sourceMappingURL=chunk-5QMHZUC4.js.map
3974
+ //# sourceMappingURL=chunk-NJUMU4X2.js.map