bluera-knowledge 0.9.26 → 0.9.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/commit.md +4 -7
- package/.claude/hooks/post-edit-check.sh +21 -24
- package/.claude/skills/atomic-commits/SKILL.md +6 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.env.example +4 -0
- package/.husky/pre-push +12 -2
- package/.versionrc.json +0 -4
- package/BUGS-FOUND.md +71 -0
- package/CHANGELOG.md +76 -0
- package/README.md +55 -20
- package/bun.lock +35 -1
- package/commands/crawl.md +2 -0
- package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
- package/dist/chunk-2SJHNRXD.js.map +1 -0
- package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
- package/dist/chunk-OGEY66FZ.js.map +1 -0
- package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
- package/dist/chunk-RWSXP3PQ.js.map +1 -0
- package/dist/index.js +73 -28
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/eslint.config.js +1 -1
- package/package.json +3 -1
- package/src/analysis/ast-parser.test.ts +46 -0
- package/src/cli/commands/crawl.test.ts +99 -12
- package/src/cli/commands/crawl.ts +76 -24
- package/src/cli/commands/store.test.ts +68 -1
- package/src/cli/commands/store.ts +9 -3
- package/src/crawl/article-converter.ts +36 -1
- package/src/crawl/bridge.ts +18 -7
- package/src/crawl/intelligent-crawler.ts +45 -4
- package/src/db/embeddings.test.ts +16 -0
- package/src/db/lance.test.ts +31 -0
- package/src/db/lance.ts +8 -0
- package/src/logging/index.ts +29 -0
- package/src/logging/logger.test.ts +75 -0
- package/src/logging/logger.ts +147 -0
- package/src/logging/payload.test.ts +152 -0
- package/src/logging/payload.ts +121 -0
- package/src/mcp/handlers/search.handler.test.ts +28 -9
- package/src/mcp/handlers/search.handler.ts +69 -29
- package/src/mcp/handlers/store.handler.test.ts +1 -0
- package/src/mcp/server.ts +44 -16
- package/src/services/chunking.service.ts +23 -0
- package/src/services/index.service.test.ts +921 -1
- package/src/services/index.service.ts +76 -1
- package/src/services/index.ts +20 -2
- package/src/services/search.service.test.ts +573 -21
- package/src/services/search.service.ts +257 -105
- package/src/services/services.test.ts +2 -2
- package/src/services/snippet.service.ts +28 -3
- package/src/services/store.service.test.ts +28 -0
- package/src/services/store.service.ts +4 -0
- package/src/services/token.service.test.ts +45 -0
- package/src/services/token.service.ts +33 -0
- package/src/types/result.test.ts +10 -0
- package/tests/integration/cli-consistency.test.ts +1 -4
- package/vitest.config.ts +4 -0
- package/dist/chunk-5QMHZUC4.js.map +0 -1
- package/dist/chunk-BICFAWMN.js.map +0 -1
- package/dist/chunk-J7J6LXOJ.js.map +0 -1
- package/scripts/readme-version-updater.cjs +0 -18
|
@@ -1,3 +1,142 @@
|
|
|
1
|
+
// src/logging/logger.ts
|
|
2
|
+
import pino from "pino";
|
|
3
|
+
import { homedir } from "os";
|
|
4
|
+
import { mkdirSync, existsSync } from "fs";
|
|
5
|
+
import { join } from "path";
|
|
6
|
+
// Log level names accepted for LOG_LEVEL, listed from most verbose to least verbose.
// isLevelEnabled() compares positions in this array, so the order matters.
var VALID_LEVELS = [
  "trace",
  "debug",
  "info",
  "warn",
  "error",
  "fatal"
];
// Set view of the same names, used for membership checks.
var VALID_LEVELS_SET = new Set(VALID_LEVELS);
|
|
8
|
+
/**
 * Build the path of the directory that holds the log files:
 * <home>/.bluera/bluera-knowledge/logs. Nothing is created on disk here.
 * @returns {string} Log directory path under the current user's home directory.
 */
function getLogDir() {
  const segments = [".bluera", "bluera-knowledge", "logs"];
  return join(homedir(), ...segments);
}
|
|
11
|
+
/**
 * Make sure the log directory exists and return its path.
 * The directory (and any missing parents) is created only when the path does not exist yet.
 * @returns {string} The log directory path from getLogDir().
 */
function ensureLogDir() {
  const dir = getLogDir();
  if (existsSync(dir)) {
    return dir;
  }
  mkdirSync(dir, { recursive: true });
  return dir;
}
|
|
18
|
+
/**
 * Tell whether a value is one of the known log level names.
 * @param {unknown} level - Candidate level; compared exactly (case-sensitive).
 * @returns {boolean} True when the value is a member of VALID_LEVELS_SET.
 */
function isValidLogLevel(level) {
  const known = VALID_LEVELS_SET.has(level);
  return known;
}
|
|
21
|
+
/**
 * Resolve the active log level from the LOG_LEVEL environment variable.
 *
 * The value is matched case-insensitively. Surrounding whitespace is ignored so
 * that a stray space or carriage return (for example from a .env file) does not
 * make start-up fail.
 *
 * @returns {string} One of VALID_LEVELS; "info" when LOG_LEVEL is unset or blank.
 * @throws {Error} When LOG_LEVEL holds a value that is not in VALID_LEVELS.
 */
function getLogLevel() {
  const level = process.env["LOG_LEVEL"]?.trim().toLowerCase();
  if (level === void 0 || level === "") {
    return "info";
  }
  if (!isValidLogLevel(level)) {
    // Fail fast: a misspelled level should not silently fall back to a default.
    throw new Error(
      `Invalid LOG_LEVEL: "${level}". Valid values: ${VALID_LEVELS.join(", ")}`
    );
  }
  return level;
}
|
|
33
|
+
// Root pino logger shared by all module loggers. Stays null until the first
// initializeLogger() call and is reset to null by shutdownLogger().
var rootLogger = null;
/**
 * Return the shared root logger, creating it on the first call.
 *
 * On creation the log directory is ensured first, then the level is read from
 * the environment (which may throw for an invalid LOG_LEVEL). Output goes to
 * "app.log" in the log directory through the "pino-roll" transport.
 *
 * @returns {object} The root pino logger instance.
 */
function initializeLogger() {
  if (rootLogger === null) {
    const logFile = join(ensureLogDir(), "app.log");
    const level = getLogLevel();
    rootLogger = pino({
      level,
      timestamp: pino.stdTimeFunctions.isoTime,
      formatters: {
        // Emit the level as its label rather than pino's default representation.
        level: (label) => ({ level: label })
      },
      transport: {
        target: "pino-roll",
        options: {
          file: logFile,
          // 10MB rotation
          size: "10m",
          // Keep 5 rotated files
          limit: { count: 5 },
          mkdir: true
        }
      }
    });
  }
  return rootLogger;
}
|
|
62
|
+
/**
 * Create a child logger that tags every record with the given module name.
 * The root logger is initialized on demand.
 * @param {string} module - Value stored under the "module" binding of the child logger.
 * @returns {object} Child logger of the root logger.
 */
function createLogger(module) {
  return initializeLogger().child({ module });
}
|
|
66
|
+
/**
 * Check whether records at `level` would pass the level configured in LOG_LEVEL.
 *
 * Levels are compared by their position in VALID_LEVELS. The configured level is
 * re-read from the environment on every call. A name that is not in VALID_LEVELS
 * has index -1 and is therefore never enabled.
 *
 * @param {string} level - Level name to test.
 * @returns {boolean} True when `level` is at or above the configured level.
 * @throws {Error} When LOG_LEVEL is invalid (propagated from getLogLevel()).
 */
function isLevelEnabled(level) {
  const threshold = VALID_LEVELS.indexOf(getLogLevel());
  return VALID_LEVELS.indexOf(level) >= threshold;
}
|
|
72
|
+
/**
 * Accessor for the log directory path; delegates to getLogDir().
 * Does not create the directory.
 * @returns {string} Log directory path.
 */
function getLogDirectory() {
  const logDir = getLogDir();
  return logDir;
}
|
|
75
|
+
/**
 * Flush the root logger and forget it so a later call creates a fresh one.
 *
 * When a root logger exists, flush() is called and the promise resolves 100 ms
 * later, after the reference has been cleared (presumably to give the transport
 * time to drain; confirm against the pino transport behaviour). When no logger
 * was created, the promise resolves immediately.
 *
 * @returns {Promise<void>} Resolves once shutdown has completed.
 */
function shutdownLogger() {
  if (rootLogger === null) {
    return Promise.resolve();
  }
  return new Promise((done) => {
    rootLogger.flush();
    setTimeout(() => {
      rootLogger = null;
      done();
    }, 100);
  });
}
|
|
88
|
+
|
|
89
|
+
// src/logging/payload.ts
|
|
90
|
+
import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync2 } from "fs";
|
|
91
|
+
import { join as join2 } from "path";
|
|
92
|
+
import { createHash } from "crypto";
|
|
93
|
+
// Default number of characters kept when a payload is truncated for a log preview.
var MAX_PREVIEW_LENGTH = 500;
// Payloads must be larger than this many bytes before summarizePayload() dumps them to a file.
var PAYLOAD_DUMP_THRESHOLD = 10000;
|
|
95
|
+
/**
 * Return the "payload" sub-directory of the log directory, creating it
 * (with any missing parents) when the path does not exist yet.
 * @returns {string} Path of the payload dump directory.
 */
function getPayloadDir() {
  const payloadDir = join2(getLogDirectory(), "payload");
  if (existsSync2(payloadDir)) {
    return payloadDir;
  }
  mkdirSync2(payloadDir, { recursive: true });
  return payloadDir;
}
|
|
102
|
+
/**
 * Turn an arbitrary identifier into a fragment that is safe to embed in a file name.
 * Every character other than ASCII letters, digits and "-" becomes "_", and the
 * result is cut to at most 50 characters.
 * @param {string} identifier - Raw identifier.
 * @returns {string} Sanitized fragment, at most 50 characters long.
 */
function safeFilename(identifier) {
  const sanitized = identifier.replace(/[^a-zA-Z0-9-]/g, "_");
  return sanitized.substring(0, 50);
}
|
|
105
|
+
/**
 * Summarize a large string payload for logging and optionally dump it to disk.
 *
 * The summary always contains a truncated preview, the UTF-8 byte size and a
 * 12-character MD5 prefix. The hash is used only as a short content fingerprint
 * in the summary and the dump file name.
 *
 * When `dumpFull` is true and the payload exceeds PAYLOAD_DUMP_THRESHOLD bytes,
 * the full content is written as JSON into the payload directory and the file
 * name is added to the summary as `payloadFile`.
 *
 * @param {string} content - Payload text.
 * @param {string} type - Payload category; embedded verbatim in the dump file name.
 * @param {string} identifier - Payload identifier (sanitized for the file name).
 * @param {boolean} [dumpFull] - Whether dumping is allowed; defaults to whether
 *   the "trace" level is enabled.
 * @returns {{preview: string, sizeBytes: number, hash: string, payloadFile?: string}}
 * @throws Propagates file-system errors from creating the directory or writing the dump.
 */
function summarizePayload(content, type, identifier, dumpFull = isLevelEnabled("trace")) {
  const sizeBytes = Buffer.byteLength(content, "utf8");
  const hash = createHash("md5").update(content).digest("hex").substring(0, 12);
  const preview = truncateForLog(content, MAX_PREVIEW_LENGTH);
  const baseSummary = { preview, sizeBytes, hash };
  if (!dumpFull || sizeBytes <= PAYLOAD_DUMP_THRESHOLD) {
    return baseSummary;
  }
  // Take one instant so the file name and the recorded timestamp always agree.
  const isoTimestamp = new Date().toISOString();
  // ":" and "." are replaced because they are awkward or invalid in file names.
  const fileStamp = isoTimestamp.replace(/[:.]/g, "-");
  const safeId = safeFilename(identifier);
  // NOTE(review): `type` is not sanitized; this assumes callers pass a fixed, file-name-safe value.
  const filename = `${fileStamp}-${type}-${safeId}-${hash}.json`;
  const filepath = join2(getPayloadDir(), filename);
  writeFileSync(
    filepath,
    JSON.stringify(
      {
        timestamp: isoTimestamp,
        type,
        identifier,
        sizeBytes,
        content
      },
      null,
      2
    )
  );
  return { ...baseSummary, payloadFile: filename };
}
|
|
133
|
+
/**
 * Shorten a string for inclusion in a log record.
 *
 * Strings of at most `maxLength` UTF-16 code units are returned unchanged.
 * Longer strings are cut and suffixed with "... [truncated]". If the cut would
 * fall between the two halves of a surrogate pair, the dangling high surrogate
 * is dropped as well so the preview never ends in a lone surrogate.
 *
 * @param {string} content - Text to shorten.
 * @param {number} [maxLength=MAX_PREVIEW_LENGTH] - Maximum number of code units kept.
 * @returns {string} The original or the shortened text with the truncation marker.
 */
function truncateForLog(content, maxLength = MAX_PREVIEW_LENGTH) {
  if (content.length <= maxLength) {
    return content;
  }
  let end = maxLength;
  // charCodeAt returns NaN for out-of-range indexes, which fails both comparisons.
  const lastKept = content.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return content.substring(0, end) + "... [truncated]";
}
|
|
139
|
+
|
|
1
140
|
// src/services/job.service.ts
|
|
2
141
|
import fs from "fs";
|
|
3
142
|
import path from "path";
|
|
@@ -200,7 +339,7 @@ var JobService = class {
|
|
|
200
339
|
// src/services/config.service.ts
|
|
201
340
|
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
202
341
|
import { dirname as dirname2, resolve } from "path";
|
|
203
|
-
import { homedir } from "os";
|
|
342
|
+
import { homedir as homedir2 } from "os";
|
|
204
343
|
|
|
205
344
|
// src/types/config.ts
|
|
206
345
|
var DEFAULT_CONFIG = {
|
|
@@ -239,8 +378,8 @@ var DEFAULT_CONFIG = {
|
|
|
239
378
|
};
|
|
240
379
|
|
|
241
380
|
// src/services/project-root.service.ts
|
|
242
|
-
import { existsSync, statSync, realpathSync } from "fs";
|
|
243
|
-
import { dirname, join, normalize, sep } from "path";
|
|
381
|
+
import { existsSync as existsSync3, statSync, realpathSync } from "fs";
|
|
382
|
+
import { dirname, join as join3, normalize, sep } from "path";
|
|
244
383
|
var ProjectRootService = class {
|
|
245
384
|
/**
|
|
246
385
|
* Resolve project root directory using hierarchical detection.
|
|
@@ -270,8 +409,8 @@ var ProjectRootService = class {
|
|
|
270
409
|
let currentPath = normalize(startPath);
|
|
271
410
|
const root = normalize(sep);
|
|
272
411
|
while (currentPath !== root) {
|
|
273
|
-
const gitPath =
|
|
274
|
-
if (
|
|
412
|
+
const gitPath = join3(currentPath, ".git");
|
|
413
|
+
if (existsSync3(gitPath)) {
|
|
275
414
|
try {
|
|
276
415
|
const stats = statSync(gitPath);
|
|
277
416
|
if (stats.isDirectory() || stats.isFile()) {
|
|
@@ -317,7 +456,7 @@ var ConfigService = class {
|
|
|
317
456
|
configPath;
|
|
318
457
|
dataDir;
|
|
319
458
|
config = null;
|
|
320
|
-
constructor(configPath = `${
|
|
459
|
+
constructor(configPath = `${homedir2()}/.bluera/bluera-knowledge/config.json`, dataDir, projectRoot) {
|
|
321
460
|
this.configPath = configPath;
|
|
322
461
|
if (dataDir !== void 0 && dataDir !== "") {
|
|
323
462
|
this.dataDir = dataDir;
|
|
@@ -348,7 +487,7 @@ var ConfigService = class {
|
|
|
348
487
|
}
|
|
349
488
|
expandPath(path3, baseDir) {
|
|
350
489
|
if (path3.startsWith("~")) {
|
|
351
|
-
return path3.replace("~",
|
|
490
|
+
return path3.replace("~", homedir2());
|
|
352
491
|
}
|
|
353
492
|
if (!path3.startsWith("/")) {
|
|
354
493
|
return resolve(baseDir, path3);
|
|
@@ -359,7 +498,7 @@ var ConfigService = class {
|
|
|
359
498
|
|
|
360
499
|
// src/services/store.service.ts
|
|
361
500
|
import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir3, stat } from "fs/promises";
|
|
362
|
-
import { join as
|
|
501
|
+
import { join as join4, resolve as resolve2 } from "path";
|
|
363
502
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
364
503
|
|
|
365
504
|
// src/types/brands.ts
|
|
@@ -430,6 +569,9 @@ var StoreService = class {
|
|
|
430
569
|
await this.loadRegistry();
|
|
431
570
|
}
|
|
432
571
|
async create(input) {
|
|
572
|
+
if (!input.name || input.name.trim() === "") {
|
|
573
|
+
return err(new Error("Store name cannot be empty"));
|
|
574
|
+
}
|
|
433
575
|
const existing = await this.getByName(input.name);
|
|
434
576
|
if (existing !== void 0) {
|
|
435
577
|
return err(new Error(`Store with name "${input.name}" already exists`));
|
|
@@ -467,7 +609,7 @@ var StoreService = class {
|
|
|
467
609
|
case "repo": {
|
|
468
610
|
let repoPath = input.path;
|
|
469
611
|
if (input.url !== void 0) {
|
|
470
|
-
const cloneDir =
|
|
612
|
+
const cloneDir = join4(this.dataDir, "repos", id);
|
|
471
613
|
const result = await cloneRepository({
|
|
472
614
|
url: input.url,
|
|
473
615
|
targetDir: cloneDir,
|
|
@@ -563,7 +705,7 @@ var StoreService = class {
|
|
|
563
705
|
return ok(void 0);
|
|
564
706
|
}
|
|
565
707
|
async loadRegistry() {
|
|
566
|
-
const registryPath =
|
|
708
|
+
const registryPath = join4(this.dataDir, "stores.json");
|
|
567
709
|
try {
|
|
568
710
|
const content = await readFile2(registryPath, "utf-8");
|
|
569
711
|
const data = JSON.parse(content);
|
|
@@ -580,7 +722,7 @@ var StoreService = class {
|
|
|
580
722
|
}
|
|
581
723
|
}
|
|
582
724
|
async saveRegistry() {
|
|
583
|
-
const registryPath =
|
|
725
|
+
const registryPath = join4(this.dataDir, "stores.json");
|
|
584
726
|
await writeFile2(registryPath, JSON.stringify(this.registry, null, 2));
|
|
585
727
|
}
|
|
586
728
|
};
|
|
@@ -659,6 +801,7 @@ var CodeUnitService = class {
|
|
|
659
801
|
};
|
|
660
802
|
|
|
661
803
|
// src/services/search.service.ts
|
|
804
|
+
var logger = createLogger("search-service");
|
|
662
805
|
var INTENT_FILE_BOOSTS = {
|
|
663
806
|
"how-to": {
|
|
664
807
|
"documentation-primary": 1.3,
|
|
@@ -729,79 +872,92 @@ var FRAMEWORK_PATTERNS = [
|
|
|
729
872
|
{ pattern: /\btypescript\b/i, terms: ["typescript", "ts"] },
|
|
730
873
|
{ pattern: /\bjwt\b/i, terms: ["jwt", "jsonwebtoken", "json-web-token"] }
|
|
731
874
|
];
|
|
732
|
-
|
|
875
|
+
// Phrases that mark a query as a "how-to" request; used by classifyQueryIntents().
var HOW_TO_PATTERNS = [
  // "how do I ...", "how can we ...", "how should you ..."
  /how (do|can|should|would) (i|you|we)/i,
  /how to\b/i,
  // "what's the best way to ...", "what is the approach to ..."
  /what('s| is) the (best |right |correct )?(way|approach) to/i,
  /i (need|want|have) to/i,
  /show me how/i,
  /\bwhat's the syntax\b/i,
  /\bhow do i (use|create|make|set up|configure|implement|add|get)\b/i,
  /\bi'm (trying|building|creating|making)\b/i
];
|
|
885
|
+
// Phrases that mark a query as asking about internals or source code;
// used by classifyQueryIntents().
var IMPLEMENTATION_PATTERNS = [
  // "how is X implemented", "how does X work internally"
  /how (does|is) .* (implemented|work internally)/i,
  /\binternal(ly)?\b/i,
  /\bsource code\b/i,
  /\bunder the hood\b/i,
  /\bimplementation (of|details?)\b/i
];
|
|
892
|
+
// Phrases that mark a query as comparing alternatives; used by classifyQueryIntents().
var COMPARISON_PATTERNS = [
  // "a vs b", "a vs. b", "a versus b"
  /\b(vs\.?|versus)\b/i,
  /\bdifference(s)? between\b/i,
  /\bcompare\b/i,
  // "should I use X or ..."
  /\bshould (i|we) use .* or\b/i,
  /\bwhat's the difference\b/i,
  /\bwhich (one|is better)\b/i,
  /\bwhen (should|to) use\b/i
];
|
|
901
|
+
// Phrases that mark a query as troubleshooting; used by classifyQueryIntents().
var DEBUGGING_PATTERNS = [
  // Whole-word problem vocabulary (inflected forms such as "errors" do not match).
  /\b(error|bug|issue|problem|crash|fail|broken|wrong)\b/i,
  /\bdoesn't (work|compile|run)\b/i,
  /\bisn't (working|updating|rendering)\b/i,
  /\bwhy (is|does|doesn't|isn't)\b/i,
  /\bwhat('s| is) (wrong|happening|going on)\b/i,
  /\bwhat am i doing wrong\b/i,
  /\bnot (working|updating|showing)\b/i,
  /\bhow do i (fix|debug|solve|resolve)\b/i
];
|
|
911
|
+
// Phrases that mark a query as asking for an explanation of a concept;
// used by classifyQueryIntents().
var CONCEPTUAL_PATTERNS = [
  /\bwhat (is|are)\b/i,
  /\bexplain\b/i,
  // "what does X mean", "what does X do"
  /\bwhat does .* (mean|do)\b/i,
  /\bhow does .* work\b/i,
  /\bwhat('s| is) the (purpose|point|idea)\b/i
];
|
|
918
|
+
/**
 * Classify a search query into every intent whose pattern list matches it.
 *
 * Each matching intent is reported with a fixed confidence. When nothing
 * matches, a single low-confidence "how-to" entry is returned.
 *
 * @param {string} query - Raw query text; lower-cased before matching.
 * @returns {Array<{intent: string, confidence: number}>} Matched intents,
 *   sorted by descending confidence; never empty.
 */
function classifyQueryIntents(query) {
  const q = query.toLowerCase();
  // [pattern list, intent label, confidence], already in descending confidence order.
  const rules = [
    [IMPLEMENTATION_PATTERNS, "implementation", 0.9],
    [DEBUGGING_PATTERNS, "debugging", 0.85],
    [COMPARISON_PATTERNS, "comparison", 0.8],
    [HOW_TO_PATTERNS, "how-to", 0.75],
    [CONCEPTUAL_PATTERNS, "conceptual", 0.7]
  ];
  const intents = [];
  for (const [patterns, intent, confidence] of rules) {
    if (patterns.some((p) => p.test(q))) {
      intents.push({ intent, confidence });
    }
  }
  if (intents.length === 0) {
    intents.push({ intent: "how-to", confidence: 0.5 });
  }
  return intents.sort((a, b) => b.confidence - a.confidence);
}
|
|
941
|
+
/**
 * Pick the intent label of the first entry in an intent list.
 * @param {Array<{intent?: string}>} intents - Intent entries, expected best-first.
 * @returns {string} The first entry's intent, or "how-to" when the list is empty
 *   or the first entry has no intent.
 */
function getPrimaryIntent(intents) {
  const top = intents[0];
  return top?.intent ?? "how-to";
}
|
|
944
|
+
// Rank-fusion parameters used by hybrid search, keyed by the content type
// returned from detectContentType(): the constant k and the vector / full-text weights.
var RRF_PRESETS = {
  code: {
    k: 20,
    vectorWeight: 0.6,
    ftsWeight: 0.4
  },
  web: {
    k: 30,
    vectorWeight: 0.55,
    ftsWeight: 0.45
  }
};
|
|
948
|
+
/**
 * Decide whether a result set is predominantly web content or code.
 *
 * A result counts as web content when its metadata object has a "url" key
 * (regardless of the key's value).
 *
 * @param {Array<{metadata: object}>} results - Search results to inspect.
 * @returns {"web"|"code"} "web" when strictly more than half of the results have
 *   a url key, otherwise "code" (including for an empty list).
 */
function detectContentType(results) {
  const webCount = results.reduce(
    (count, r) => ("url" in r.metadata ? count + 1 : count),
    0
  );
  if (webCount > results.length / 2) {
    return "web";
  }
  return "code";
}
|
|
794
952
|
var SearchService = class {
|
|
795
953
|
lanceStore;
|
|
796
954
|
embeddingEngine;
|
|
797
|
-
rrfConfig;
|
|
798
955
|
codeUnitService;
|
|
799
956
|
codeGraphService;
|
|
800
957
|
graphCache;
|
|
801
|
-
constructor(lanceStore, embeddingEngine,
|
|
958
|
+
constructor(lanceStore, embeddingEngine, codeGraphService) {
|
|
802
959
|
this.lanceStore = lanceStore;
|
|
803
960
|
this.embeddingEngine = embeddingEngine;
|
|
804
|
-
this.rrfConfig = rrfConfig;
|
|
805
961
|
this.codeUnitService = new CodeUnitService();
|
|
806
962
|
this.codeGraphService = codeGraphService;
|
|
807
963
|
this.graphCache = /* @__PURE__ */ new Map();
|
|
@@ -825,6 +981,17 @@ var SearchService = class {
|
|
|
825
981
|
const limit = query.limit ?? 10;
|
|
826
982
|
const stores = query.stores ?? [];
|
|
827
983
|
const detail = query.detail ?? "minimal";
|
|
984
|
+
const intents = classifyQueryIntents(query.query);
|
|
985
|
+
const primaryIntent = getPrimaryIntent(intents);
|
|
986
|
+
logger.debug({
|
|
987
|
+
query: query.query,
|
|
988
|
+
mode,
|
|
989
|
+
limit,
|
|
990
|
+
stores,
|
|
991
|
+
detail,
|
|
992
|
+
intent: primaryIntent,
|
|
993
|
+
intents
|
|
994
|
+
}, "Search query received");
|
|
828
995
|
let allResults = [];
|
|
829
996
|
const fetchLimit = limit * 3;
|
|
830
997
|
if (mode === "vector") {
|
|
@@ -847,13 +1014,22 @@ var SearchService = class {
|
|
|
847
1014
|
const graph = graphs.get(r.metadata.storeId) ?? null;
|
|
848
1015
|
return this.addProgressiveContext(r, query.query, detail, graph);
|
|
849
1016
|
});
|
|
1017
|
+
const timeMs = Date.now() - startTime;
|
|
1018
|
+
logger.info({
|
|
1019
|
+
query: query.query,
|
|
1020
|
+
mode,
|
|
1021
|
+
resultCount: enhancedResults.length,
|
|
1022
|
+
dedupedFrom: allResults.length,
|
|
1023
|
+
intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
|
|
1024
|
+
timeMs
|
|
1025
|
+
}, "Search complete");
|
|
850
1026
|
return {
|
|
851
1027
|
query: query.query,
|
|
852
1028
|
mode,
|
|
853
1029
|
stores,
|
|
854
1030
|
results: enhancedResults,
|
|
855
1031
|
totalResults: enhancedResults.length,
|
|
856
|
-
timeMs
|
|
1032
|
+
timeMs
|
|
857
1033
|
};
|
|
858
1034
|
}
|
|
859
1035
|
/**
|
|
@@ -871,7 +1047,9 @@ var SearchService = class {
|
|
|
871
1047
|
} else {
|
|
872
1048
|
const existingTermCount = this.countQueryTerms(existing.content, queryTerms);
|
|
873
1049
|
const newTermCount = this.countQueryTerms(result.content, queryTerms);
|
|
874
|
-
|
|
1050
|
+
const existingRelevance = existingTermCount * existing.score;
|
|
1051
|
+
const newRelevance = newTermCount * result.score;
|
|
1052
|
+
if (newRelevance > existingRelevance) {
|
|
875
1053
|
bySource.set(sourceKey, result);
|
|
876
1054
|
}
|
|
877
1055
|
}
|
|
@@ -913,7 +1091,7 @@ var SearchService = class {
|
|
|
913
1091
|
return results.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
914
1092
|
}
|
|
915
1093
|
async hybridSearch(query, stores, limit, threshold) {
|
|
916
|
-
const
|
|
1094
|
+
const intents = classifyQueryIntents(query);
|
|
917
1095
|
const [vectorResults, ftsResults] = await Promise.all([
|
|
918
1096
|
this.vectorSearch(query, stores, limit * 2, threshold),
|
|
919
1097
|
this.ftsSearch(query, stores, limit * 2)
|
|
@@ -932,7 +1110,8 @@ var SearchService = class {
|
|
|
932
1110
|
}
|
|
933
1111
|
});
|
|
934
1112
|
const rrfScores = [];
|
|
935
|
-
const
|
|
1113
|
+
const contentType = detectContentType([...allDocs.values()]);
|
|
1114
|
+
const { k, vectorWeight, ftsWeight } = RRF_PRESETS[contentType];
|
|
936
1115
|
for (const [id, result] of allDocs) {
|
|
937
1116
|
const vectorRank = vectorRanks.get(id) ?? Infinity;
|
|
938
1117
|
const ftsRank = ftsRanks.get(id) ?? Infinity;
|
|
@@ -941,14 +1120,18 @@ var SearchService = class {
|
|
|
941
1120
|
const fileTypeBoost = this.getFileTypeBoost(
|
|
942
1121
|
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
943
1122
|
result.metadata["fileType"],
|
|
944
|
-
|
|
1123
|
+
intents
|
|
945
1124
|
);
|
|
946
1125
|
const frameworkBoost = this.getFrameworkContextBoost(query, result);
|
|
1126
|
+
const urlKeywordBoost = this.getUrlKeywordBoost(query, result);
|
|
1127
|
+
const pathKeywordBoost = this.getPathKeywordBoost(query, result);
|
|
947
1128
|
const metadata = {
|
|
948
1129
|
vectorRRF,
|
|
949
1130
|
ftsRRF,
|
|
950
1131
|
fileTypeBoost,
|
|
951
|
-
frameworkBoost
|
|
1132
|
+
frameworkBoost,
|
|
1133
|
+
urlKeywordBoost,
|
|
1134
|
+
pathKeywordBoost
|
|
952
1135
|
};
|
|
953
1136
|
if (vectorRank !== Infinity) {
|
|
954
1137
|
metadata.vectorRank = vectorRank;
|
|
@@ -958,7 +1141,7 @@ var SearchService = class {
|
|
|
958
1141
|
}
|
|
959
1142
|
rrfScores.push({
|
|
960
1143
|
id,
|
|
961
|
-
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost,
|
|
1144
|
+
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
|
|
962
1145
|
result,
|
|
963
1146
|
metadata
|
|
964
1147
|
});
|
|
@@ -1003,7 +1186,7 @@ var SearchService = class {
|
|
|
1003
1186
|
* Phase 4: Strengthened boosts for better documentation ranking.
|
|
1004
1187
|
* Phase 1: Intent-based adjustments for context-aware ranking.
|
|
1005
1188
|
*/
|
|
1006
|
-
getFileTypeBoost(fileType,
|
|
1189
|
+
getFileTypeBoost(fileType, intents) {
|
|
1007
1190
|
let baseBoost;
|
|
1008
1191
|
switch (fileType) {
|
|
1009
1192
|
case "documentation-primary":
|
|
@@ -1030,9 +1213,106 @@ var SearchService = class {
|
|
|
1030
1213
|
default:
|
|
1031
1214
|
baseBoost = 1;
|
|
1032
1215
|
}
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1216
|
+
let weightedMultiplier = 0;
|
|
1217
|
+
let totalConfidence = 0;
|
|
1218
|
+
for (const { intent, confidence } of intents) {
|
|
1219
|
+
const intentBoosts = INTENT_FILE_BOOSTS[intent];
|
|
1220
|
+
const multiplier = intentBoosts[fileType ?? "other"] ?? 1;
|
|
1221
|
+
weightedMultiplier += multiplier * confidence;
|
|
1222
|
+
totalConfidence += confidence;
|
|
1223
|
+
}
|
|
1224
|
+
const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
|
|
1225
|
+
return baseBoost * blendedMultiplier;
|
|
1226
|
+
}
|
|
1227
|
+
/**
|
|
1228
|
+
* Get a score multiplier based on URL keyword matching.
|
|
1229
|
+
* Boosts results where URL path contains significant query keywords.
|
|
1230
|
+
* This helps queries like "troubleshooting" rank /troubleshooting pages first.
|
|
1231
|
+
*/
|
|
1232
|
+
getUrlKeywordBoost(query, result) {
|
|
1233
|
+
const url = result.metadata.url;
|
|
1234
|
+
if (url === void 0 || url === "") return 1;
|
|
1235
|
+
const urlPath = url.toLowerCase().replace(/[^a-z0-9]+/g, " ");
|
|
1236
|
+
const stopWords = /* @__PURE__ */ new Set([
|
|
1237
|
+
"how",
|
|
1238
|
+
"to",
|
|
1239
|
+
"the",
|
|
1240
|
+
"a",
|
|
1241
|
+
"an",
|
|
1242
|
+
"is",
|
|
1243
|
+
"are",
|
|
1244
|
+
"what",
|
|
1245
|
+
"why",
|
|
1246
|
+
"when",
|
|
1247
|
+
"where",
|
|
1248
|
+
"can",
|
|
1249
|
+
"do",
|
|
1250
|
+
"does",
|
|
1251
|
+
"i",
|
|
1252
|
+
"my",
|
|
1253
|
+
"your",
|
|
1254
|
+
"it",
|
|
1255
|
+
"in",
|
|
1256
|
+
"on",
|
|
1257
|
+
"for",
|
|
1258
|
+
"with",
|
|
1259
|
+
"this",
|
|
1260
|
+
"that",
|
|
1261
|
+
"get",
|
|
1262
|
+
"use",
|
|
1263
|
+
"using"
|
|
1264
|
+
]);
|
|
1265
|
+
const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
|
|
1266
|
+
if (queryTerms.length === 0) return 1;
|
|
1267
|
+
const matchingTerms = queryTerms.filter((term) => urlPath.includes(term));
|
|
1268
|
+
if (matchingTerms.length === 0) return 1;
|
|
1269
|
+
const matchRatio = matchingTerms.length / queryTerms.length;
|
|
1270
|
+
return 1 + 1 * matchRatio;
|
|
1271
|
+
}
|
|
1272
|
+
/**
|
|
1273
|
+
* Get a score multiplier based on file path keyword matching.
|
|
1274
|
+
* Boosts results where file path contains significant query keywords.
|
|
1275
|
+
* This helps queries like "dispatcher" rank async_dispatcher.py higher.
|
|
1276
|
+
*/
|
|
1277
|
+
getPathKeywordBoost(query, result) {
|
|
1278
|
+
const path3 = result.metadata.path;
|
|
1279
|
+
if (path3 === void 0 || path3 === "") return 1;
|
|
1280
|
+
const pathSegments = path3.toLowerCase().replace(/[^a-z0-9]+/g, " ");
|
|
1281
|
+
const stopWords = /* @__PURE__ */ new Set([
|
|
1282
|
+
"how",
|
|
1283
|
+
"to",
|
|
1284
|
+
"the",
|
|
1285
|
+
"a",
|
|
1286
|
+
"an",
|
|
1287
|
+
"is",
|
|
1288
|
+
"are",
|
|
1289
|
+
"what",
|
|
1290
|
+
"why",
|
|
1291
|
+
"when",
|
|
1292
|
+
"where",
|
|
1293
|
+
"can",
|
|
1294
|
+
"do",
|
|
1295
|
+
"does",
|
|
1296
|
+
"i",
|
|
1297
|
+
"my",
|
|
1298
|
+
"your",
|
|
1299
|
+
"it",
|
|
1300
|
+
"in",
|
|
1301
|
+
"on",
|
|
1302
|
+
"for",
|
|
1303
|
+
"with",
|
|
1304
|
+
"this",
|
|
1305
|
+
"that",
|
|
1306
|
+
"get",
|
|
1307
|
+
"use",
|
|
1308
|
+
"using"
|
|
1309
|
+
]);
|
|
1310
|
+
const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2 && !stopWords.has(t2));
|
|
1311
|
+
if (queryTerms.length === 0) return 1;
|
|
1312
|
+
const matchingTerms = queryTerms.filter((term) => pathSegments.includes(term));
|
|
1313
|
+
if (matchingTerms.length === 0) return 1;
|
|
1314
|
+
const matchRatio = matchingTerms.length / queryTerms.length;
|
|
1315
|
+
return 1 + 1 * matchRatio;
|
|
1036
1316
|
}
|
|
1037
1317
|
/**
|
|
1038
1318
|
* Get a score multiplier based on framework context.
|
|
@@ -1331,17 +1611,31 @@ var SearchService = class {
|
|
|
1331
1611
|
|
|
1332
1612
|
// src/services/index.service.ts
|
|
1333
1613
|
import { readFile as readFile3, readdir } from "fs/promises";
|
|
1334
|
-
import { join as
|
|
1335
|
-
import { createHash } from "crypto";
|
|
1614
|
+
import { join as join5, extname, basename } from "path";
|
|
1615
|
+
import { createHash as createHash2 } from "crypto";
|
|
1336
1616
|
|
|
1337
1617
|
// src/services/chunking.service.ts
|
|
1338
|
-
var
|
|
1618
|
+
// Chunking configuration per content type, consumed by ChunkingService.forContentType().
// Code uses smaller chunks than web pages and documentation.
var CHUNK_PRESETS = {
  code: {
    chunkSize: 768,
    chunkOverlap: 100
  },
  web: {
    chunkSize: 1200,
    chunkOverlap: 200
  },
  docs: {
    chunkSize: 1200,
    chunkOverlap: 200
  }
};
|
|
1623
|
+
var ChunkingService = class _ChunkingService {
|
|
1339
1624
|
chunkSize;
|
|
1340
1625
|
chunkOverlap;
|
|
1341
1626
|
constructor(config) {
|
|
1342
1627
|
this.chunkSize = config.chunkSize;
|
|
1343
1628
|
this.chunkOverlap = config.chunkOverlap;
|
|
1344
1629
|
}
|
|
1630
|
+
/**
|
|
1631
|
+
* Create a ChunkingService with preset configuration for a content type.
|
|
1632
|
+
* - 'code': Smaller chunks (768/100) for precise code symbol matching
|
|
1633
|
+
* - 'web': Larger chunks (1200/200) for web prose content
|
|
1634
|
+
* - 'docs': Larger chunks (1200/200) for documentation
|
|
1635
|
+
*/
|
|
1636
|
+
static forContentType(type) {
|
|
1637
|
+
return new _ChunkingService(CHUNK_PRESETS[type]);
|
|
1638
|
+
}
|
|
1345
1639
|
/**
|
|
1346
1640
|
* Chunk text content. Uses semantic chunking for Markdown and code files,
|
|
1347
1641
|
* falling back to sliding window for other content.
|
|
@@ -1596,6 +1890,7 @@ var ChunkingService = class {
|
|
|
1596
1890
|
};
|
|
1597
1891
|
|
|
1598
1892
|
// src/services/index.service.ts
|
|
1893
|
+
var logger2 = createLogger("index-service");
|
|
1599
1894
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
1600
1895
|
".txt",
|
|
1601
1896
|
".md",
|
|
@@ -1640,12 +1935,22 @@ var IndexService = class {
|
|
|
1640
1935
|
this.codeGraphService = options.codeGraphService;
|
|
1641
1936
|
}
|
|
1642
1937
|
async indexStore(store, onProgress) {
|
|
1938
|
+
logger2.info({
|
|
1939
|
+
storeId: store.id,
|
|
1940
|
+
storeName: store.name,
|
|
1941
|
+
storeType: store.type
|
|
1942
|
+
}, "Starting store indexing");
|
|
1643
1943
|
try {
|
|
1644
1944
|
if (store.type === "file" || store.type === "repo") {
|
|
1645
1945
|
return await this.indexFileStore(store, onProgress);
|
|
1646
1946
|
}
|
|
1947
|
+
logger2.error({ storeId: store.id, storeType: store.type }, "Unsupported store type for indexing");
|
|
1647
1948
|
return err(new Error(`Indexing not supported for store type: ${store.type}`));
|
|
1648
1949
|
} catch (error) {
|
|
1950
|
+
logger2.error({
|
|
1951
|
+
storeId: store.id,
|
|
1952
|
+
error: error instanceof Error ? error.message : String(error)
|
|
1953
|
+
}, "Store indexing failed");
|
|
1649
1954
|
return err(error instanceof Error ? error : new Error(String(error)));
|
|
1650
1955
|
}
|
|
1651
1956
|
}
|
|
@@ -1654,6 +1959,11 @@ var IndexService = class {
|
|
|
1654
1959
|
const files = await this.scanDirectory(store.path);
|
|
1655
1960
|
const documents = [];
|
|
1656
1961
|
let filesProcessed = 0;
|
|
1962
|
+
logger2.debug({
|
|
1963
|
+
storeId: store.id,
|
|
1964
|
+
path: store.path,
|
|
1965
|
+
fileCount: files.length
|
|
1966
|
+
}, "Files scanned for indexing");
|
|
1657
1967
|
const sourceFiles = [];
|
|
1658
1968
|
onProgress?.({
|
|
1659
1969
|
type: "start",
|
|
@@ -1663,7 +1973,7 @@ var IndexService = class {
|
|
|
1663
1973
|
});
|
|
1664
1974
|
for (const filePath of files) {
|
|
1665
1975
|
const content = await readFile3(filePath, "utf-8");
|
|
1666
|
-
const fileHash =
|
|
1976
|
+
const fileHash = createHash2("md5").update(content).digest("hex");
|
|
1667
1977
|
const chunks = this.chunker.chunk(content, filePath);
|
|
1668
1978
|
const ext = extname(filePath).toLowerCase();
|
|
1669
1979
|
const fileName = basename(filePath).toLowerCase();
|
|
@@ -1717,17 +2027,26 @@ var IndexService = class {
|
|
|
1717
2027
|
total: files.length,
|
|
1718
2028
|
message: "Indexing complete"
|
|
1719
2029
|
});
|
|
2030
|
+
const timeMs = Date.now() - startTime;
|
|
2031
|
+
logger2.info({
|
|
2032
|
+
storeId: store.id,
|
|
2033
|
+
storeName: store.name,
|
|
2034
|
+
documentsIndexed: filesProcessed,
|
|
2035
|
+
chunksCreated: documents.length,
|
|
2036
|
+
sourceFilesForGraph: sourceFiles.length,
|
|
2037
|
+
timeMs
|
|
2038
|
+
}, "Store indexing complete");
|
|
1720
2039
|
return ok({
|
|
1721
2040
|
documentsIndexed: filesProcessed,
|
|
1722
2041
|
chunksCreated: documents.length,
|
|
1723
|
-
timeMs
|
|
2042
|
+
timeMs
|
|
1724
2043
|
});
|
|
1725
2044
|
}
|
|
1726
2045
|
async scanDirectory(dir) {
|
|
1727
2046
|
const files = [];
|
|
1728
2047
|
const entries = await readdir(dir, { withFileTypes: true });
|
|
1729
2048
|
for (const entry of entries) {
|
|
1730
|
-
const fullPath =
|
|
2049
|
+
const fullPath = join5(dir, entry.name);
|
|
1731
2050
|
if (entry.isDirectory()) {
|
|
1732
2051
|
if (!["node_modules", ".git", "dist", "build"].includes(entry.name)) {
|
|
1733
2052
|
files.push(...await this.scanDirectory(fullPath));
|
|
@@ -1798,10 +2117,33 @@ var IndexService = class {
|
|
|
1798
2117
|
return false;
|
|
1799
2118
|
}
|
|
1800
2119
|
};
|
|
2120
|
+
function classifyWebContentType(url, title) {
|
|
2121
|
+
const urlLower = url.toLowerCase();
|
|
2122
|
+
const titleLower = (title ?? "").toLowerCase();
|
|
2123
|
+
if (/\/api[-/]?(ref|reference|docs?)?\//i.test(urlLower) || /api\s*(reference|documentation)/i.test(titleLower)) {
|
|
2124
|
+
return "documentation-primary";
|
|
2125
|
+
}
|
|
2126
|
+
if (/\/(getting[-_]?started|quickstart|tutorial|setup)\b/i.test(urlLower) || /(getting started|quickstart|tutorial)/i.test(titleLower)) {
|
|
2127
|
+
return "documentation-primary";
|
|
2128
|
+
}
|
|
2129
|
+
if (/\/(docs?|documentation|reference|learn|manual|guide)/i.test(urlLower)) {
|
|
2130
|
+
return "documentation";
|
|
2131
|
+
}
|
|
2132
|
+
if (/\/(examples?|demos?|samples?|cookbook)/i.test(urlLower)) {
|
|
2133
|
+
return "example";
|
|
2134
|
+
}
|
|
2135
|
+
if (/changelog|release[-_]?notes/i.test(urlLower)) {
|
|
2136
|
+
return "changelog";
|
|
2137
|
+
}
|
|
2138
|
+
if (/\/blog\//i.test(urlLower)) {
|
|
2139
|
+
return "other";
|
|
2140
|
+
}
|
|
2141
|
+
return "documentation";
|
|
2142
|
+
}
|
|
1801
2143
|
|
|
1802
2144
|
// src/services/code-graph.service.ts
|
|
1803
2145
|
import { readFile as readFile4, writeFile as writeFile3, mkdir as mkdir4 } from "fs/promises";
|
|
1804
|
-
import { join as
|
|
2146
|
+
import { join as join6, dirname as dirname3 } from "path";
|
|
1805
2147
|
|
|
1806
2148
|
// src/analysis/code-graph.ts
|
|
1807
2149
|
var CodeGraph = class {
|
|
@@ -3118,7 +3460,7 @@ var CodeGraphService = class {
|
|
|
3118
3460
|
this.graphCache.clear();
|
|
3119
3461
|
}
|
|
3120
3462
|
getGraphPath(storeId) {
|
|
3121
|
-
return
|
|
3463
|
+
return join6(this.dataDir, "graphs", `${storeId}.json`);
|
|
3122
3464
|
}
|
|
3123
3465
|
/**
|
|
3124
3466
|
* Type guard for SerializedGraph structure.
|
|
@@ -3254,6 +3596,13 @@ var LanceStore = class {
|
|
|
3254
3596
|
this.tables.delete(tableName);
|
|
3255
3597
|
}
|
|
3256
3598
|
}
|
|
3599
|
+
close() {
|
|
3600
|
+
this.tables.clear();
|
|
3601
|
+
if (this.connection !== null) {
|
|
3602
|
+
this.connection.close();
|
|
3603
|
+
this.connection = null;
|
|
3604
|
+
}
|
|
3605
|
+
}
|
|
3257
3606
|
getTableName(storeId) {
|
|
3258
3607
|
return `documents_${storeId}`;
|
|
3259
3608
|
}
|
|
@@ -3273,9 +3622,9 @@ var LanceStore = class {
|
|
|
3273
3622
|
|
|
3274
3623
|
// src/db/embeddings.ts
|
|
3275
3624
|
import { pipeline, env } from "@huggingface/transformers";
|
|
3276
|
-
import { homedir as
|
|
3277
|
-
import { join as
|
|
3278
|
-
env.cacheDir =
|
|
3625
|
+
import { homedir as homedir3 } from "os";
|
|
3626
|
+
import { join as join7 } from "path";
|
|
3627
|
+
env.cacheDir = join7(homedir3(), ".cache", "huggingface-transformers");
|
|
3279
3628
|
var EmbeddingEngine = class {
|
|
3280
3629
|
extractor = null;
|
|
3281
3630
|
modelName;
|
|
@@ -3398,25 +3747,27 @@ function validateParsePythonResult(data) {
|
|
|
3398
3747
|
}
|
|
3399
3748
|
|
|
3400
3749
|
// src/crawl/bridge.ts
|
|
3750
|
+
var logger3 = createLogger("python-bridge");
|
|
3401
3751
|
var PythonBridge = class {
|
|
3402
3752
|
process = null;
|
|
3403
3753
|
pending = /* @__PURE__ */ new Map();
|
|
3404
3754
|
stoppingIntentionally = false;
|
|
3405
3755
|
start() {
|
|
3406
3756
|
if (this.process) return Promise.resolve();
|
|
3757
|
+
logger3.debug("Starting Python bridge process");
|
|
3407
3758
|
this.process = spawn2("python3", ["python/crawl_worker.py"], {
|
|
3408
3759
|
stdio: ["pipe", "pipe", "pipe"]
|
|
3409
3760
|
});
|
|
3410
3761
|
this.process.on("error", (err2) => {
|
|
3411
|
-
|
|
3762
|
+
logger3.error({ error: err2.message, stack: err2.stack }, "Python bridge process error");
|
|
3412
3763
|
this.rejectAllPending(new Error(`Process error: ${err2.message}`));
|
|
3413
3764
|
});
|
|
3414
3765
|
this.process.on("exit", (code, signal) => {
|
|
3415
3766
|
if (code !== 0 && code !== null) {
|
|
3416
|
-
|
|
3767
|
+
logger3.error({ code }, "Python bridge process exited with non-zero code");
|
|
3417
3768
|
this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
|
|
3418
3769
|
} else if (signal && !this.stoppingIntentionally) {
|
|
3419
|
-
|
|
3770
|
+
logger3.error({ signal }, "Python bridge process killed with signal");
|
|
3420
3771
|
this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
|
|
3421
3772
|
}
|
|
3422
3773
|
this.process = null;
|
|
@@ -3425,7 +3776,7 @@ var PythonBridge = class {
|
|
|
3425
3776
|
if (this.process.stderr) {
|
|
3426
3777
|
const stderrRl = createInterface({ input: this.process.stderr });
|
|
3427
3778
|
stderrRl.on("line", (line) => {
|
|
3428
|
-
|
|
3779
|
+
logger3.warn({ stderr: line }, "Python bridge stderr output");
|
|
3429
3780
|
});
|
|
3430
3781
|
}
|
|
3431
3782
|
if (this.process.stdout === null) {
|
|
@@ -3460,18 +3811,24 @@ var PythonBridge = class {
|
|
|
3460
3811
|
pending.resolve(validated);
|
|
3461
3812
|
} catch (error) {
|
|
3462
3813
|
if (error instanceof ZodError) {
|
|
3463
|
-
|
|
3464
|
-
|
|
3814
|
+
logger3.error({
|
|
3815
|
+
issues: error.issues,
|
|
3816
|
+
response: JSON.stringify(response.result)
|
|
3817
|
+
}, "Python bridge response validation failed");
|
|
3465
3818
|
pending.reject(new Error(`Invalid response format from Python bridge: ${error.message}`));
|
|
3466
3819
|
} else {
|
|
3467
3820
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
3821
|
+
logger3.error({ error: errorMessage }, "Response validation error");
|
|
3468
3822
|
pending.reject(new Error(`Response validation error: ${errorMessage}`));
|
|
3469
3823
|
}
|
|
3470
3824
|
}
|
|
3471
3825
|
}
|
|
3472
3826
|
}
|
|
3473
3827
|
} catch (err2) {
|
|
3474
|
-
|
|
3828
|
+
logger3.error({
|
|
3829
|
+
error: err2 instanceof Error ? err2.message : String(err2),
|
|
3830
|
+
line
|
|
3831
|
+
}, "Failed to parse JSON response from Python bridge");
|
|
3475
3832
|
}
|
|
3476
3833
|
});
|
|
3477
3834
|
return Promise.resolve();
|
|
@@ -3570,7 +3927,9 @@ var PythonBridge = class {
|
|
|
3570
3927
|
};
|
|
3571
3928
|
|
|
3572
3929
|
// src/services/index.ts
|
|
3930
|
+
var logger4 = createLogger("services");
|
|
3573
3931
|
async function createServices(configPath, dataDir, projectRoot) {
|
|
3932
|
+
logger4.info({ configPath, dataDir, projectRoot }, "Initializing services");
|
|
3574
3933
|
const config = new ConfigService(configPath, dataDir, projectRoot);
|
|
3575
3934
|
const appConfig = await config.load();
|
|
3576
3935
|
const resolvedDataDir = config.resolveDataDir();
|
|
@@ -3585,8 +3944,9 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
3585
3944
|
const pythonBridge = new PythonBridge();
|
|
3586
3945
|
await pythonBridge.start();
|
|
3587
3946
|
const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
|
|
3588
|
-
const search = new SearchService(lance, embeddings,
|
|
3947
|
+
const search = new SearchService(lance, embeddings, codeGraph);
|
|
3589
3948
|
const index = new IndexService(lance, embeddings, { codeGraphService: codeGraph });
|
|
3949
|
+
logger4.info({ dataDir: resolvedDataDir }, "Services initialized successfully");
|
|
3590
3950
|
return {
|
|
3591
3951
|
config,
|
|
3592
3952
|
store,
|
|
@@ -3599,7 +3959,18 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
3599
3959
|
};
|
|
3600
3960
|
}
|
|
3601
3961
|
async function destroyServices(services) {
|
|
3602
|
-
|
|
3962
|
+
logger4.info("Shutting down services");
|
|
3963
|
+
try {
|
|
3964
|
+
services.lance.close();
|
|
3965
|
+
} catch (e) {
|
|
3966
|
+
logger4.error({ error: e }, "Error closing LanceStore");
|
|
3967
|
+
}
|
|
3968
|
+
try {
|
|
3969
|
+
await services.pythonBridge.stop();
|
|
3970
|
+
} catch (e) {
|
|
3971
|
+
logger4.error({ error: e }, "Error stopping Python bridge");
|
|
3972
|
+
}
|
|
3973
|
+
await shutdownLogger();
|
|
3603
3974
|
}
|
|
3604
3975
|
|
|
3605
3976
|
export {
|
|
@@ -3608,10 +3979,15 @@ export {
|
|
|
3608
3979
|
ok,
|
|
3609
3980
|
err,
|
|
3610
3981
|
extractRepoName,
|
|
3982
|
+
createLogger,
|
|
3983
|
+
summarizePayload,
|
|
3984
|
+
truncateForLog,
|
|
3985
|
+
ChunkingService,
|
|
3986
|
+
classifyWebContentType,
|
|
3611
3987
|
ASTParser,
|
|
3612
3988
|
PythonBridge,
|
|
3613
3989
|
JobService,
|
|
3614
3990
|
createServices,
|
|
3615
3991
|
destroyServices
|
|
3616
3992
|
};
|
|
3617
|
-
//# sourceMappingURL=chunk-
|
|
3993
|
+
//# sourceMappingURL=chunk-RWSXP3PQ.js.map
|