@romiluz/clawmongo 0.1.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +3 -0
  3. package/dist/cli/boundary-contract-smoke.js +108 -0
  4. package/dist/cli/embedding-policy-smoke.js +66 -0
  5. package/dist/cli/embedding-provider-live-smoke.js +94 -0
  6. package/dist/cli/embedding-provider-smoke.js +81 -0
  7. package/dist/cli/embedding-provider-voyage-batch-smoke.js +129 -0
  8. package/dist/cli/gateway-smoke.js +65 -0
  9. package/dist/cli/health.js +17 -0
  10. package/dist/cli/index-budget-smoke.js +14 -0
  11. package/dist/cli/key-schema-smoke.js +118 -0
  12. package/dist/cli/orchestrator-smoke.js +75 -0
  13. package/dist/cli/provider-adapter-smoke.js +61 -0
  14. package/dist/cli/replica-track-check.js +108 -0
  15. package/dist/cli/retrieval-compat-check.js +196 -0
  16. package/dist/cli/retrieval-contract-smoke.js +72 -0
  17. package/dist/cli/retrieval-eval.js +226 -0
  18. package/dist/cli/retrieval-provider-smoke.js +52 -0
  19. package/dist/cli/retrieval-seed-reembed-smoke.js +54 -0
  20. package/dist/cli/retrieval-seed.js +312 -0
  21. package/dist/cli/runtime-contract-smoke.js +201 -0
  22. package/dist/cli/session-key-smoke.js +62 -0
  23. package/dist/cli/sprint-checks.js +129 -0
  24. package/dist/cli/tool-runtime-smoke.js +68 -0
  25. package/dist/config/deployment-profiles.js +41 -0
  26. package/dist/config/env.js +49 -0
  27. package/dist/contracts/v1.js +1 -0
  28. package/dist/contracts/validators.js +153 -0
  29. package/dist/identity/key-schema.js +31 -0
  30. package/dist/main.js +97 -0
  31. package/dist/modules/eventing/index.js +58 -0
  32. package/dist/modules/eventing/service.js +139 -0
  33. package/dist/modules/gateway/index.js +44 -0
  34. package/dist/modules/gateway/service.js +118 -0
  35. package/dist/modules/ingestion/index.js +46 -0
  36. package/dist/modules/ingestion/service.js +56 -0
  37. package/dist/modules/mongo-store/index.js +21 -0
  38. package/dist/modules/observability/index.js +6 -0
  39. package/dist/modules/orchestrator/index.js +49 -0
  40. package/dist/modules/orchestrator/service.js +220 -0
  41. package/dist/modules/policy-engine/index.js +34 -0
  42. package/dist/modules/policy-engine/service.js +42 -0
  43. package/dist/modules/provider-adapter/index.js +37 -0
  44. package/dist/modules/provider-adapter/service.js +98 -0
  45. package/dist/modules/retrieval/index.js +64 -0
  46. package/dist/modules/stub.js +17 -0
  47. package/dist/modules/tool-runtime/index.js +30 -0
  48. package/dist/modules/tool-runtime/service.js +84 -0
  49. package/dist/retrieval/contracts.js +1 -0
  50. package/dist/retrieval/embeddings/policy.js +42 -0
  51. package/dist/retrieval/embeddings/provider.js +424 -0
  52. package/dist/retrieval/embeddings/query-vector.js +34 -0
  53. package/dist/retrieval/embeddings/voyage-remote-batch.js +312 -0
  54. package/dist/retrieval/engine.js +130 -0
  55. package/dist/retrieval/fixtures.js +123 -0
  56. package/dist/retrieval/providers/fusion.js +390 -0
  57. package/dist/retrieval/providers/lexical.js +267 -0
  58. package/dist/retrieval/providers/shared.js +88 -0
  59. package/dist/retrieval/providers/vector.js +274 -0
  60. package/dist/retrieval/reembed.js +116 -0
  61. package/dist/runtime/bootstrap.js +65 -0
  62. package/dist/runtime/types.js +1 -0
  63. package/dist/session/session-key.js +128 -0
  64. package/dist/store/mongo/bootstrap.js +129 -0
  65. package/dist/store/mongo/indexes.js +110 -0
  66. package/dist/store/mongo/validators.js +238 -0
  67. package/package.json +81 -0
@@ -0,0 +1,390 @@
1
+ import { MongoClient } from "mongodb";
2
+ import { normalizeScopeFilter } from "./shared.js";
3
+ import { resolveQueryEmbeddingVector } from "../embeddings/query-vector.js";
4
/**
 * Merge a lexical and a vector hit list with reciprocal rank fusion (RRF).
 *
 * Every hit contributes `1 / (rankConstant + rank)` for each list it appears
 * in (rank is 1-based); contributions for the same `id` are summed. The fused
 * list is ordered by descending score with ties broken by `id`, truncated to
 * `topK`, and every returned hit is tagged `source: "hybrid"` with its fused
 * score rounded to six decimals.
 *
 * @param {Array<{id: string}>} lexical - Ranked lexical hits, best first. A missing list counts as empty.
 * @param {Array<{id: string}>} vector - Ranked vector hits, best first. A missing list counts as empty.
 * @param {number} topK - Maximum number of fused hits to return.
 * @param {number} [rankConstant=50] - Smoothing constant added to every rank.
 * @returns {Array<object>} Fused hits; the payload comes from the first list in which each id was seen.
 */
function reciprocalRankFuse(lexical, vector, topK, rankConstant = 50) {
    const fused = new Map();
    // Lexical is processed first so its payload wins when an id is in both lists.
    for (const ranked of [lexical ?? [], vector ?? []]) {
        ranked.forEach((hit, index) => {
            const contribution = 1 / (rankConstant + index + 1);
            const entry = fused.get(hit.id);
            if (entry) {
                entry.score += contribution;
            }
            else {
                fused.set(hit.id, { hit, score: contribution });
            }
        });
    }
    const byScoreThenId = (a, b) => {
        if (b.score !== a.score) {
            return b.score - a.score;
        }
        // Deterministic tie-break so equal scores always come back in the same order.
        return a.hit.id.localeCompare(b.hit.id);
    };
    return [...fused.values()]
        .sort(byScoreThenId)
        .slice(0, topK)
        .map(({ hit, score }) => ({
            ...hit,
            source: "hybrid",
            score: Number(score.toFixed(6))
        }));
}
36
/**
 * Decide whether a server error message indicates that the probed aggregation
 * stage is not supported, by case-insensitive substring match against a fixed
 * list of phrases.
 *
 * @param {string} message - Error message text.
 * @returns {boolean} True when any of the known "unsupported" phrases occurs.
 */
function isStageUnsupported(message) {
    const normalized = message.toLowerCase();
    const unsupportedMarkers = [
        "unrecognized pipeline stage",
        "unknown top level operator",
        "requires additional configuration"
    ];
    return unsupportedMarkers.some((marker) => normalized.includes(marker));
}
42
/**
 * Serialize the comparable fields of a hit (id, score, source, text,
 * metadata — in that key order) to a JSON string usable as a set key.
 *
 * @param {object} hit - Retrieval hit.
 * @returns {string} JSON string of the selected fields.
 */
function toComparableHit(hit) {
    const { id, score, source, text, metadata } = hit;
    return JSON.stringify({ id, score, source, text, metadata });
}
51
/**
 * Report whether two hit lists contain the same set of hits, ignoring order.
 * Hits are compared through their `toComparableHit` serialization.
 *
 * The check is symmetric: lists of equal length only match when every
 * distinct hit on each side also occurs on the other side. (A one-directional
 * check would treat `[a, b]` and `[a, a]` as equal.)
 *
 * @param {Array<object>} left - First hit list.
 * @param {Array<object>} right - Second hit list.
 * @returns {boolean} True when both lists have the same length and the same distinct hits.
 */
function sameHitSets(left, right) {
    if (left.length !== right.length) {
        return false;
    }
    const leftKeys = new Set(left.map((hit) => toComparableHit(hit)));
    const rightKeys = new Set(right.map((hit) => toComparableHit(hit)));
    if (leftKeys.size !== rightKeys.size) {
        return false;
    }
    // Equal sizes plus one-way inclusion implies the two sets are identical.
    for (const key of rightKeys) {
        if (!leftKeys.has(key)) {
            return false;
        }
    }
    return true;
}
58
/**
 * Probe whether the target deployment recognizes the `$rankFusion` stage by
 * running a tiny aggregation whose sub-pipelines match nothing.
 *
 * Outcomes:
 *  - no URI configured            -> unavailable (probe skipped)
 *  - aggregation succeeds         -> available
 *  - error matching isStageUnsupported -> unavailable
 *  - any other aggregation error  -> available (the stage was recognized)
 *
 * Errors raised while connecting are not caught here and propagate to the
 * caller. The client is always closed; close errors are ignored.
 *
 * @param {{uri?: string, dbName: string, collectionName: string, connectTimeoutMs: number}} options
 * @returns {Promise<{available: boolean, reason: string, checkedAt: string}>}
 */
async function tryNativeFusionProbe(options) {
    const verdict = (available, reason) => ({
        available,
        reason,
        checkedAt: new Date().toISOString()
    });
    if (!options.uri) {
        return verdict(false, "native fusion probe skipped: Mongo URI not configured");
    }
    const client = new MongoClient(options.uri, {
        appName: "clawmongo-fusion-probe",
        serverSelectionTimeoutMS: options.connectTimeoutMs
    });
    // Both sub-pipelines match no documents; only stage recognition matters.
    const matchNothing = () => [{ $match: { _id: null } }];
    const probePipeline = [
        {
            $rankFusion: {
                input: {
                    pipelines: {
                        probeA: matchNothing(),
                        probeB: matchNothing()
                    }
                }
            }
        },
        { $limit: 1 }
    ];
    try {
        await client.connect();
        const db = client.db(options.dbName);
        try {
            await db.collection(options.collectionName).aggregate(probePipeline).toArray();
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return isStageUnsupported(message)
                ? verdict(false, `rankFusion probe unsupported: ${message}`)
                : verdict(true, `rankFusion stage recognized (probe error indicates operator exists): ${message}`);
        }
        return verdict(true, "rankFusion stage probe executed successfully");
    }
    finally {
        await client.close().catch(() => undefined);
    }
}
116
/**
 * Run a native `$rankFusion` aggregation that fuses a `$search` (lexical)
 * pipeline with a `$vectorSearch` pipeline and map the results to hybrid hits.
 *
 * Scope filters from the query are applied to both sub-pipelines: as
 * `equals` clauses in the `$search` compound filter and as the `filter`
 * document of `$vectorSearch`. Each sub-pipeline yields up to
 * `max(topK * 4, 20)` candidates; the fused result is limited to `topK`.
 *
 * Any error (embedding resolution, connection, aggregation) propagates to the
 * caller. The client is always closed; close errors are ignored.
 *
 * NOTE(review): the fused score is projected via `$meta: "searchScore"` —
 * confirm that this metadata field is populated after `$rankFusion`;
 * documents without a numeric score are reported with score 0.
 *
 * @param {{query: object}} input - Fusion input; `query.query`, `query.topK` and `query.scopeFilters` are read.
 * @param {{uri: string, dbName: string, collectionName: string, lexicalIndexName: string, vectorIndexName: string, connectTimeoutMs: number}} options
 * @returns {Promise<{hits: Array<object>, note: string, embeddingDegraded: *}>}
 */
async function attemptNativeFusionWithAtlasStages(input, options) {
    const scope = normalizeScopeFilter(input.query.scopeFilters);
    const queryEmbedding = await resolveQueryEmbeddingVector(input.query, 8);
    const scopeClauses = Object.entries(scope).map(([path, value]) => ({
        equals: { path, value }
    }));
    const client = new MongoClient(options.uri, {
        appName: "clawmongo-native-fusion",
        serverSelectionTimeoutMS: options.connectTimeoutMs
    });
    const orNull = (value) => value ?? null;
    const toHybridHit = (doc) => ({
        id: typeof doc.id === "string" ? doc.id : "unknown-chunk",
        source: "hybrid",
        score: typeof doc.score === "number" ? Number(doc.score.toFixed(6)) : 0,
        text: typeof doc.text === "string" ? doc.text : "",
        metadata: {
            doc_id: orNull(doc.doc_id),
            tenant_id: orNull(doc.tenant_id),
            workspace_id: orNull(doc.workspace_id),
            source_type: orNull(doc.source_type),
            channel: orNull(doc.channel),
            thread_key: orNull(doc.thread_key),
            embedding_provider: orNull(doc.embedding_provider),
            embedding_model: orNull(doc.embedding_model),
            retrieval_stage: "fusion",
            strategy: "native-rankFusion"
        }
    });
    try {
        await client.connect();
        const collection = client.db(options.dbName).collection(options.collectionName);
        const topK = input.query.topK;
        const candidateLimit = Math.max(topK * 4, 20);

        // Lexical branch: full-text match on "text", optionally scoped.
        const compound = {
            must: [
                {
                    text: {
                        query: input.query.query,
                        path: ["text"]
                    }
                }
            ]
        };
        if (scopeClauses.length > 0) {
            compound.filter = scopeClauses;
        }
        const lexicalPipeline = [
            { $search: { index: options.lexicalIndexName, compound } },
            { $limit: candidateLimit }
        ];

        // Vector branch: ANN search over "embedding", optionally scoped.
        const vectorStage = {
            index: options.vectorIndexName,
            path: "embedding",
            queryVector: queryEmbedding.vector,
            numCandidates: Math.max(topK * 20, 100),
            limit: candidateLimit
        };
        if (Object.keys(scope).length > 0) {
            vectorStage.filter = scope;
        }
        const vectorPipeline = [
            { $vectorSearch: vectorStage },
            { $limit: candidateLimit }
        ];

        const nativePipeline = [
            {
                $rankFusion: {
                    input: {
                        pipelines: {
                            lexical: lexicalPipeline,
                            vector: vectorPipeline
                        }
                    }
                }
            },
            { $limit: topK },
            {
                $project: {
                    _id: 0,
                    id: "$chunk_id",
                    text: "$text",
                    doc_id: "$doc_id",
                    tenant_id: "$tenant_id",
                    workspace_id: "$workspace_id",
                    source_type: "$source_type",
                    channel: "$channel",
                    thread_key: "$thread_key",
                    embedding_provider: "$embedding_provider",
                    embedding_model: "$embedding_model",
                    score: {
                        $meta: "searchScore"
                    }
                }
            }
        ];
        const docs = await collection.aggregate(nativePipeline).toArray();
        return {
            hits: docs.map((doc) => toHybridHit(doc)),
            note: `${queryEmbedding.note} Native rankFusion execution succeeded.`,
            embeddingDegraded: queryEmbedding.degraded
        };
    }
    finally {
        await client.close().catch(() => undefined);
    }
}
231
/**
 * Synchronous fusion retriever that always uses in-process reciprocal rank
 * fusion and never attempts native fusion.
 */
export class ReciprocalRankFusionRetriever {
    /**
     * Fuse the lexical and vector hits of `input` into at most `query.topK` hits.
     *
     * Telemetry reports strategy "fallback" when the query asked for native
     * fusion and "none" otherwise; `fallbackUsed` mirrors
     * `query.preferNativeFusion` as given.
     *
     * @param {{lexicalHits: Array<object>, vectorHits: Array<object>, query: object}} input
     * @returns {{hits: Array<object>, telemetry: object}}
     */
    fuse(input) {
        const nativePreferred = input.query.preferNativeFusion;
        const fusedHits = reciprocalRankFuse(input.lexicalHits, input.vectorHits, input.query.topK);
        let strategy = "none";
        let note = "Fusion bypassed because preferNativeFusion=false; lexical path selected.";
        if (nativePreferred) {
            strategy = "fallback";
            note = "Using deterministic reciprocal-rank fallback.";
        }
        const telemetry = {
            strategy,
            degraded: false,
            nativeAttempted: false,
            nativeSucceeded: false,
            fallbackUsed: nativePreferred,
            latencyMs: 0,
            notes: [note]
        };
        return { hits: fusedHits, telemetry };
    }
}
252
/**
 * Fusion retriever that prefers the native `$rankFusion` stage and degrades
 * to in-process reciprocal rank fusion whenever native fusion is unavailable,
 * fails, or returns no hits.
 */
export class AdaptiveFusionRetriever {
    /** Resolved options with defaults applied. */
    options;
    /** Result of the last live probe; reused for subsequent calls. */
    nativeProbeCache = null;
    /**
     * @param {object} options
     * @param {boolean} options.supportsNativeFusion - Whether the deployment profile allows native fusion.
     * @param {string} [options.uri] - MongoDB connection string.
     * @param {string} [options.dbName="clawmongo"]
     * @param {number} [options.connectTimeoutMs=5000] - Used as server selection timeout.
     * @param {string} [options.collectionName="memory_chunks"]
     * @param {string} [options.lexicalIndexName="memory_chunks.lexical"]
     * @param {string} [options.vectorIndexName="memory_chunks.vector"]
     * @param {string} [options.nativeProbeMode="auto"] - "force-available" and "force-unavailable" skip the live probe.
     */
    constructor(options) {
        this.options = {
            supportsNativeFusion: options.supportsNativeFusion,
            uri: options.uri,
            dbName: options.dbName ?? "clawmongo",
            connectTimeoutMs: options.connectTimeoutMs ?? 5000,
            collectionName: options.collectionName ?? "memory_chunks",
            lexicalIndexName: options.lexicalIndexName ?? "memory_chunks.lexical",
            vectorIndexName: options.vectorIndexName ?? "memory_chunks.vector",
            nativeProbeMode: options.nativeProbeMode ?? "auto"
        };
    }
    /**
     * Determine whether native fusion may be used.
     *
     * Forced probe modes and an unsupported deployment profile are answered
     * without touching the database. Otherwise a live probe runs once and its
     * result is cached on the instance. A probe that throws (for example when
     * the connection fails) is not cached and the error propagates.
     *
     * @returns {Promise<{available: boolean, reason: string, checkedAt: string}>}
     */
    async detectNativeSupport() {
        const answer = (available, reason) => ({
            available,
            reason,
            checkedAt: new Date().toISOString()
        });
        const { nativeProbeMode, supportsNativeFusion } = this.options;
        if (nativeProbeMode === "force-available") {
            return answer(true, "native probe forced to available");
        }
        if (nativeProbeMode === "force-unavailable") {
            return answer(false, "native probe forced to unavailable");
        }
        if (!supportsNativeFusion) {
            return answer(false, "deployment profile does not support native fusion");
        }
        if (this.nativeProbeCache) {
            return this.nativeProbeCache;
        }
        const probe = await tryNativeFusionProbe({
            uri: this.options.uri,
            dbName: this.options.dbName,
            collectionName: this.options.collectionName,
            connectTimeoutMs: this.options.connectTimeoutMs
        });
        this.nativeProbeCache = probe;
        return probe;
    }
    /**
     * Fuse lexical and vector hits, using native fusion when possible.
     *
     * The in-process fusion result is always computed first and is what gets
     * returned unless native execution succeeds with at least one hit. A
     * failing support probe or a failing native execution never rejects this
     * call; both are reported through `telemetry.degraded` and
     * `telemetry.notes` and the in-process result is returned instead.
     *
     * @param {{lexicalHits: Array<object>, vectorHits: Array<object>, query: object}} input
     * @returns {Promise<{hits: Array<object>, telemetry: object}>}
     */
    async fuse(input) {
        const startedAtMs = Date.now();
        const fallbackHits = reciprocalRankFuse(input.lexicalHits, input.vectorHits, input.query.topK);
        if (!input.query.preferNativeFusion) {
            return {
                hits: fallbackHits,
                telemetry: {
                    strategy: "none",
                    degraded: false,
                    nativeAttempted: false,
                    nativeSucceeded: false,
                    fallbackUsed: false,
                    latencyMs: Date.now() - startedAtMs,
                    notes: ["Fusion disabled because preferNativeFusion=false."]
                }
            };
        }
        let probe;
        try {
            probe = await this.detectNativeSupport();
        }
        catch (error) {
            // An unreachable server during probing must not fail retrieval:
            // degrade to the in-process result that is already available.
            const message = error instanceof Error ? error.message : String(error);
            return {
                hits: fallbackHits,
                telemetry: {
                    strategy: "fallback",
                    degraded: true,
                    nativeAttempted: false,
                    nativeSucceeded: false,
                    fallbackUsed: true,
                    latencyMs: Date.now() - startedAtMs,
                    notes: [`Native fusion probe failed; fallback used: ${message}`]
                }
            };
        }
        let strategy = "fallback";
        let degraded = false;
        let nativeAttempted = false;
        let nativeSucceeded = false;
        let fallbackUsed = true;
        const notes = [probe.reason];
        let hits = fallbackHits;
        if (probe.available) {
            nativeAttempted = true;
            if (this.options.uri) {
                try {
                    const nativeResult = await attemptNativeFusionWithAtlasStages(input, {
                        uri: this.options.uri,
                        dbName: this.options.dbName,
                        collectionName: this.options.collectionName,
                        lexicalIndexName: this.options.lexicalIndexName,
                        vectorIndexName: this.options.vectorIndexName,
                        connectTimeoutMs: this.options.connectTimeoutMs
                    });
                    if (nativeResult.hits.length > 0) {
                        strategy = "native";
                        nativeSucceeded = true;
                        fallbackUsed = false;
                        hits = nativeResult.hits;
                        notes.push(nativeResult.note);
                        if (nativeResult.embeddingDegraded) {
                            degraded = true;
                        }
                    }
                    else {
                        degraded = true;
                        notes.push("Native fusion returned no hits; fallback fusion used.");
                    }
                }
                catch (error) {
                    const message = error instanceof Error ? error.message : String(error);
                    degraded = true;
                    notes.push(`Native fusion execution failed; fallback used: ${message}`);
                }
            }
            else {
                // Only reachable when availability is forced without a URI:
                // report "native" while returning the in-process result.
                strategy = "native";
                nativeSucceeded = true;
                fallbackUsed = false;
                notes.push("Native fusion marked available (no URI); using deterministic fusion output contract.");
            }
        }
        if (nativeSucceeded && this.options.uri && sameHitSets(hits, fallbackHits)) {
            notes.push("Native and fallback result sets are equivalent for this query.");
        }
        return {
            hits,
            telemetry: {
                strategy,
                degraded,
                nativeAttempted,
                nativeSucceeded,
                fallbackUsed,
                latencyMs: Date.now() - startedAtMs,
                notes
            }
        };
    }
}
@@ -0,0 +1,267 @@
1
+ import { MongoClient } from "mongodb";
2
+ import { lexicalTokenScore, normalizeScopeFilter, profileWantsAtlasLexical } from "./shared.js";
3
/**
 * Round a score to six decimals; anything that is not a number (or is NaN)
 * becomes 0.
 *
 * @param {*} value - Candidate score.
 * @returns {number} Rounded score, or 0 for non-numeric / NaN input.
 */
function normalizeScore(value) {
    const isUsableNumber = typeof value === "number" && !Number.isNaN(value);
    return isUsableNumber ? Number(value.toFixed(6)) : 0;
}
9
/**
 * Turn a flat scope filter object into a list of `equals` clauses, one per
 * own enumerable key, in key order.
 *
 * @param {Object<string, *>} scopeFilter - Field path to required value.
 * @returns {Array<{equals: {path: string, value: *}}>}
 */
function buildSearchFilterClauses(scopeFilter) {
    const clauses = [];
    for (const [path, value] of Object.entries(scopeFilter)) {
        clauses.push({ equals: { path, value } });
    }
    return clauses;
}
17
/**
 * Convert a stored chunk document into a lexical retrieval hit.
 *
 * The score comes from `doc._score` when present, otherwise from
 * `fallbackScore`, and is passed through `normalizeScore`. Missing identifier
 * and text default to "unknown-chunk" and "" respectively; missing metadata
 * fields are reported as null.
 *
 * @param {object} doc - Chunk document (possibly carrying a projected `_score`).
 * @param {number} fallbackScore - Score to use when the document has no `_score`.
 * @param {string} strategy - Strategy label recorded in the hit metadata.
 * @returns {{id: string, source: string, score: number, text: string, metadata: object}}
 */
function mapDocToHit(doc, fallbackScore, strategy) {
    const copiedFields = [
        "doc_id",
        "tenant_id",
        "workspace_id",
        "source_type",
        "channel",
        "thread_key",
        "embedding_provider",
        "embedding_model",
        "event_ts"
    ];
    const metadata = {};
    for (const field of copiedFields) {
        metadata[field] = doc[field] ?? null;
    }
    metadata.retrieval_stage = "lexical";
    metadata.strategy = strategy;
    const rawScore = doc._score ?? fallbackScore;
    return {
        id: doc.chunk_id ?? "unknown-chunk",
        source: "lexical",
        score: normalizeScore(rawScore),
        text: doc.text ?? "",
        metadata
    };
}
38
/**
 * Lexical retriever stub that returns two canned hits without touching any
 * data store.
 */
export class StubLexicalRetriever {
    /**
     * Produce two fixed hits. The first hit's score is the query length
     * divided by 100, clamped to [0.5, 0.99]; the second hit scores 0.1 lower.
     * Both scores are rounded to four decimals.
     *
     * @param {{query: string, profile: *, scopeFilters: *}} query
     * @returns {Promise<Array<object>>}
     */
    async search(query) {
        const lengthRatio = query.query.length / 100;
        const baseScore = Math.max(0.5, Math.min(0.99, lengthRatio));
        const buildHit = (id, rawScore, text) => ({
            id,
            source: "lexical",
            score: Number(rawScore.toFixed(4)),
            text,
            metadata: {
                profile: query.profile,
                scope: query.scopeFilters
            }
        });
        return [
            buildHit("lexical-1", baseScore, `Lexical hit for '${query.query}'`),
            buildHit("lexical-2", baseScore - 0.1, "Secondary lexical hit")
        ];
    }
}
65
/**
 * Lexical retriever backed by MongoDB. Depending on the deployment profile it
 * first tries a `$search` aggregation and otherwise (or on failure) falls
 * back to scanning recent scoped documents and scoring them by token overlap.
 */
export class MongoLexicalRetriever {
    /** Resolved options with defaults applied. */
    options;
    /**
     * @param {object} options
     * @param {*} options.deploymentProfile - Passed to `profileWantsAtlasLexical` to pick the search path.
     * @param {string} [options.uri] - MongoDB connection string; without it searches return no hits.
     * @param {string} [options.dbName]
     * @param {number} [options.connectTimeoutMs] - Used as server selection timeout.
     * @param {string} [options.collectionName="memory_chunks"]
     * @param {string} [options.lexicalIndexName="memory_chunks.lexical"]
     * @param {number} [options.fallbackCandidateLimit=250] - Minimum candidate pool size.
     */
    constructor(options) {
        const { deploymentProfile, uri, dbName, connectTimeoutMs } = options;
        this.options = {
            deploymentProfile,
            uri,
            dbName,
            connectTimeoutMs,
            collectionName: options.collectionName ?? "memory_chunks",
            lexicalIndexName: options.lexicalIndexName ?? "memory_chunks.lexical",
            fallbackCandidateLimit: options.fallbackCandidateLimit ?? 250
        };
    }
    /**
     * Run a lexical search and return only the hits.
     *
     * @param {object} query - See `searchWithTelemetry`.
     * @returns {Promise<Array<object>>}
     */
    async search(query) {
        const { hits } = await this.searchWithTelemetry(query);
        return hits;
    }
    /**
     * Run a lexical search and return hits together with telemetry.
     *
     * Flow:
     *  1. Without a configured URI, return no hits and degraded telemetry.
     *  2. When the profile wants it, try `$search`; a failure is recorded in
     *     the notes and marks the result degraded.
     *  3. If `$search` was not used or failed, load up to `candidateLimit`
     *     scoped documents (newest `event_ts` first), score them with
     *     `lexicalTokenScore`, and keep positive scores. When nothing scores
     *     above zero, all candidates are kept with small position-based scores.
     *
     * Connection errors and errors of the fallback query propagate to the
     * caller. The client is always closed; close errors are ignored.
     *
     * @param {{query: string, topK: number, scopeFilters: *}} query
     * @returns {Promise<{hits: Array<object>, telemetry: object}>}
     */
    async searchWithTelemetry(query) {
        const startedAtMs = Date.now();
        const scopeFilter = normalizeScopeFilter(query.scopeFilters);
        const notes = [];
        if (!this.options.uri) {
            return {
                hits: [],
                telemetry: {
                    stage: "lexical",
                    strategy: "no-uri",
                    degraded: true,
                    nativeAttempted: false,
                    nativeSucceeded: false,
                    fallbackUsed: false,
                    candidateCount: 0,
                    resultCount: 0,
                    latencyMs: Date.now() - startedAtMs,
                    scopeFilter,
                    notes: ["Mongo URI is not configured; lexical provider skipped."]
                }
            };
        }
        const wantsAtlasSearch = profileWantsAtlasLexical(this.options.deploymentProfile);
        const fallbackStrategy = wantsAtlasSearch ? "atlas-search-fallback" : "compat-token-scan";
        const candidateLimit = Math.max(this.options.fallbackCandidateLimit, query.topK * 4);
        // Fields returned by both the $search path and the fallback scan.
        const chunkFields = {
            _id: 0,
            chunk_id: 1,
            doc_id: 1,
            text: 1,
            tenant_id: 1,
            workspace_id: 1,
            source_type: 1,
            channel: 1,
            thread_key: 1,
            embedding_provider: 1,
            embedding_model: 1,
            event_ts: 1
        };
        const eventMillis = (doc) => (doc.event_ts instanceof Date ? doc.event_ts.getTime() : 0);
        // Order: higher score, then newer event, then chunk id for determinism.
        const byRelevance = (left, right) => {
            if (right.score !== left.score) {
                return right.score - left.score;
            }
            const leftTime = eventMillis(left.doc);
            const rightTime = eventMillis(right.doc);
            if (rightTime !== leftTime) {
                return rightTime - leftTime;
            }
            return (left.doc.chunk_id ?? "").localeCompare(right.doc.chunk_id ?? "");
        };
        let nativeAttempted = false;
        let nativeSucceeded = false;
        let fallbackUsed = false;
        let degraded = false;
        let candidateCount = 0;
        let hits = [];
        const client = new MongoClient(this.options.uri, {
            appName: "clawmongo-retrieval-lexical",
            serverSelectionTimeoutMS: this.options.connectTimeoutMs
        });
        try {
            await client.connect();
            const collection = client.db(this.options.dbName).collection(this.options.collectionName);
            if (wantsAtlasSearch) {
                nativeAttempted = true;
                try {
                    const filterClauses = buildSearchFilterClauses(scopeFilter);
                    const compound = {
                        must: [
                            {
                                text: {
                                    query: query.query,
                                    path: ["text"]
                                }
                            }
                        ]
                    };
                    if (filterClauses.length > 0) {
                        compound.filter = filterClauses;
                    }
                    const atlasPipeline = [
                        { $search: { index: this.options.lexicalIndexName, compound } },
                        { $limit: candidateLimit },
                        { $project: { ...chunkFields, _score: { $meta: "searchScore" } } }
                    ];
                    const nativeDocs = await collection.aggregate(atlasPipeline).toArray();
                    candidateCount = nativeDocs.length;
                    hits = nativeDocs
                        .slice(0, query.topK)
                        .map((doc) => mapDocToHit(doc, 0, "atlas-search"));
                    nativeSucceeded = true;
                }
                catch (error) {
                    const message = error instanceof Error ? error.message : String(error);
                    degraded = true;
                    fallbackUsed = true;
                    notes.push(`Atlas $search unavailable or failed: ${message}`);
                }
            }
            if (!nativeSucceeded) {
                if (!wantsAtlasSearch) {
                    notes.push("Compat profile uses deterministic token-scan lexical path.");
                }
                fallbackUsed = true;
                const fallbackDocs = await collection
                    .find(scopeFilter, { projection: chunkFields })
                    .sort({ event_ts: -1, chunk_id: 1 })
                    .limit(candidateLimit)
                    .toArray();
                candidateCount = fallbackDocs.length;
                const tokenMatches = [];
                for (const doc of fallbackDocs) {
                    const score = lexicalTokenScore(query.query, doc.text ?? "");
                    if (score > 0) {
                        tokenMatches.push({ doc, score });
                    }
                }
                // No token overlap at all: keep every candidate with a small
                // score that decays with its position in the recency order.
                const pool = tokenMatches.length > 0
                    ? tokenMatches
                    : fallbackDocs.map((doc, index) => ({
                        doc,
                        score: Math.max(0.001, 1 / (100 + index))
                    }));
                hits = pool
                    .sort(byRelevance)
                    .slice(0, query.topK)
                    .map((entry) => mapDocToHit(entry.doc, entry.score, fallbackStrategy));
            }
        }
        finally {
            await client.close().catch(() => undefined);
        }
        return {
            hits,
            telemetry: {
                stage: "lexical",
                strategy: nativeSucceeded ? "atlas-search" : fallbackStrategy,
                degraded,
                nativeAttempted,
                nativeSucceeded,
                fallbackUsed,
                candidateCount,
                resultCount: hits.length,
                latencyMs: Date.now() - startedAtMs,
                scopeFilter,
                notes
            }
        };
    }
}