@rce-mcp/retrieval-core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,60 @@
1
1
  import { afterEach, describe, expect, it, vi } from "vitest";
2
- import { OpenAICompatibleEmbeddingProvider, RetrievalError } from "../src/index.js";
2
+ import {
3
+ LocalProviderRequestLimiter,
4
+ OpenAICompatibleEmbeddingProvider,
5
+ OpenAICompatibleRerankerProvider,
6
+ ProviderRateLimitExceededError,
7
+ RetrievalError
8
+ } from "../src/index.js";
9
+
10
+ describe("provider request limiter", () => {
11
+ it("waits for token refill in local mode", async () => {
12
+ let now = 0;
13
+ const limiter = new LocalProviderRequestLimiter({
14
+ now: () => now,
15
+ sleeper: async (ms) => {
16
+ now += ms;
17
+ }
18
+ });
19
+
20
+ await limiter.acquire({
21
+ scope: "provider:test|credential:a",
22
+ max_requests_per_minute: 1,
23
+ max_wait_ms: 0
24
+ });
25
+
26
+ const acquired = await limiter.acquire({
27
+ scope: "provider:test|credential:a",
28
+ max_requests_per_minute: 1,
29
+ max_wait_ms: 60_000
30
+ });
31
+ expect(acquired.wait_ms).toBe(60_000);
32
+ });
33
+
34
+ it("fails when wait budget is exhausted", async () => {
35
+ let now = 0;
36
+ const limiter = new LocalProviderRequestLimiter({
37
+ now: () => now,
38
+ sleeper: async (ms) => {
39
+ now += ms;
40
+ }
41
+ });
42
+
43
+ await limiter.acquire({
44
+ scope: "provider:test|credential:a",
45
+ max_requests_per_minute: 1,
46
+ max_wait_ms: 0
47
+ });
48
+
49
+ await expect(
50
+ limiter.acquire({
51
+ scope: "provider:test|credential:a",
52
+ max_requests_per_minute: 1,
53
+ max_wait_ms: 1_000
54
+ })
55
+ ).rejects.toBeInstanceOf(ProviderRateLimitExceededError);
56
+ });
57
+ });
3
58
 
4
59
  describe("openai-compatible embedding provider", () => {
5
60
  const originalFetch = globalThis.fetch;
@@ -67,6 +122,191 @@ describe("openai-compatible embedding provider", () => {
67
122
  expect(calls).toBe(1);
68
123
  });
69
124
 
125
+ it("retries transient 403 responses and succeeds", async () => {
126
+ let calls = 0;
127
+ globalThis.fetch = vi.fn(async () => {
128
+ calls += 1;
129
+ if (calls === 1) {
130
+ return new Response(
131
+ JSON.stringify({
132
+ error: {
133
+ message: "openai_error",
134
+ type: "bad_response_status_code"
135
+ }
136
+ }),
137
+ { status: 403 }
138
+ );
139
+ }
140
+ return new Response(
141
+ JSON.stringify({
142
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
143
+ }),
144
+ { status: 200 }
145
+ );
146
+ }) as typeof fetch;
147
+
148
+ const provider = new OpenAICompatibleEmbeddingProvider({
149
+ base_url: "https://router.tumuer.me/v1",
150
+ api_key: "test-key",
151
+ dimensions: 3,
152
+ max_retries: 1
153
+ });
154
+
155
+ const vectors = await provider.embed({
156
+ texts: ["hello"],
157
+ purpose: "query"
158
+ });
159
+
160
+ expect(vectors).toEqual([[0.1, 0.2, 0.3]]);
161
+ expect(calls).toBe(2);
162
+ });
163
+
164
+ it("allows transient 403 retries beyond generic max_retries", async () => {
165
+ let calls = 0;
166
+ globalThis.fetch = vi.fn(async () => {
167
+ calls += 1;
168
+ if (calls <= 2) {
169
+ return new Response(
170
+ JSON.stringify({
171
+ error: {
172
+ message: "openai_error",
173
+ type: "bad_response_status_code"
174
+ }
175
+ }),
176
+ { status: 403 }
177
+ );
178
+ }
179
+ return new Response(
180
+ JSON.stringify({
181
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
182
+ }),
183
+ { status: 200 }
184
+ );
185
+ }) as typeof fetch;
186
+
187
+ const provider = new OpenAICompatibleEmbeddingProvider({
188
+ base_url: "https://router.tumuer.me/v1",
189
+ api_key: "test-key",
190
+ dimensions: 3,
191
+ max_retries: 0
192
+ });
193
+
194
+ const vectors = await provider.embed({
195
+ texts: ["hello"],
196
+ purpose: "query"
197
+ });
198
+
199
+ expect(vectors).toEqual([[0.1, 0.2, 0.3]]);
200
+ expect(calls).toBe(3);
201
+ });
202
+
203
+ it("does not retry explicit invalid api key 403 failures", async () => {
204
+ let calls = 0;
205
+ globalThis.fetch = vi.fn(async () => {
206
+ calls += 1;
207
+ return new Response(
208
+ JSON.stringify({
209
+ error: {
210
+ message: "Invalid API key provided"
211
+ }
212
+ }),
213
+ { status: 403 }
214
+ );
215
+ }) as typeof fetch;
216
+
217
+ const provider = new OpenAICompatibleEmbeddingProvider({
218
+ base_url: "https://router.tumuer.me/v1",
219
+ api_key: "test-key",
220
+ dimensions: 3,
221
+ max_retries: 4
222
+ });
223
+
224
+ await expect(
225
+ provider.embed({
226
+ texts: ["hello"],
227
+ purpose: "query"
228
+ })
229
+ ).rejects.toMatchObject({
230
+ code: "UPSTREAM_FAILURE"
231
+ } satisfies Partial<RetrievalError>);
232
+ expect(calls).toBe(1);
233
+ });
234
+
235
+ it("does not retry ambiguous 403 failures without transient signals", async () => {
236
+ let calls = 0;
237
+ globalThis.fetch = vi.fn(async () => {
238
+ calls += 1;
239
+ return new Response(
240
+ JSON.stringify({
241
+ error: {
242
+ message: "forbidden"
243
+ }
244
+ }),
245
+ { status: 403 }
246
+ );
247
+ }) as typeof fetch;
248
+
249
+ const provider = new OpenAICompatibleEmbeddingProvider({
250
+ base_url: "https://router.tumuer.me/v1",
251
+ api_key: "test-key",
252
+ dimensions: 3,
253
+ max_retries: 4
254
+ });
255
+
256
+ await expect(
257
+ provider.embed({
258
+ texts: ["hello"],
259
+ purpose: "query"
260
+ })
261
+ ).rejects.toMatchObject({
262
+ code: "UPSTREAM_FAILURE"
263
+ } satisfies Partial<RetrievalError>);
264
+ expect(calls).toBe(1);
265
+ });
266
+
267
+ it("retries 403 failures when retry-after is present", async () => {
268
+ let calls = 0;
269
+ globalThis.fetch = vi.fn(async () => {
270
+ calls += 1;
271
+ if (calls === 1) {
272
+ return new Response(
273
+ JSON.stringify({
274
+ error: {
275
+ message: "forbidden"
276
+ }
277
+ }),
278
+ {
279
+ status: 403,
280
+ headers: {
281
+ "retry-after": "0"
282
+ }
283
+ }
284
+ );
285
+ }
286
+ return new Response(
287
+ JSON.stringify({
288
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
289
+ }),
290
+ { status: 200 }
291
+ );
292
+ }) as typeof fetch;
293
+
294
+ const provider = new OpenAICompatibleEmbeddingProvider({
295
+ base_url: "https://router.tumuer.me/v1",
296
+ api_key: "test-key",
297
+ dimensions: 3,
298
+ max_retries: 1
299
+ });
300
+
301
+ const vectors = await provider.embed({
302
+ texts: ["hello"],
303
+ purpose: "query"
304
+ });
305
+
306
+ expect(vectors).toEqual([[0.1, 0.2, 0.3]]);
307
+ expect(calls).toBe(2);
308
+ });
309
+
70
310
  it("rejects non-numeric vectors", async () => {
71
311
  globalThis.fetch = vi.fn(async () => {
72
312
  return new Response(
@@ -118,4 +358,213 @@ describe("openai-compatible embedding provider", () => {
118
358
  code: "UPSTREAM_FAILURE"
119
359
  } satisfies Partial<RetrievalError>);
120
360
  });
361
+
362
+ it("maps local limiter saturation to RATE_LIMITED before calling upstream", async () => {
363
+ let calls = 0;
364
+ globalThis.fetch = vi.fn(async () => {
365
+ calls += 1;
366
+ return new Response(
367
+ JSON.stringify({
368
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
369
+ }),
370
+ { status: 200 }
371
+ );
372
+ }) as typeof fetch;
373
+
374
+ const limiter = new LocalProviderRequestLimiter();
375
+ const provider = new OpenAICompatibleEmbeddingProvider({
376
+ base_url: "https://router.tumuer.me/v1",
377
+ api_key: "test-key",
378
+ dimensions: 3,
379
+ request_limiter: limiter,
380
+ max_requests_per_minute: 1,
381
+ query_max_wait_ms: 0
382
+ });
383
+
384
+ await provider.embed({
385
+ texts: ["hello"],
386
+ purpose: "query"
387
+ });
388
+
389
+ await expect(
390
+ provider.embed({
391
+ texts: ["world"],
392
+ purpose: "query"
393
+ })
394
+ ).rejects.toMatchObject({
395
+ code: "RATE_LIMITED"
396
+ } satisfies Partial<RetrievalError>);
397
+ expect(calls).toBe(1);
398
+ });
399
+
400
+ it("retries index embeddings after transient local limiter saturation", async () => {
401
+ let calls = 0;
402
+ globalThis.fetch = vi.fn(async () => {
403
+ calls += 1;
404
+ return new Response(
405
+ JSON.stringify({
406
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
407
+ }),
408
+ { status: 200 }
409
+ );
410
+ }) as typeof fetch;
411
+
412
+ const requestLimiter = {
413
+ mode: "local" as const,
414
+ acquire: vi
415
+ .fn()
416
+ .mockRejectedValueOnce(new ProviderRateLimitExceededError("transient local limit", 1))
417
+ .mockResolvedValue({ wait_ms: 0 })
418
+ };
419
+
420
+ const provider = new OpenAICompatibleEmbeddingProvider({
421
+ base_url: "https://router.tumuer.me/v1",
422
+ api_key: "test-key",
423
+ dimensions: 3,
424
+ request_limiter: requestLimiter,
425
+ max_requests_per_minute: 1,
426
+ index_max_wait_ms: 0
427
+ });
428
+
429
+ const vectors = await provider.embed({
430
+ texts: ["hello"],
431
+ purpose: "index"
432
+ });
433
+
434
+ expect(vectors).toEqual([[0.1, 0.2, 0.3]]);
435
+ expect(requestLimiter.acquire).toHaveBeenCalledTimes(2);
436
+ expect(calls).toBe(1);
437
+ });
438
+
439
+ it("keeps query limiter saturation fail-fast", async () => {
440
+ let calls = 0;
441
+ globalThis.fetch = vi.fn(async () => {
442
+ calls += 1;
443
+ return new Response(
444
+ JSON.stringify({
445
+ data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }]
446
+ }),
447
+ { status: 200 }
448
+ );
449
+ }) as typeof fetch;
450
+
451
+ const requestLimiter = {
452
+ mode: "local" as const,
453
+ acquire: vi.fn().mockRejectedValueOnce(new ProviderRateLimitExceededError("transient local limit", 1))
454
+ };
455
+
456
+ const provider = new OpenAICompatibleEmbeddingProvider({
457
+ base_url: "https://router.tumuer.me/v1",
458
+ api_key: "test-key",
459
+ dimensions: 3,
460
+ request_limiter: requestLimiter,
461
+ max_requests_per_minute: 1,
462
+ query_max_wait_ms: 0
463
+ });
464
+
465
+ await expect(
466
+ provider.embed({
467
+ texts: ["hello"],
468
+ purpose: "query"
469
+ })
470
+ ).rejects.toMatchObject({
471
+ code: "RATE_LIMITED"
472
+ } satisfies Partial<RetrievalError>);
473
+
474
+ expect(requestLimiter.acquire).toHaveBeenCalledTimes(1);
475
+ expect(calls).toBe(0);
476
+ });
477
+
478
+ it("maps upstream 429 exhaustion to RATE_LIMITED", async () => {
479
+ let calls = 0;
480
+ globalThis.fetch = vi.fn(async () => {
481
+ calls += 1;
482
+ return new Response(JSON.stringify({ error: "rate limit" }), { status: 429 });
483
+ }) as typeof fetch;
484
+
485
+ const provider = new OpenAICompatibleEmbeddingProvider({
486
+ base_url: "https://router.tumuer.me/v1",
487
+ api_key: "test-key",
488
+ dimensions: 3,
489
+ max_retries: 1
490
+ });
491
+
492
+ await expect(
493
+ provider.embed({
494
+ texts: ["hello"],
495
+ purpose: "query"
496
+ })
497
+ ).rejects.toMatchObject({
498
+ code: "RATE_LIMITED"
499
+ } satisfies Partial<RetrievalError>);
500
+ expect(calls).toBe(2);
501
+ });
502
+ });
503
+
504
+ describe("openai-compatible reranker provider", () => {
505
+ const originalFetch = globalThis.fetch;
506
+
507
+ afterEach(() => {
508
+ globalThis.fetch = originalFetch;
509
+ vi.restoreAllMocks();
510
+ });
511
+
512
+ it("returns ranked indexes from reranker response", async () => {
513
+ globalThis.fetch = vi.fn(async () => {
514
+ return new Response(
515
+ JSON.stringify({
516
+ results: [
517
+ { index: 2, relevance_score: 0.91 },
518
+ { index: 0, relevance_score: 0.82 },
519
+ { index: 1, relevance_score: 0.73 }
520
+ ]
521
+ }),
522
+ { status: 200 }
523
+ );
524
+ }) as typeof fetch;
525
+
526
+ const provider = new OpenAICompatibleRerankerProvider({
527
+ base_url: "https://router.tumuer.me/v1",
528
+ api_key: "test-key",
529
+ model: "Qwen/Qwen3-Reranker-4B",
530
+ timeout_ms: 200
531
+ });
532
+
533
+ const ranked = await provider.rerank({
534
+ query: "tenant auth",
535
+ documents: ["a", "b", "c"],
536
+ top_n: 2
537
+ });
538
+
539
+ expect(ranked).toEqual([
540
+ { index: 2, relevance_score: 0.91 },
541
+ { index: 0, relevance_score: 0.82 }
542
+ ]);
543
+ });
544
+
545
+ it("enforces timeout on stalled reranker requests", async () => {
546
+ globalThis.fetch = vi.fn((_, init) => {
547
+ return new Promise((_, reject) => {
548
+ init?.signal?.addEventListener("abort", () => {
549
+ reject(new DOMException("The operation was aborted.", "AbortError"));
550
+ });
551
+ });
552
+ }) as typeof fetch;
553
+
554
+ const provider = new OpenAICompatibleRerankerProvider({
555
+ base_url: "https://router.tumuer.me/v1",
556
+ api_key: "test-key",
557
+ timeout_ms: 10
558
+ });
559
+
560
+ await expect(
561
+ provider.rerank({
562
+ query: "tenant auth",
563
+ documents: ["a", "b"],
564
+ top_n: 2
565
+ })
566
+ ).rejects.toMatchObject({
567
+ message: expect.stringMatching(/timed out/i)
568
+ });
569
+ });
121
570
  });