@farukada/langchain-ts-rms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +446 -0
  3. package/dist/app/freshness/evaluator.d.ts +23 -0
  4. package/dist/app/freshness/evaluator.d.ts.map +1 -0
  5. package/dist/app/freshness/evaluator.js +72 -0
  6. package/dist/app/freshness/evaluator.js.map +1 -0
  7. package/dist/app/governance/guardrails.d.ts +51 -0
  8. package/dist/app/governance/guardrails.d.ts.map +1 -0
  9. package/dist/app/governance/guardrails.js +68 -0
  10. package/dist/app/governance/guardrails.js.map +1 -0
  11. package/dist/app/graph/workflow.d.ts +1159 -0
  12. package/dist/app/graph/workflow.d.ts.map +1 -0
  13. package/dist/app/graph/workflow.js +468 -0
  14. package/dist/app/graph/workflow.js.map +1 -0
  15. package/dist/app/queryPlanning/planner.d.ts +18 -0
  16. package/dist/app/queryPlanning/planner.d.ts.map +1 -0
  17. package/dist/app/queryPlanning/planner.js +80 -0
  18. package/dist/app/queryPlanning/planner.js.map +1 -0
  19. package/dist/app/queryRewriting/rewriter.d.ts +32 -0
  20. package/dist/app/queryRewriting/rewriter.d.ts.map +1 -0
  21. package/dist/app/queryRewriting/rewriter.js +111 -0
  22. package/dist/app/queryRewriting/rewriter.js.map +1 -0
  23. package/dist/app/reranking/reranker.d.ts +27 -0
  24. package/dist/app/reranking/reranker.d.ts.map +1 -0
  25. package/dist/app/reranking/reranker.js +67 -0
  26. package/dist/app/reranking/reranker.js.map +1 -0
  27. package/dist/app/state/schema.d.ts +121 -0
  28. package/dist/app/state/schema.d.ts.map +1 -0
  29. package/dist/app/state/schema.js +88 -0
  30. package/dist/app/state/schema.js.map +1 -0
  31. package/dist/app/summarization/summarizationSchema.d.ts +34 -0
  32. package/dist/app/summarization/summarizationSchema.d.ts.map +1 -0
  33. package/dist/app/summarization/summarizationSchema.js +65 -0
  34. package/dist/app/summarization/summarizationSchema.js.map +1 -0
  35. package/dist/app/summarization/summarizer.d.ts +51 -0
  36. package/dist/app/summarization/summarizer.d.ts.map +1 -0
  37. package/dist/app/summarization/summarizer.js +181 -0
  38. package/dist/app/summarization/summarizer.js.map +1 -0
  39. package/dist/app/summarization/synthesisSchema.d.ts +16 -0
  40. package/dist/app/summarization/synthesisSchema.d.ts.map +1 -0
  41. package/dist/app/summarization/synthesisSchema.js +43 -0
  42. package/dist/app/summarization/synthesisSchema.js.map +1 -0
  43. package/dist/app/summarization/synthesizer.d.ts +21 -0
  44. package/dist/app/summarization/synthesizer.d.ts.map +1 -0
  45. package/dist/app/summarization/synthesizer.js +86 -0
  46. package/dist/app/summarization/synthesizer.js.map +1 -0
  47. package/dist/config/env.d.ts +49 -0
  48. package/dist/config/env.d.ts.map +1 -0
  49. package/dist/config/env.js +54 -0
  50. package/dist/config/env.js.map +1 -0
  51. package/dist/domain/contracts.d.ts +59 -0
  52. package/dist/domain/contracts.d.ts.map +1 -0
  53. package/dist/domain/contracts.js +52 -0
  54. package/dist/domain/contracts.js.map +1 -0
  55. package/dist/domain/ports.d.ts +63 -0
  56. package/dist/domain/ports.d.ts.map +1 -0
  57. package/dist/domain/ports.js +2 -0
  58. package/dist/domain/ports.js.map +1 -0
  59. package/dist/domain/researchUtils.d.ts +51 -0
  60. package/dist/domain/researchUtils.d.ts.map +1 -0
  61. package/dist/domain/researchUtils.js +85 -0
  62. package/dist/domain/researchUtils.js.map +1 -0
  63. package/dist/infra/chat/chatModelProvider.d.ts +4 -0
  64. package/dist/infra/chat/chatModelProvider.d.ts.map +1 -0
  65. package/dist/infra/chat/chatModelProvider.js +13 -0
  66. package/dist/infra/chat/chatModelProvider.js.map +1 -0
  67. package/dist/infra/checkpoint/checkpointerFactory.d.ts +38 -0
  68. package/dist/infra/checkpoint/checkpointerFactory.d.ts.map +1 -0
  69. package/dist/infra/checkpoint/checkpointerFactory.js +54 -0
  70. package/dist/infra/checkpoint/checkpointerFactory.js.map +1 -0
  71. package/dist/infra/content/contentExtractor.d.ts +58 -0
  72. package/dist/infra/content/contentExtractor.d.ts.map +1 -0
  73. package/dist/infra/content/contentExtractor.js +296 -0
  74. package/dist/infra/content/contentExtractor.js.map +1 -0
  75. package/dist/infra/embeddings/embeddingProvider.d.ts +4 -0
  76. package/dist/infra/embeddings/embeddingProvider.d.ts.map +1 -0
  77. package/dist/infra/embeddings/embeddingProvider.js +11 -0
  78. package/dist/infra/embeddings/embeddingProvider.js.map +1 -0
  79. package/dist/infra/healthCheck.d.ts +23 -0
  80. package/dist/infra/healthCheck.d.ts.map +1 -0
  81. package/dist/infra/healthCheck.js +57 -0
  82. package/dist/infra/healthCheck.js.map +1 -0
  83. package/dist/infra/observability/tokenCounter.d.ts +30 -0
  84. package/dist/infra/observability/tokenCounter.d.ts.map +1 -0
  85. package/dist/infra/observability/tokenCounter.js +46 -0
  86. package/dist/infra/observability/tokenCounter.js.map +1 -0
  87. package/dist/infra/observability/tracing.d.ts +38 -0
  88. package/dist/infra/observability/tracing.d.ts.map +1 -0
  89. package/dist/infra/observability/tracing.js +92 -0
  90. package/dist/infra/observability/tracing.js.map +1 -0
  91. package/dist/infra/rateLimit/circuitBreaker.d.ts +54 -0
  92. package/dist/infra/rateLimit/circuitBreaker.d.ts.map +1 -0
  93. package/dist/infra/rateLimit/circuitBreaker.js +97 -0
  94. package/dist/infra/rateLimit/circuitBreaker.js.map +1 -0
  95. package/dist/infra/rateLimit/rateLimiter.d.ts +42 -0
  96. package/dist/infra/rateLimit/rateLimiter.d.ts.map +1 -0
  97. package/dist/infra/rateLimit/rateLimiter.js +89 -0
  98. package/dist/infra/rateLimit/rateLimiter.js.map +1 -0
  99. package/dist/infra/search/searxngClient.d.ts +29 -0
  100. package/dist/infra/search/searxngClient.d.ts.map +1 -0
  101. package/dist/infra/search/searxngClient.js +85 -0
  102. package/dist/infra/search/searxngClient.js.map +1 -0
  103. package/dist/infra/search/urlBlocklist.d.ts +28 -0
  104. package/dist/infra/search/urlBlocklist.d.ts.map +1 -0
  105. package/dist/infra/search/urlBlocklist.js +78 -0
  106. package/dist/infra/search/urlBlocklist.js.map +1 -0
  107. package/dist/infra/vector/qdrantClient.d.ts +18 -0
  108. package/dist/infra/vector/qdrantClient.d.ts.map +1 -0
  109. package/dist/infra/vector/qdrantClient.js +82 -0
  110. package/dist/infra/vector/qdrantClient.js.map +1 -0
  111. package/dist/infra/vector/researchRepository.d.ts +39 -0
  112. package/dist/infra/vector/researchRepository.d.ts.map +1 -0
  113. package/dist/infra/vector/researchRepository.js +294 -0
  114. package/dist/infra/vector/researchRepository.js.map +1 -0
  115. package/dist/lib/helpers.d.ts +50 -0
  116. package/dist/lib/helpers.d.ts.map +1 -0
  117. package/dist/lib/helpers.js +124 -0
  118. package/dist/lib/helpers.js.map +1 -0
  119. package/dist/lib/index.d.ts +65 -0
  120. package/dist/lib/index.d.ts.map +1 -0
  121. package/dist/lib/index.js +61 -0
  122. package/dist/lib/index.js.map +1 -0
  123. package/dist/lib/rmsTool.d.ts +28 -0
  124. package/dist/lib/rmsTool.d.ts.map +1 -0
  125. package/dist/lib/rmsTool.js +79 -0
  126. package/dist/lib/rmsTool.js.map +1 -0
  127. package/dist/lib/schemas/lifecycleSchemas.d.ts +42 -0
  128. package/dist/lib/schemas/lifecycleSchemas.d.ts.map +1 -0
  129. package/dist/lib/schemas/lifecycleSchemas.js +176 -0
  130. package/dist/lib/schemas/lifecycleSchemas.js.map +1 -0
  131. package/dist/lib/schemas/researchSchemas.d.ts +23 -0
  132. package/dist/lib/schemas/researchSchemas.d.ts.map +1 -0
  133. package/dist/lib/schemas/researchSchemas.js +83 -0
  134. package/dist/lib/schemas/researchSchemas.js.map +1 -0
  135. package/dist/lib/tools/deleteResearch.d.ts +19 -0
  136. package/dist/lib/tools/deleteResearch.d.ts.map +1 -0
  137. package/dist/lib/tools/deleteResearch.js +37 -0
  138. package/dist/lib/tools/deleteResearch.js.map +1 -0
  139. package/dist/lib/tools/getDatetime.d.ts +7 -0
  140. package/dist/lib/tools/getDatetime.d.ts.map +1 -0
  141. package/dist/lib/tools/getDatetime.js +26 -0
  142. package/dist/lib/tools/getDatetime.js.map +1 -0
  143. package/dist/lib/tools/getResearch.d.ts +19 -0
  144. package/dist/lib/tools/getResearch.d.ts.map +1 -0
  145. package/dist/lib/tools/getResearch.js +32 -0
  146. package/dist/lib/tools/getResearch.js.map +1 -0
  147. package/dist/lib/tools/listResearch.d.ts +25 -0
  148. package/dist/lib/tools/listResearch.d.ts.map +1 -0
  149. package/dist/lib/tools/listResearch.js +41 -0
  150. package/dist/lib/tools/listResearch.js.map +1 -0
  151. package/dist/lib/tools/refreshResearch.d.ts +27 -0
  152. package/dist/lib/tools/refreshResearch.d.ts.map +1 -0
  153. package/dist/lib/tools/refreshResearch.js +81 -0
  154. package/dist/lib/tools/refreshResearch.js.map +1 -0
  155. package/dist/lib/tools/research.d.ts +108 -0
  156. package/dist/lib/tools/research.d.ts.map +1 -0
  157. package/dist/lib/tools/research.js +273 -0
  158. package/dist/lib/tools/research.js.map +1 -0
  159. package/dist/lib/tools/searchResearch.d.ts +25 -0
  160. package/dist/lib/tools/searchResearch.d.ts.map +1 -0
  161. package/dist/lib/tools/searchResearch.js +45 -0
  162. package/dist/lib/tools/searchResearch.js.map +1 -0
  163. package/dist/lib/types.d.ts +51 -0
  164. package/dist/lib/types.d.ts.map +1 -0
  165. package/dist/lib/types.js +2 -0
  166. package/dist/lib/types.js.map +1 -0
  167. package/dist/mcp/index.d.ts +12 -0
  168. package/dist/mcp/index.d.ts.map +1 -0
  169. package/dist/mcp/index.js +12 -0
  170. package/dist/mcp/index.js.map +1 -0
  171. package/dist/mcp/server.d.ts +45 -0
  172. package/dist/mcp/server.d.ts.map +1 -0
  173. package/dist/mcp/server.js +440 -0
  174. package/dist/mcp/server.js.map +1 -0
  175. package/package.json +132 -0
@@ -0,0 +1,54 @@
1
+ /** Circuit breaker states: "closed" (normal operation), "open" (fast-fail), "half_open" (probing after the cooldown). */
2
+ type CircuitState = "closed" | "open" | "half_open";
3
+ /** Configuration for a CircuitBreaker instance. All fields are required; the constructor applies no defaults. */
4
+ export interface CircuitBreakerOptions {
5
+ /** Number of consecutive failures before tripping to OPEN (the bundled SearxNG breaker uses 5). */
6
+ failureThreshold: number;
7
+ /** Milliseconds that must pass since the last failure before an OPEN breaker lets a HALF_OPEN probe through (the bundled SearxNG breaker uses 30000). */
8
+ resetTimeMs: number;
9
+ /** Human-readable name, interpolated into log and error messages. */
10
+ name: string;
11
+ }
12
+ /**
13
+ * In-process circuit breaker — no external dependencies.
14
+ *
15
+ * Three states:
16
+ * - **CLOSED**: Normal operation. Consecutive failures are counted; a success resets the count.
17
+ * - **OPEN**: Fast-fail. After `failureThreshold` consecutive failures, all calls
18
+ * are rejected immediately without executing `fn`.
19
+ * - **HALF_OPEN**: After `resetTimeMs`, one probe request is allowed through.
20
+ * On success → CLOSED. On failure → OPEN again.
21
+ *
22
+ * @example
23
+ * ```ts
24
+ * const breaker = new CircuitBreaker({ failureThreshold: 5, resetTimeMs: 30_000, name: "searxng" });
25
+ * const result = await breaker.execute(() => fetch(url));
26
+ * ```
27
+ */
28
+ export declare class CircuitBreaker {
29
+ private readonly opts;
30
+ private _state;
31
+ private consecutiveFailures; // failures since the last success or reset()
32
+ private lastFailureTime; // Date.now() timestamp (ms) of the most recent failure; 0 before any failure
33
+ constructor(opts: CircuitBreakerOptions);
34
+ /** Current state (for diagnostics/testing). */
35
+ get state(): CircuitState;
36
+ /** Runs `fn` through the breaker: rejects with an Error, without calling `fn`, while OPEN and still cooling down; otherwise settles with `fn`'s result or rethrows its error after recording the outcome. */
37
+ execute<T>(fn: () => Promise<T>): Promise<T>;
38
+ private onSuccess;
39
+ private onFailure;
40
+ /** Force the breaker back to CLOSED and clear the failure count and last-failure timestamp (for testing). */
41
+ reset(): void;
42
+ }
43
+ /**
44
+ * Shared SearxNG circuit breaker: trips after 5 consecutive failures, 30s cooldown.
45
+ * Prevents burning time on a down meta-search engine.
46
+ */
47
+ export declare const searchBreaker: CircuitBreaker;
48
+ /**
49
+ * Shared content extraction circuit breaker: trips after 8 consecutive failures, 60s cooldown.
50
+ * Higher threshold because individual sites fail independently.
51
+ */
52
+ export declare const contentBreaker: CircuitBreaker;
53
+ export {};
54
+ //# sourceMappingURL=circuitBreaker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"circuitBreaker.d.ts","sourceRoot":"","sources":["../../../src/infra/rateLimit/circuitBreaker.ts"],"names":[],"mappings":"AAMA,8BAA8B;AAC9B,KAAK,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;AAEpD,mDAAmD;AACnD,MAAM,WAAW,qBAAqB;IACpC,0EAA0E;IAC1E,gBAAgB,EAAE,MAAM,CAAC;IACzB,sFAAsF;IACtF,WAAW,EAAE,MAAM,CAAC;IACpB,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,cAAc;IAKb,OAAO,CAAC,QAAQ,CAAC,IAAI;IAJjC,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,mBAAmB,CAAK;IAChC,OAAO,CAAC,eAAe,CAAK;gBAEC,IAAI,EAAE,qBAAqB;IAExD,+CAA+C;IAC/C,IAAI,KAAK,IAAI,YAAY,CAExB;IAED,2DAA2D;IACrD,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IAyBlD,OAAO,CAAC,SAAS;IAQjB,OAAO,CAAC,SAAS;IAYjB,uDAAuD;IACvD,KAAK,IAAI,IAAI;CAKd;AAMD;;;GAGG;AACH,eAAO,MAAM,aAAa,gBAIxB,CAAC;AAEH;;;GAGG;AACH,eAAO,MAAM,cAAc,gBAIzB,CAAC"}
@@ -0,0 +1,97 @@
1
+ import { logWarn, logInfo } from "../observability/tracing.js";
2
/**
 * In-process circuit breaker — no external dependencies.
 *
 * Three states:
 * - **CLOSED**: Normal operation. Consecutive failures are counted; any
 *   success resets the count.
 * - **OPEN**: Fast-fail. After `failureThreshold` consecutive failures, all calls
 *   are rejected immediately without executing `fn`.
 * - **HALF_OPEN**: Once `resetTimeMs` has elapsed since the last failure,
 *   exactly one probe request is allowed through. Calls that arrive while
 *   that probe is still in flight are rejected without executing `fn`.
 *   On success → CLOSED. On failure → OPEN again.
 *
 * @example
 * ```ts
 * const breaker = new CircuitBreaker({ failureThreshold: 5, resetTimeMs: 30_000, name: "searxng" });
 * const result = await breaker.execute(() => fetch(url));
 * ```
 */
export class CircuitBreaker {
    opts;
    _state = "closed";
    consecutiveFailures = 0;
    lastFailureTime = 0;
    /** True while the single HALF_OPEN probe is awaiting its outcome. */
    probeInFlight = false;
    /**
     * @param opts Breaker configuration (`failureThreshold`, `resetTimeMs`, `name`).
     *   Stored as-is; no defaults are applied.
     */
    constructor(opts) {
        this.opts = opts;
    }
    /** Current state (for diagnostics/testing). */
    get state() {
        return this._state;
    }
    /**
     * Wraps an async operation with circuit breaker logic.
     *
     * @param fn Operation to run; its result or error is passed through unchanged.
     * @returns Whatever `fn` resolves to.
     * @throws {Error} Without calling `fn` when the breaker is OPEN and still
     *   cooling down, or HALF_OPEN with a probe already in flight.
     */
    async execute(fn) {
        if (this._state === "open") {
            // Check if enough time has passed to try a probe
            if (Date.now() - this.lastFailureTime >= this.opts.resetTimeMs) {
                this._state = "half_open";
                logInfo(`Circuit breaker "${this.opts.name}" → HALF_OPEN (probe allowed)`);
            }
            else {
                throw new Error(`Circuit breaker "${this.opts.name}" is OPEN — rejecting request. ` +
                    `${this.consecutiveFailures} consecutive failures. ` +
                    `Will retry after ${this.opts.resetTimeMs}ms cooldown.`);
            }
        }
        // Only one caller may probe a recovering service. Without this guard,
        // every concurrent caller would hit the service as soon as the cooldown
        // elapsed, because the state is no longer "open".
        let isProbe = false;
        if (this._state === "half_open") {
            if (this.probeInFlight) {
                throw new Error(`Circuit breaker "${this.opts.name}" is HALF_OPEN — probe in flight, rejecting request.`);
            }
            this.probeInFlight = true;
            isProbe = true;
        }
        try {
            const result = await fn();
            this.onSuccess();
            return result;
        }
        catch (err) {
            this.onFailure();
            throw err;
        }
        finally {
            if (isProbe) {
                this.probeInFlight = false;
            }
        }
    }
    /** Records a success: closes the breaker and clears the failure streak. */
    onSuccess() {
        if (this._state === "half_open") {
            logInfo(`Circuit breaker "${this.opts.name}" → CLOSED (probe succeeded)`);
        }
        this._state = "closed";
        this.consecutiveFailures = 0;
    }
    /**
     * Records a failure and trips to OPEN once the streak reaches the threshold.
     * A failed probe re-opens the breaker because the streak is only cleared
     * on success, so it is still at or above the threshold.
     */
    onFailure() {
        this.consecutiveFailures++;
        this.lastFailureTime = Date.now();
        if (this.consecutiveFailures >= this.opts.failureThreshold) {
            this._state = "open";
            logWarn(`Circuit breaker "${this.opts.name}" → OPEN after ${this.consecutiveFailures} failures`);
        }
    }
    /** Reset the breaker to CLOSED state (for testing). */
    reset() {
        this._state = "closed";
        this.consecutiveFailures = 0;
        this.lastFailureTime = 0;
        this.probeInFlight = false;
    }
}
76
+ // ---------------------------------------------------------------------------
77
+ // Pre-configured breakers for RMS infrastructure services
78
+ // ---------------------------------------------------------------------------
79
/**
 * Shared breaker guarding SearxNG calls: opens after 5 consecutive failures
 * and stays open for a 30-second cooldown, so a down meta-search engine is
 * not hammered while it is unavailable.
 */
export const searchBreaker = new CircuitBreaker({
    failureThreshold: 5,
    resetTimeMs: 30 * 1000,
    name: "searxng",
});
/**
 * Shared breaker guarding content extraction: opens after 8 consecutive
 * failures and stays open for a 60-second cooldown. The threshold is higher
 * than the search breaker's because individual sites fail independently.
 */
export const contentBreaker = new CircuitBreaker({
    failureThreshold: 8,
    resetTimeMs: 60 * 1000,
    name: "content-extraction",
});
97
+ //# sourceMappingURL=circuitBreaker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"circuitBreaker.js","sourceRoot":"","sources":["../../../src/infra/rateLimit/circuitBreaker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,6BAA6B,CAAC;AAmB/D;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,cAAc;IAKI;IAJrB,MAAM,GAAiB,QAAQ,CAAC;IAChC,mBAAmB,GAAG,CAAC,CAAC;IACxB,eAAe,GAAG,CAAC,CAAC;IAE5B,YAA6B,IAA2B;QAA3B,SAAI,GAAJ,IAAI,CAAuB;IAAG,CAAC;IAE5D,+CAA+C;IAC/C,IAAI,KAAK;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,2DAA2D;IAC3D,KAAK,CAAC,OAAO,CAAI,EAAoB;QACnC,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC3B,iDAAiD;YACjD,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBAC/D,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC;gBAC1B,OAAO,CAAC,oBAAoB,IAAI,CAAC,IAAI,CAAC,IAAI,+BAA+B,CAAC,CAAC;YAC7E,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,KAAK,CACb,oBAAoB,IAAI,CAAC,IAAI,CAAC,IAAI,iCAAiC;oBACjE,GAAG,IAAI,CAAC,mBAAmB,yBAAyB;oBACpD,oBAAoB,IAAI,CAAC,IAAI,CAAC,WAAW,cAAc,CAC1D,CAAC;YACJ,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,EAAE,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,EAAE,CAAC;YACjB,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,SAAS,EAAE,CAAC;YACjB,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,SAAS;QACf,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YAChC,OAAO,CAAC,oBAAoB,IAAI,CAAC,IAAI,CAAC,IAAI,8BAA8B,CAAC,CAAC;QAC5E,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;QACvB,IAAI,CAAC,mBAAmB,GAAG,CAAC,CAAC;IAC/B,CAAC;IAEO,SAAS;QACf,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC3B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAElC,IAAI,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3D,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;YACrB,OAAO,CACL,oBAAoB,IAAI,CAAC,IAAI,CAAC,IAAI,kBAAkB,IAAI,CAAC,mBAAmB,WAAW,CACxF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,uDAAuD;IACvD,KAAK;QACH,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;QACvB,IAAI,CAAC,mBAAmB,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;IAC3B,CAAC;CACF;AAED,8EAA8E;AAC9E,0DAA0D;AAC1D,8EAA8E;AAE9E;;;GAGG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,cAAc,CAAC;IAC9C,gBAAgB,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM;IACnB,IAAI,EAAE,SAAS;CAChB,CAA
C,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC;IAC/C,gBAAgB,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM;IACnB,IAAI,EAAE,oBAAoB;CAC3B,CAAC,CAAC"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * In-process token bucket rate limiter — no external dependencies.
3
+ *
4
+ * Allows short bursts up to `maxTokens` while maintaining a steady-state
5
+ * throughput of `refillRatePerSecond`. Callers that exceed the budget are
6
+ * queued and resolved in FIFO order.
7
+ *
8
+ * @example
9
+ * ```ts
10
+ * const limiter = new TokenBucketLimiter(5, 2); // 5 burst, 2/sec refill
11
+ * await limiter.acquire(); // wait until a token is available
12
+ * ```
13
+ */
14
+ export declare class TokenBucketLimiter {
15
+ private readonly maxTokens;
16
+ private readonly refillRatePerSecond;
17
+ private tokens; // current balance; starts at maxTokens and may be fractional
18
+ private lastRefill; // Date.now() timestamp (ms) of the last refill
19
+ private readonly waitQueue; // resolve callbacks of callers waiting for a token
20
+ constructor(maxTokens: number, refillRatePerSecond: number);
21
+ /** Adds `elapsed seconds × refillRatePerSecond` tokens, capped at `maxTokens`, and records the refill time. */
22
+ private refill;
23
+ /** Resolves immediately when a whole token is available; otherwise waits on a timer sized from the current token deficit. */
24
+ acquire(): Promise<void>;
25
+ /** Whole tokens available after a refill (for testing/diagnostics). Note: reading this triggers a refill. */
26
+ get availableTokens(): number;
27
+ }
28
+ /** Creates a new, independent search rate limiter (5 burst, 2/sec refill) for DI or testing. */
29
+ export declare function createSearchLimiter(): TokenBucketLimiter;
30
+ /** Creates a new, independent content extraction rate limiter (3 burst, 1/sec refill) for DI or testing. */
31
+ export declare function createContentLimiter(): TokenBucketLimiter;
32
+ /**
33
+ * Shared SearxNG rate limiter: 5 burst, 2/sec refill.
34
+ * Prevents overwhelming the meta-search engine with rapid concurrent queries.
35
+ */
36
+ export declare const searchLimiter: TokenBucketLimiter;
37
+ /**
38
+ * Shared content extraction rate limiter: 3 burst, 1/sec refill.
39
+ * Prevents target websites from IP-blocking the research bot.
40
+ */
41
+ export declare const contentLimiter: TokenBucketLimiter;
42
+ //# sourceMappingURL=rateLimiter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rateLimiter.d.ts","sourceRoot":"","sources":["../../../src/infra/rateLimit/rateLimiter.ts"],"names":[],"mappings":"AAMA;;;;;;;;;;;;GAYG;AACH,qBAAa,kBAAkB;IAM3B,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,mBAAmB;IANtC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAyB;gBAGhC,SAAS,EAAE,MAAM,EACjB,mBAAmB,EAAE,MAAM;IAM9C,6DAA6D;IAC7D,OAAO,CAAC,MAAM;IAQd,wDAAwD;IAClD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IA2B9B,oEAAoE;IACpE,IAAI,eAAe,IAAI,MAAM,CAG5B;CACF;AAMD,sEAAsE;AACtE,wBAAgB,mBAAmB,IAAI,kBAAkB,CAExD;AAED,kFAAkF;AAClF,wBAAgB,oBAAoB,IAAI,kBAAkB,CAEzD;AAED;;;GAGG;AACH,eAAO,MAAM,aAAa,oBAAwB,CAAC;AAEnD;;;GAGG;AACH,eAAO,MAAM,cAAc,oBAAyB,CAAC"}
@@ -0,0 +1,89 @@
1
+ import { logDebug } from "../observability/tracing.js";
2
+ // ---------------------------------------------------------------------------
3
+ // Token Bucket Rate Limiter
4
+ // ---------------------------------------------------------------------------
5
/**
 * In-process token bucket rate limiter — no external dependencies.
 *
 * Allows short bursts up to `maxTokens` while maintaining a steady-state
 * throughput of `refillRatePerSecond`. Callers that exceed the budget are
 * queued and resolved in FIFO order, spaced `1 / refillRatePerSecond`
 * seconds apart.
 *
 * NOTE(review): assumes `refillRatePerSecond` is a positive finite number;
 * a zero or negative rate yields a non-finite timer delay — confirm callers.
 *
 * @example
 * ```ts
 * const limiter = new TokenBucketLimiter(5, 2); // 5 burst, 2/sec refill
 * await limiter.acquire(); // wait until a token is available
 * ```
 */
export class TokenBucketLimiter {
    maxTokens;
    refillRatePerSecond;
    tokens;
    lastRefill;
    waitQueue = [];
    /**
     * @param maxTokens Bucket capacity, i.e. the largest burst served without waiting.
     * @param refillRatePerSecond Tokens added per second of elapsed time.
     */
    constructor(maxTokens, refillRatePerSecond) {
        this.maxTokens = maxTokens;
        this.refillRatePerSecond = refillRatePerSecond;
        this.tokens = maxTokens;
        this.lastRefill = Date.now();
    }
    /** Refill tokens based on elapsed time since last refill. */
    refill() {
        const now = Date.now();
        // Date.now() is not monotonic: never drain tokens if the clock steps back.
        const elapsedSeconds = Math.max(0, now - this.lastRefill) / 1000;
        this.tokens = Math.min(this.maxTokens, this.tokens + elapsedSeconds * this.refillRatePerSecond);
        this.lastRefill = now;
    }
    /**
     * Wait until a token is available, then consume it.
     *
     * The token is always taken up front. A negative balance is a reservation
     * for tokens that have not been refilled yet, so each additional waiter
     * is scheduled one refill interval after the previous one instead of all
     * waiters being released by the same timer tick.
     *
     * @returns A promise that resolves once the caller may proceed.
     */
    async acquire() {
        this.refill();
        this.tokens -= 1;
        if (this.tokens >= 0) {
            return;
        }
        // Time until the refill has paid back this caller's share of the debt.
        const waitMs = (-this.tokens / this.refillRatePerSecond) * 1000;
        logDebug("Rate limiter queued", { waitMs: Math.round(waitMs) });
        return new Promise((resolve) => {
            this.waitQueue.push(resolve);
            setTimeout(() => {
                // Delays grow with the debt, so timers fire in arrival order and
                // the head of the queue is the caller whose token is now due.
                const waiter = this.waitQueue.shift();
                if (waiter)
                    waiter();
            }, waitMs);
        });
    }
    /** Current number of available tokens (for testing/diagnostics); 0 while waiters hold reservations. */
    get availableTokens() {
        this.refill();
        return Math.max(0, Math.floor(this.tokens));
    }
}
68
+ // ---------------------------------------------------------------------------
69
+ // Pre-configured limiters for RMS infrastructure services
70
+ // ---------------------------------------------------------------------------
71
/** Builds a fresh SearxNG rate limiter (burst of 5, refilling 2 tokens/sec) for DI or testing. */
export function createSearchLimiter() {
    const burstCapacity = 5;
    const refillPerSecond = 2;
    return new TokenBucketLimiter(burstCapacity, refillPerSecond);
}
75
/** Builds a fresh content extraction rate limiter (burst of 3, refilling 1 token/sec) for DI or testing. */
export function createContentLimiter() {
    const burstCapacity = 3;
    const refillPerSecond = 1;
    return new TokenBucketLimiter(burstCapacity, refillPerSecond);
}
79
/**
 * Module-level SearxNG rate limiter, built by `createSearchLimiter()`
 * (5 burst, 2/sec refill). Keeps rapid concurrent queries from overwhelming
 * the meta-search engine.
 */
export const searchLimiter = createSearchLimiter();
/**
 * Module-level content extraction rate limiter, built by
 * `createContentLimiter()` (3 burst, 1/sec refill). Keeps target websites
 * from IP-blocking the research bot.
 */
export const contentLimiter = createContentLimiter();
89
+ //# sourceMappingURL=rateLimiter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rateLimiter.js","sourceRoot":"","sources":["../../../src/infra/rateLimit/rateLimiter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,6BAA6B,CAAC;AAEvD,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,kBAAkB;IAMV;IACA;IANX,MAAM,CAAS;IACf,UAAU,CAAS;IACV,SAAS,GAAsB,EAAE,CAAC;IAEnD,YACmB,SAAiB,EACjB,mBAA2B;QAD3B,cAAS,GAAT,SAAS,CAAQ;QACjB,wBAAmB,GAAnB,mBAAmB,CAAQ;QAE5C,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;QACxB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC/B,CAAC;IAED,6DAA6D;IACrD,MAAM;QACZ,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;QAC/C,MAAM,SAAS,GAAG,OAAO,GAAG,IAAI,CAAC,mBAAmB,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;QAChE,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC;IACxB,CAAC;IAED,wDAAwD;IACxD,KAAK,CAAC,OAAO;QACX,IAAI,CAAC,MAAM,EAAE,CAAC;QAEd,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;YACjB,OAAO;QACT,CAAC;QAED,uCAAuC;QACvC,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,mBAAmB,CAAC,GAAG,IAAI,CAAC;QACrE,QAAQ,CAAC,qBAAqB,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAEhE,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YACnC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAE7B,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,CAAC,MAAM,EAAE,CAAC;gBACd,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;oBACrB,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;gBACnB,CAAC;gBACD,kCAAkC;gBAClC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;gBACtC,IAAI,MAAM;oBAAE,MAAM,EAAE,CAAC;YACvB,CAAC,EAAE,MAAM,CAAC,CAAC;QACb,CAAC,CAAC,CAAC;IACL,CAAC;IAED,oEAAoE;IACpE,IAAI,eAAe;QACjB,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC;CACF;AAED,8EAA8E;AAC9E,0DAA0D;AAC1D,8EAA8E;AAE9E,sEAAsE;AACtE,MAAM,UAAU,mBAAmB;IACjC,OAAO,IAAI,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACtC,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,oBAAoB;IAClC,OAAO,IAAI,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACtC
,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;AAEnD;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,oBAAoB,EAAE,CAAC"}
@@ -0,0 +1,29 @@
1
+ import { type TokenBucketLimiter } from "../rateLimit/rateLimiter.js";
2
+ import { type CircuitBreaker } from "../rateLimit/circuitBreaker.js";
3
+ export interface SearxngSearchResult { // one SearxNG hit as returned by performSearch
4
+ title: string; // result title; "" when SearxNG omits it
5
+ url: string; // result URL; "" when SearxNG omits it
6
+ snippet: string; // SearxNG "content" field; "" when absent
7
+ engine: string; // "engine" field, else the first entry of "engines", else "unknown"
8
+ }
9
+ /**
10
+ * Performs a web search via the SearxNG JSON API and returns typed results.
11
+ *
12
+ * This calls the SearxNG `/search` endpoint directly via `fetch()` instead
13
+ * of going through LangChain's `SearxngSearch.invoke()`, which has multiple
14
+ * issues:
15
+ * - Returns comma-separated JSON objects (not a valid JSON array)
16
+ * - Uses `link` instead of `url` as the key, losing URL info during parsing
17
+ * - Hardcoded 5s AbortSignal that kills slow engines
18
+ * - Sends POST with content-type JSON while SearxNG expects GET
19
+ *
20
+ * The API base URL is always read from `SEARXNG_API_BASE` env var.
21
+ * The request is a GET with a 15s timeout, made through the rate limiter and circuit breaker.
+ * Rejects on a non-OK HTTP status, on a fetch/timeout error, or when the circuit breaker rejects the call.
+ */
22
+ export declare function performSearch(query: string, options?: {
23
+ numResults?: number; // maximum number of results to return (default 10)
24
+ /** Override the default search rate limiter (for DI/testing). */
25
+ limiter?: TokenBucketLimiter;
26
+ /** Override the default search circuit breaker (for DI/testing). */
27
+ breaker?: CircuitBreaker;
28
+ }): Promise<SearxngSearchResult[]>;
29
+ //# sourceMappingURL=searxngClient.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searxngClient.d.ts","sourceRoot":"","sources":["../../../src/infra/search/searxngClient.ts"],"names":[],"mappings":"AAEA,OAAO,EAAiB,KAAK,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACrF,OAAO,EAAiB,KAAK,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAGpF,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB;AA0BD;;;;;;;;;;;;GAYG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;IACR,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,OAAO,CAAC,EAAE,kBAAkB,CAAC;IAC7B,oEAAoE;IACpE,OAAO,CAAC,EAAE,cAAc,CAAC;CAC1B,GACA,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAsEhC"}
@@ -0,0 +1,85 @@
1
+ import { loadEnv } from "../../config/env.js";
2
+ import { logInfo, logWarn, logError, ErrorCodes } from "../observability/tracing.js";
3
+ import { searchLimiter } from "../rateLimit/rateLimiter.js";
4
+ import { searchBreaker } from "../rateLimit/circuitBreaker.js";
5
+ import { filterBlockedUrls } from "./urlBlocklist.js";
6
/** Default request timeout for SearxNG API calls (ms). */
const SEARXNG_TIMEOUT_MS = 15_000;
/**
 * Performs a web search via the SearxNG JSON API and returns typed results.
 *
 * This calls the SearxNG `/search` endpoint directly via `fetch()` instead
 * of going through LangChain's `SearxngSearch.invoke()`, which has multiple
 * issues:
 * - Returns comma-separated JSON objects (not a valid JSON array)
 * - Uses `link` instead of `url` as the key, losing URL info during parsing
 * - Hardcoded 5s AbortSignal that kills slow engines
 * - Sends POST with content-type JSON while SearxNG expects GET
 *
 * The API base URL is always read from `SEARXNG_API_BASE` env var.
 *
 * @param query Search terms, sent as the `q` parameter.
 * @param options Optional overrides: `numResults` (maximum results returned,
 *   default 10), `limiter` and `breaker` (for DI/testing).
 * @returns Up to `numResults` results that passed the URL blocklist; an empty
 *   array when the response carries no `results` list.
 * @throws {Error} On a non-OK HTTP status, a fetch/timeout failure, or when
 *   the circuit breaker rejects the call. Failures are logged and rethrown.
 */
export async function performSearch(query, options) {
    const env = loadEnv();
    const apiBase = env.SEARXNG_API_BASE;
    const limit = options?.numResults ?? 10;
    const limiter = options?.limiter ?? searchLimiter;
    const breaker = options?.breaker ?? searchBreaker;
    logInfo("SearXNG search", { query, numResults: limit });
    // Build query parameters for the SearxNG JSON API
    const params = new URLSearchParams({
        q: query,
        format: "json",
    });
    if (env.SEARXNG_ENGINES)
        params.set("engines", env.SEARXNG_ENGINES);
    if (env.SEARXNG_LANGUAGE)
        params.set("language", env.SEARXNG_LANGUAGE);
    if (env.SEARXNG_TIME_RANGE)
        params.set("time_range", env.SEARXNG_TIME_RANGE);
    const url = `${apiBase}/search?${params.toString()}`;
    return breaker.execute(async () => {
        try {
            await limiter.acquire();
            const resp = await fetch(url, {
                method: "GET",
                signal: AbortSignal.timeout(SEARXNG_TIMEOUT_MS),
            });
            if (!resp.ok) {
                throw new Error(`SearxNG returned HTTP ${resp.status}: ${resp.statusText}`);
            }
            // A JSON `null` body is treated like an empty response object.
            const data = (await resp.json()) ?? {};
            // Log unresponsive engines for diagnostics
            if (data.unresponsive_engines?.length) {
                logWarn("SearxNG unresponsive engines", {
                    engines: data.unresponsive_engines
                        .map(([name, reason]) => `${name}: ${reason}`)
                        .join(", "),
                });
            }
            // Map native SearxNG results to our typed interface. A missing or
            // malformed `results` field yields no results instead of a TypeError.
            const nativeResults = Array.isArray(data.results) ? data.results : [];
            const rawResults = nativeResults.map((r) => ({
                title: r.title ?? "",
                url: r.url ?? "",
                snippet: r.content ?? "",
                engine: r.engine ?? r.engines?.[0] ?? "unknown",
            }));
            // Remove spam/commercial URLs BEFORE applying the limit, so blocked
            // entries do not consume result slots.
            const results = filterBlockedUrls(rawResults, (r) => r.url).slice(0, limit);
            logInfo("SearXNG results", {
                query,
                resultCount: results.length,
                totalResults: data.number_of_results,
                answersCount: data.answers?.length ?? 0,
            });
            return results;
        }
        catch (err) {
            logError("SearXNG search failed", {
                errorCode: ErrorCodes.SEARCH_FAILED,
                error: err instanceof Error ? err.message : String(err),
            });
            throw err;
        }
    });
}
85
+ //# sourceMappingURL=searxngClient.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searxngClient.js","sourceRoot":"","sources":["../../../src/infra/search/searxngClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,6BAA6B,CAAC;AACrF,OAAO,EAAE,aAAa,EAA2B,MAAM,6BAA6B,CAAC;AACrF,OAAO,EAAE,aAAa,EAAuB,MAAM,gCAAgC,CAAC;AACpF,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AA8BtD,0DAA0D;AAC1D,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,OAMC;IAED,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IACtB,MAAM,OAAO,GAAG,GAAG,CAAC,gBAAgB,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,EAAE,UAAU,IAAI,EAAE,CAAC;IACxC,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,aAAa,CAAC;IAClD,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,aAAa,CAAC;IAElD,OAAO,CAAC,gBAAgB,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAExD,kDAAkD;IAClD,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;QACjC,CAAC,EAAE,KAAK;QACR,MAAM,EAAE,MAAM;KACf,CAAC,CAAC;IACH,IAAI,GAAG,CAAC,eAAe;QAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,eAAe,CAAC,CAAC;IACpE,IAAI,GAAG,CAAC,gBAAgB;QAAE,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAC;IACvE,IAAI,GAAG,CAAC,kBAAkB;QAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAE7E,MAAM,GAAG,GAAG,GAAG,OAAO,WAAW,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;IAErD,OAAO,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE;QAChC,IAAI,CAAC;YACH,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YACxB,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC5B,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,kBAAkB,CAAC;aAChD,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;gBACb,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;YAC9E,CAAC;YAED,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,IAAI,EAAE,CAAuB,CAAC;YAEvD,2CAA2C;YAC3C,IAAI,IAAI,CAAC,oBAAoB,EAAE,MAAM,EAAE,CAAC;gBACtC,OAAO,CAAC,8BAA8B,EAAE;oBACtC,OAAO,EAAE,IAAI,CAAC,oBAAoB;yBAC/B,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,GAAG,IAAI,KAAK,MAAM,EAAE,CAAC;yBAC7C,IAAI,CAAC,IAAI,CAAC;iBACd,CAAC,CAAC;YACL,CAAC;YAED,oDAAoD;YACpD,MAAM,UAAU,GAA0B,IAAI,CAAC,OAAO,CAAC,KAAK,CAA
C,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACjF,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;gBACpB,GAAG,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE;gBAChB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;gBACxB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS;aAChD,CAAC,CAAC,CAAC;YAEJ,+DAA+D;YAC/D,MAAM,OAAO,GAAG,iBAAiB,CAAC,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAE5D,OAAO,CAAC,iBAAiB,EAAE;gBACzB,KAAK;gBACL,WAAW,EAAE,OAAO,CAAC,MAAM;gBAC3B,YAAY,EAAE,IAAI,CAAC,iBAAiB;gBACpC,YAAY,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,IAAI,CAAC;aACxC,CAAC,CAAC;YAEH,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,QAAQ,CAAC,uBAAuB,EAAE;gBAChC,SAAS,EAAE,UAAU,CAAC,aAAa;gBACnC,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;YACH,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Built-in blocklist of retail / e-commerce domains that never produce
3
+ * useful technical content. Entries are matched as hostname suffixes, so
4
+ * `samsclub.com` also blocks `shop.samsclub.com`.
5
+ */
6
+ export declare const DEFAULT_BLOCKED_DOMAINS: readonly string[];
7
+ /**
8
+ * Returns the merged blocklist: hardcoded defaults + user-supplied domains
9
+ * from the `SEARXNG_URL_BLOCKLIST` env var (comma-separated).
+ * User entries are trimmed and lower-cased, and empty entries are dropped.
+ * When the variable is unset or empty, the default list itself is returned.
10
+ */
11
+ export declare function getBlockedDomains(): readonly string[];
12
+ /**
13
+ * Checks whether a URL's hostname matches any blocked domain.
14
+ * Uses suffix matching so blocking `example.com` also blocks `shop.example.com`.
15
+ *
+ * @param url - URL to test; a string that cannot be parsed as a URL is never blocked.
+ * @param blocklist - Domains to match against; defaults to `getBlockedDomains()`.
16
+ * @returns `true` if the URL should be filtered out.
17
+ */
18
+ export declare function isBlockedUrl(url: string, blocklist?: readonly string[]): boolean;
19
+ /**
20
+ * Filters out items whose URL matches the blocklist and logs the count of
21
+ * removed items for diagnostics.
+ * The blocklist is the merged list from `getBlockedDomains()`; nothing is
+ * logged when no item was removed. The input array is not modified.
22
+ *
23
+ * @param items - Array of items to filter
24
+ * @param getUrl - Accessor to extract the URL from each item
25
+ * @returns Filtered array with blocked items removed
26
+ */
27
+ export declare function filterBlockedUrls<T>(items: readonly T[], getUrl: (item: T) => string): T[];
28
+ //# sourceMappingURL=urlBlocklist.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"urlBlocklist.d.ts","sourceRoot":"","sources":["../../../src/infra/search/urlBlocklist.ts"],"names":[],"mappings":"AAGA;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,EAAE,SAAS,MAAM,EAgBpD,CAAC;AAEF;;;GAGG;AACH,wBAAgB,iBAAiB,IAAI,SAAS,MAAM,EAAE,CAWrD;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAWhF;AAED;;;;;;;GAOG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,GAAG,CAAC,EAAE,CAa1F"}
@@ -0,0 +1,78 @@
1
+ import { loadEnv } from "../../config/env.js";
2
+ import { logInfo } from "../observability/tracing.js";
3
/**
 * Hardcoded list of retail / e-commerce domains that never produce useful
 * technical content. Matching is done on hostname suffix, so `samsclub.com`
 * also blocks `shop.samsclub.com`.
 *
 * The array is frozen: `getBlockedDomains()` hands this very instance back to
 * callers when no extra domains are configured, so an accidental `push` or
 * `sort` by a consumer would otherwise silently corrupt the defaults for the
 * whole process.
 */
export const DEFAULT_BLOCKED_DOMAINS = Object.freeze([
  "advanceautoparts.com",
  "samsclub.com",
  "walmart.com",
  "target.com",
  "ebay.com",
  "amazon.com",
  "aliexpress.com",
  "wish.com",
  "etsy.com",
  "bestbuy.com",
  "homedepot.com",
  "lowes.com",
  "costco.com",
  "wayfair.com",
  "overstock.com",
]);
25
/**
 * Builds the effective blocklist: the hardcoded defaults followed by any
 * domains listed (comma-separated) in the `SEARXNG_URL_BLOCKLIST` env var.
 *
 * User entries are trimmed and lower-cased; blank entries are discarded.
 * When the variable is unset or empty the default list itself is returned.
 */
export function getBlockedDomains() {
  const env = loadEnv();
  const rawList = env.SEARXNG_URL_BLOCKLIST;
  if (!rawList) {
    return DEFAULT_BLOCKED_DOMAINS;
  }
  const additions = [];
  for (const piece of rawList.split(",")) {
    const domain = piece.trim().toLowerCase();
    if (domain) {
      additions.push(domain);
    }
  }
  return [...DEFAULT_BLOCKED_DOMAINS, ...additions];
}
40
/**
 * Checks whether a URL's hostname matches any blocked domain.
 * Uses suffix matching so blocking `example.com` also blocks `shop.example.com`
 * (but not `notexample.com` or `example.com.evil.net`).
 *
 * Both sides of the comparison are normalised before matching:
 * - the hostname is lower-cased and stripped of trailing dots, so the
 *   fully-qualified spelling `https://example.com./` cannot slip past the list;
 * - each blocklist entry is trimmed, lower-cased and stripped of a leading
 *   `*.` / `.` and of trailing dots, so `.Example.com` or `*.example.com`
 *   behave like `example.com`. Empty and non-string entries are ignored.
 *
 * @param url - URL to test. A string that cannot be parsed as a URL is not blocked.
 * @param blocklist - Domains to match against; defaults to `getBlockedDomains()`.
 * @returns `true` if the URL should be filtered out.
 */
export function isBlockedUrl(url, blocklist) {
  const domains = blocklist ?? getBlockedDomains();
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase().replace(/\.+$/, "");
  } catch {
    // Malformed URLs are not blocked — let downstream handle them
    return false;
  }
  if (hostname === "") {
    // e.g. `file:///path` — there is no host to compare against
    return false;
  }
  const normalizeEntry = (entry) =>
    typeof entry === "string"
      ? entry
          .trim()
          .toLowerCase()
          .replace(/^(?:\*\.)?\.*/, "")
          .replace(/\.+$/, "")
      : "";
  return domains.some((entry) => {
    const blocked = normalizeEntry(entry);
    if (blocked === "") {
      return false;
    }
    return hostname === blocked || hostname.endsWith(`.${blocked}`);
  });
}
58
/**
 * Drops every item whose URL is on the blocklist and, when anything was
 * dropped, logs how many items were removed and how many remain.
 *
 * @param items - Array of items to filter (left untouched)
 * @param getUrl - Accessor to extract the URL from each item
 * @returns New array containing only the items that are not blocked
 */
export function filterBlockedUrls(items, getUrl) {
  const blockedDomains = getBlockedDomains();
  const isAllowed = (item) => !isBlockedUrl(getUrl(item), blockedDomains);
  const kept = items.filter(isAllowed);
  const removedCount = items.length - kept.length;
  if (removedCount <= 0) {
    return kept;
  }
  logInfo("Blocked URLs filtered from search results", {
    removedCount,
    remainingCount: kept.length,
  });
  return kept;
}
78
+ //# sourceMappingURL=urlBlocklist.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"urlBlocklist.js","sourceRoot":"","sources":["../../../src/infra/search/urlBlocklist.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,MAAM,6BAA6B,CAAC;AAEtD;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAsB;IACxD,sBAAsB;IACtB,cAAc;IACd,aAAa;IACb,YAAY;IACZ,UAAU;IACV,YAAY;IACZ,gBAAgB;IAChB,UAAU;IACV,UAAU;IACV,aAAa;IACb,eAAe;IACf,WAAW;IACX,YAAY;IACZ,aAAa;IACb,eAAe;CAChB,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,iBAAiB;IAC/B,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IACtB,MAAM,KAAK,GAAG,GAAG,CAAC,qBAAqB,CAAC;IACxC,IAAI,CAAC,KAAK;QAAE,OAAO,uBAAuB,CAAC;IAE3C,MAAM,WAAW,GAAG,KAAK;SACtB,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAClC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,OAAO,CAAC,GAAG,uBAAuB,EAAE,GAAG,WAAW,CAAC,CAAC;AACtD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,GAAW,EAAE,SAA6B;IACrE,MAAM,OAAO,GAAG,SAAS,IAAI,iBAAiB,EAAE,CAAC;IACjD,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,8DAA8D;QAC9D,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,QAAQ,KAAK,OAAO,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC,CAAC;AAC7F,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,iBAAiB,CAAI,KAAmB,EAAE,MAA2B;IACnF,MAAM,SAAS,GAAG,iBAAiB,EAAE,CAAC;IACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;IAChF,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAEpD,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,2CAA2C,EAAE;YACnD,YAAY;YACZ,cAAc,EAAE,QAAQ,CAAC,MAAM;SAChC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { QdrantClient } from "@qdrant/qdrant-js";
2
+ export declare const RESEARCH_COLLECTION = "rms_research"; // Name of the Qdrant collection that bootstrapQdrantCollections() creates and indexes.
3
+ export interface QdrantClientConfig {
4
+ url: string; // Qdrant endpoint URL handed to the client.
5
+ apiKey?: string; // Optional API key; only forwarded to the client when non-empty.
6
+ /** Request timeout in milliseconds. Default: 10000 (10s). */
7
+ timeout?: number;
8
+ }
9
+ export declare function createQdrantClient(config?: Partial<QdrantClientConfig>): QdrantClient; // Builds a client; each omitted field falls back to QDRANT_URL / QDRANT_TIMEOUT_MS / QDRANT_API_KEY from the loaded env.
10
+ /**
11
+ * Ensures collections exist and creates payload indexes for filtered search.
12
+ * Idempotent: safe to call on every startup.
13
+ *
14
+ * If an existing collection has different vector dimensions (e.g. after
15
+ * switching embedding models), it is automatically deleted and recreated.
+ *
+ * @param client - Qdrant client used for all collection and index calls.
+ * @param vectorSize - Expected vector dimension; new collections are created
+ *   with this size and Cosine distance.
16
+ */
17
+ export declare function bootstrapQdrantCollections(client: QdrantClient, vectorSize: number): Promise<void>;
18
+ //# sourceMappingURL=qdrantClient.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qdrantClient.d.ts","sourceRoot":"","sources":["../../../src/infra/vector/qdrantClient.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAIjD,eAAO,MAAM,mBAAmB,iBAAiB,CAAC;AAWlD,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6DAA6D;IAC7D,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,kBAAkB,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,GAAG,YAAY,CASrF;AAED;;;;;;GAMG;AACH,wBAAsB,0BAA0B,CAC9C,MAAM,EAAE,YAAY,EACpB,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC,CAuDf"}
@@ -0,0 +1,82 @@
1
+ import { QdrantClient } from "@qdrant/qdrant-js";
2
+ import { loadEnv } from "../../config/env.js";
3
+ import { logInfo } from "../observability/tracing.js";
4
/** Name of the Qdrant collection that holds research records. */
export const RESEARCH_COLLECTION = "rms_research";

/**
 * Payload fields that get a `keyword` index so they can be used in filtered
 * search. Every field lives under the `metadata.` prefix.
 */
const PAYLOAD_INDEX_FIELDS = [
  "research_id",
  "subject",
  "status",
  "tenant_id",
  "updated_at",
  "tags",
].map((key) => ({
  field_name: `metadata.${key}`,
  field_schema: "keyword",
}));
13
/**
 * Creates a Qdrant client.
 *
 * Each setting is taken from `config` when provided and otherwise from the
 * loaded environment (`QDRANT_URL`, `QDRANT_TIMEOUT_MS`, `QDRANT_API_KEY`).
 * The API key is only passed to the client when it is non-empty.
 *
 * @param config - Optional overrides for url, timeout and apiKey.
 * @returns A new `QdrantClient` instance.
 */
export function createQdrantClient(config) {
  const env = loadEnv();
  const overrides = config ?? {};
  const baseOptions = {
    url: overrides.url ?? env.QDRANT_URL,
    timeout: overrides.timeout ?? env.QDRANT_TIMEOUT_MS,
  };
  const key = overrides.apiKey ?? env.QDRANT_API_KEY;
  return new QdrantClient(key ? { ...baseOptions, apiKey: key } : baseOptions);
}
24
/**
 * Makes sure every required collection exists with the expected vector size
 * and that its payload indexes for filtered search are in place.
 * Idempotent: safe to call on every startup.
 *
 * An existing collection whose single-vector size differs from `vectorSize`
 * (e.g. after switching embedding models) is deleted and recreated.
 *
 * @param client - Qdrant client used for all collection and index calls.
 * @param vectorSize - Expected vector dimension for the collections.
 * @returns Promise that resolves once collections and indexes are ready.
 * @throws Rethrows any create error that is not an "already exists" conflict.
 */
export async function bootstrapQdrantCollections(client, vectorSize) {
  const messageOf = (err) => (err instanceof Error ? err.message : String(err));

  const listing = await client.getCollections();
  const existing = new Set(listing.collections.map((entry) => entry.name));

  for (const name of [RESEARCH_COLLECTION]) {
    let mustCreate = !existing.has(name);

    // If collection exists, validate vector dimensions match
    if (!mustCreate) {
      const info = await client.getCollection(name);
      const vectors = info.config.params.vectors;
      const currentSize =
        typeof vectors === "object" && "size" in vectors ? vectors.size : undefined;
      const sizeMismatch = currentSize !== undefined && currentSize !== vectorSize;
      if (sizeMismatch) {
        logInfo(`Collection "${name}" has vector size ${String(currentSize)}, expected ${String(vectorSize)}. Recreating.`);
        await client.deleteCollection(name);
        mustCreate = true;
      }
    }

    if (mustCreate) {
      try {
        await client.createCollection(name, {
          vectors: { size: vectorSize, distance: "Cosine" },
        });
      } catch (err) {
        // Handle TOCTOU race: another process may have created the collection
        // between our getCollections() check and this createCollection() call.
        const text = messageOf(err);
        const alreadyThere = text.includes("Conflict") || text.includes("already exists");
        if (!alreadyThere) {
          throw err;
        }
      }
    }

    for (const indexSpec of PAYLOAD_INDEX_FIELDS) {
      try {
        await client.createPayloadIndex(name, {
          field_name: indexSpec.field_name,
          field_schema: { type: indexSpec.field_schema },
          wait: true,
        });
      } catch (err) {
        // An index left over from a previous startup is fine; anything else is not.
        const text = messageOf(err);
        const alreadyThere = text.includes("already exists") || text.includes("AlreadyExists");
        if (!alreadyThere) {
          throw err;
        }
      }
    }
  }
}
82
+ //# sourceMappingURL=qdrantClient.js.map