@lacneu/openclaw-knowledge 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +264 -1
  2. package/README.md +131 -0
  3. package/dist/config.d.ts +4 -0
  4. package/dist/config.js +26 -0
  5. package/dist/config.js.map +1 -1
  6. package/dist/index.d.ts +25 -4
  7. package/dist/index.js +295 -46
  8. package/dist/index.js.map +1 -1
  9. package/dist/jina/classifier.d.ts +55 -0
  10. package/dist/jina/classifier.js +170 -0
  11. package/dist/jina/classifier.js.map +1 -0
  12. package/dist/jina/client.d.ts +30 -0
  13. package/dist/jina/client.js +131 -0
  14. package/dist/jina/client.js.map +1 -0
  15. package/dist/jina/errors.d.ts +42 -0
  16. package/dist/jina/errors.js +113 -0
  17. package/dist/jina/errors.js.map +1 -0
  18. package/dist/jina/reranker.d.ts +34 -0
  19. package/dist/jina/reranker.js +95 -0
  20. package/dist/jina/reranker.js.map +1 -0
  21. package/dist/jina/types.d.ts +78 -0
  22. package/dist/jina/types.js +12 -0
  23. package/dist/jina/types.js.map +1 -0
  24. package/dist/pgvector.d.ts +29 -0
  25. package/dist/pgvector.js +68 -0
  26. package/dist/pgvector.js.map +1 -1
  27. package/dist/router/heuristic.d.ts +29 -0
  28. package/dist/router/heuristic.js +104 -0
  29. package/dist/router/heuristic.js.map +1 -0
  30. package/dist/router/index.d.ts +33 -0
  31. package/dist/router/index.js +94 -0
  32. package/dist/router/index.js.map +1 -0
  33. package/dist/router/labels.d.ts +33 -0
  34. package/dist/router/labels.js +67 -0
  35. package/dist/router/labels.js.map +1 -0
  36. package/dist/router/types.d.ts +23 -0
  37. package/dist/router/types.js +7 -0
  38. package/dist/router/types.js.map +1 -0
  39. package/dist/tracing/events.d.ts +83 -0
  40. package/dist/tracing/events.js +86 -0
  41. package/dist/tracing/events.js.map +1 -0
  42. package/dist/types.d.ts +57 -0
  43. package/openclaw.plugin.json +97 -4
  44. package/package.json +3 -3
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Embedding model used as backbone for Classifier zero-shot requests.
3
+ * The Reranker endpoint uses its own model identifiers (see RerankerModel).
4
+ */
5
+ export type ClassifierEmbeddingModel = "jina-embeddings-v3" | "jina-embeddings-v4" | "jina-clip-v2";
6
+ /**
7
+ * Supported Jina reranker models.
8
+ *
9
+ * - `jina-reranker-v2-base-multilingual` (default in this plugin) — trained on
10
+ * 100+ languages, ideal for French content. Context cap: 8 192 tokens.
11
+ * - `jina-reranker-v3` — bigger context (131 K), primarily English-trained.
12
+ * - `jina-reranker-m0` — multimodal.
13
+ * - `jina-colbert-v2` — late-interaction.
14
+ */
15
+ export type RerankerModel = "jina-reranker-v2-base-multilingual" | "jina-reranker-v3" | "jina-reranker-m0" | "jina-colbert-v2" | (string & {});
16
+ /**
17
+ * Input element for the Classifier endpoint. Text-only is what the plugin
18
+ * uses; the API also supports `{image: "..."}` with `jina-clip-v2`, but the
19
+ * router never classifies images.
20
+ */
21
+ export interface ClassifierTextInput {
22
+ text: string;
23
+ }
24
+ /**
25
+ * Zero-shot classification request body for POST /v1/classify.
26
+ *
27
+ * `labels` must contain semantic category strings (the Classifier embeds
28
+ * them and picks the closest one to each input). At least 2 labels.
29
+ */
30
+ export interface JinaClassifyZeroShotRequest {
31
+ model: ClassifierEmbeddingModel;
32
+ input: ClassifierTextInput[];
33
+ labels: string[];
34
+ }
35
+ /**
36
+ * Few-shot classification request body for POST /v1/classify (with a
37
+ * pre-trained classifier_id obtained out-of-band — the plugin does NOT
38
+ * implement /v1/train; operators train via the Jina Playground or CLI and
39
+ * paste the ID into the plugin config).
40
+ */
41
+ export interface JinaClassifyFewShotRequest {
42
+ classifier_id: string;
43
+ input: ClassifierTextInput[];
44
+ }
45
+ export type JinaClassifyRequest = JinaClassifyZeroShotRequest | JinaClassifyFewShotRequest;
46
+ /**
47
+ * Normalized classification outcome as seen by the rest of the plugin.
48
+ * `label` is the picked class. `score` is the confidence in [0, 1] when
49
+ * Jina returns it; `null` when the field is not in the response (the
50
+ * defensive parser still produces a label in that case).
51
+ */
52
+ export interface ClassificationOutcome {
53
+ label: string;
54
+ score: number | null;
55
+ }
56
+ /**
57
+ * Reranker request body for POST /v1/rerank.
58
+ *
59
+ * We always send `return_documents: false`: the caller already holds the
60
+ * original documents (PgvectorResult[]) and only needs the new ordering. This
61
+ * saves a meaningful chunk of egress tokens on large payloads.
62
+ */
63
+ export interface JinaRerankRequest {
64
+ model: RerankerModel;
65
+ query: string;
66
+ documents: string[];
67
+ top_n?: number;
68
+ return_documents: false;
69
+ truncate?: boolean;
70
+ }
71
+ /**
72
+ * Single reranked result item — `index` references the original `documents`
73
+ * array position.
74
+ */
75
+ export interface RerankedItem {
76
+ index: number;
77
+ score: number;
78
+ }
@@ -0,0 +1,12 @@
1
+ // Jina AI API request/response types.
2
+ //
3
+ // Documented at:
4
+ // https://jina.ai/classifier/ (POST /v1/classify, zero-shot + few-shot)
5
+ // https://jina.ai/reranker/ (POST /v1/rerank)
6
+ //
7
+ // We deliberately keep the response shapes flexible (`unknown`-leaning) so the
8
+ // parser can stay defensive: Jina has changed field names between iterations
9
+ // (e.g. `predictions[]` vs `results[]`), and a brittle interface would mask
10
+ // silent breakage. Strong typing happens at the parser boundary.
11
+ export {};
12
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/jina/types.ts"],"names":[],"mappings":"AAAA,sCAAsC;AACtC,EAAE;AACF,iBAAiB;AACjB,4EAA4E;AAC5E,oDAAoD;AACpD,EAAE;AACF,+EAA+E;AAC/E,6EAA6E;AAC7E,4EAA4E;AAC5E,iEAAiE"}
@@ -1,3 +1,4 @@
1
+ import type { RerankerModel } from "./jina/types.js";
1
2
  import type { PgPoolLike, PgvectorResult } from "./types.js";
2
3
  /**
3
4
  * Search a single collection in `knowledge_vectors` using cosine similarity.
@@ -19,3 +20,31 @@ export declare function searchCollection(pool: PgPoolLike, collection: string, v
19
20
  * easily skip empty sections.
20
21
  */
21
22
  export declare function formatPgvectorResults(results: PgvectorResult[], maxChars: number): string | null;
23
+ export interface RerankPgvectorParams {
24
+ apiKey: string;
25
+ query: string;
26
+ model?: RerankerModel;
27
+ /** Cap on the number of results returned post-rerank. */
28
+ topN?: number;
29
+ timeoutMs?: number;
30
+ signal?: AbortSignal;
31
+ }
32
+ /**
33
+ * Re-order `results` using the Jina cross-encoder reranker.
34
+ *
35
+ * This is the precision stage on top of pgvector's recall: the cosine pass
36
+ * grabs ~20 coarse candidates, the reranker promotes the ones that actually
37
+ * match the user's intent.
38
+ *
39
+ * Contract:
40
+ * - On success, returns at most `topN` items in descending relevance order.
41
+ * The original cosine `score` is **preserved** on each item; the reranker
42
+ * produces its own score we expose via the log event, not in the
43
+ * PgvectorResult.
44
+ * - On any error (including auth / rate limit), throws a `JinaError`. The
45
+ * caller is responsible for falling back to the original cosine order —
46
+ * we do NOT swallow here because the caller wants to track the failure
47
+ * for the cooldown breaker.
48
+ * - On empty input, returns `[]` without hitting the network.
49
+ */
50
+ export declare function rerankPgvectorResults(results: PgvectorResult[], params: RerankPgvectorParams): Promise<PgvectorResult[]>;
package/dist/pgvector.js CHANGED
@@ -4,6 +4,12 @@
4
4
  // `halfvec(3072)` because pgvector's HNSW implementation caps at 2000 dims
5
5
  // for the native `vector` type. Both the column cast and the query parameter
6
6
  // cast must match, otherwise the planner falls back to a sequential scan.
7
+ //
8
+ // As of v3.2.0, results from `searchCollection` may optionally be re-ordered
9
+ // by a Jina cross-encoder reranker (see `rerankPgvectorResults`). The vector
10
+ // search remains the recall stage; the reranker is the precision stage.
11
+ import { rerank } from "./jina/reranker.js";
12
+ import { JinaError } from "./jina/errors.js";
7
13
  const SEARCH_SQL = `SELECT file_name, mime_type, text, file_id, source, owner,
8
14
  chunk_index, total_chunks, timestamp_start, timestamp_end,
9
15
  embedded_at,
@@ -78,4 +84,66 @@ export function formatPgvectorResults(results, maxChars) {
78
84
  }
79
85
  return output;
80
86
  }
87
/**
 * Re-order `results` using the Jina cross-encoder reranker.
 *
 * This is the precision stage on top of pgvector's recall: the cosine pass
 * grabs coarse candidates, the reranker promotes the ones that actually
 * match the user's intent.
 *
 * Contract:
 *   - On success, returns at most `topN` items in the order the reranker
 *     produced them. Each row is returned untouched, so its original cosine
 *     `score` is preserved; the reranker's own score is not copied onto it.
 *   - Rows whose `text` is empty / whitespace / null are never sent to the
 *     reranker. If no row has usable text, or the reranker yields nothing
 *     that maps back to a row, the original cosine order (capped to `topN`)
 *     is returned instead of wiping the candidate list.
 *   - Any error raised by `rerank` propagates unchanged (no wrapping, no
 *     swallowing) so the caller can track the failure and fall back itself.
 *   - On empty input, returns `[]` without calling the reranker.
 *
 * @param results Rows returned by the vector search, in cosine order.
 * @param params  Reranker options: `apiKey`, `query`, and optional `model`,
 *                `topN`, `timeoutMs`, `signal`.
 * @returns The re-ordered rows (a new array; input is not mutated).
 */
export async function rerankPgvectorResults(results, params) {
    if (results.length === 0)
        return [];
    // Local cap enforcing the "at most topN" contract even if the remote side
    // ignores `top_n`. A negative value is clamped so `slice` never counts from
    // the end of the array.
    const cap = Math.max(0, params.topN ?? results.length);
    // The reranker only sees textual content. Rows without usable text are
    // filtered out BEFORE the call so no tokens are spent on them.
    const indexed = results
        .map((row) => ({ row, text: row.text ?? "" }))
        .filter((entry) => entry.text.trim().length > 0);
    if (indexed.length === 0)
        return results.slice(0, cap);
    // Errors are intentionally NOT caught here: the caller needs to observe
    // them (see contract above).
    const reranked = await rerank({
        apiKey: params.apiKey,
        query: params.query,
        documents: indexed.map((entry) => entry.text),
        model: params.model,
        topN: params.topN,
        timeoutMs: params.timeoutMs,
        signal: params.signal,
    });
    // Map back to rows using the reranker's `index`, which points into the
    // filtered `indexed` array, NOT the original `results` array.
    const out = [];
    const seen = new Set();
    for (const item of reranked) {
        if (out.length >= cap)
            break;
        const found = indexed[item.index];
        // Skip out-of-range indices and indices the reranker repeated, so a row
        // can never appear twice in the output.
        if (!found || seen.has(item.index))
            continue;
        seen.add(item.index);
        out.push(found.row);
    }
    // Defensive: nothing usable came back. Surface the original cosine order
    // rather than an empty candidate list.
    if (out.length === 0)
        return results.slice(0, cap);
    return out;
}
81
149
  //# sourceMappingURL=pgvector.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pgvector.js","sourceRoot":"","sources":["../src/pgvector.ts"],"names":[],"mappings":"AAAA,wCAAwC;AACxC,EAAE;AACF,iEAAiE;AACjE,2EAA2E;AAC3E,6EAA6E;AAC7E,0EAA0E;AAI1E,MAAM,UAAU,GAAG;;;;;;;gBAOH,CAAC;AAEjB;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAgB,EAChB,UAAkB,EAClB,MAAgB,EAChB,IAAY,EACZ,cAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAE1C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;QAE3E,wEAAwE;QACxE,OAAO,MAAM,CAAC,IAAI;aACf,GAAG,CAAC,CAAC,GAAgB,EAAkB,EAAE,CAAC,CAAC;YAC1C,UAAU;YACV,KAAK,EAAE,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC;YAC5B,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;YAChC,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;YAChC,IAAI,EAAE,GAAG,CAAC,IAAI,IAAI,IAAI;YACtB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,IAAI;YAC5B,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI;YAC1B,KAAK,EAAE,GAAG,CAAC,KAAK,IAAI,IAAI;YACxB,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;YACpC,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,IAAI;YACtC,eAAe,EAAE,GAAG,CAAC,eAAe,IAAI,IAAI;YAC5C,aAAa,EAAE,GAAG,CAAC,aAAa,IAAI,IAAI;SACzC,CAAC,CAAC;aACF,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,IAAI,cAAc,CAAC,CAAC;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,qBAAqB,CACnC,OAAyB,EACzB,QAAgB;IAEhB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,KAAK,GAAa;YACtB,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,IAAI,SAAS,YAAY,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SAC/E,CAAC;QAEF,IAAI,CAAC,CAAC,eAAe,EAAE,CAAC;YACtB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,eAAe,MAAM,CAAC,CAAC,aAAa,IAAI,EAAE,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACX,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnC,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,uCAAuC;QACvD,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ;YAAE,MAAM;QACnD,MAAM,IAAI,KAAK,CAAC;IAClB,CAA
C;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"pgvector.js","sourceRoot":"","sources":["../src/pgvector.ts"],"names":[],"mappings":"AAAA,wCAAwC;AACxC,EAAE;AACF,iEAAiE;AACjE,2EAA2E;AAC3E,6EAA6E;AAC7E,0EAA0E;AAC1E,EAAE;AACF,6EAA6E;AAC7E,6EAA6E;AAC7E,wEAAwE;AAExE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAI7C,MAAM,UAAU,GAAG;;;;;;;gBAOH,CAAC;AAEjB;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAgB,EAChB,UAAkB,EAClB,MAAgB,EAChB,IAAY,EACZ,cAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAE1C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;QAE3E,wEAAwE;QACxE,OAAO,MAAM,CAAC,IAAI;aACf,GAAG,CAAC,CAAC,GAAgB,EAAkB,EAAE,CAAC,CAAC;YAC1C,UAAU;YACV,KAAK,EAAE,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC;YAC5B,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;YAChC,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;YAChC,IAAI,EAAE,GAAG,CAAC,IAAI,IAAI,IAAI;YACtB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,IAAI;YAC5B,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI;YAC1B,KAAK,EAAE,GAAG,CAAC,KAAK,IAAI,IAAI;YACxB,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;YACpC,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,IAAI;YACtC,eAAe,EAAE,GAAG,CAAC,eAAe,IAAI,IAAI;YAC5C,aAAa,EAAE,GAAG,CAAC,aAAa,IAAI,IAAI;SACzC,CAAC,CAAC;aACF,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,IAAI,cAAc,CAAC,CAAC;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,qBAAqB,CACnC,OAAyB,EACzB,QAAgB;IAEhB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,KAAK,GAAa;YACtB,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,IAAI,SAAS,YAAY,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SAC/E,CAAC;QAEF,IAAI,CAAC,CAAC,eAAe,EAAE,CAAC;YACtB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,eAAe,MAAM,CAAC,CAAC,aAAa,IAAI,EAAE,EAAE,CAAC,CAAC;QACzE,CAAC;QAED,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACX,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnC,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,uCAAuC;QACvD,MAAM,KAAK,GAAG,K
AAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ;YAAE,MAAM;QACnD,MAAM,IAAI,KAAK,CAAC;IAClB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAgBD;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAyB,EACzB,MAA4B;IAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,yEAAyE;IACzE,qEAAqE;IACrE,uDAAuD;IACvD,MAAM,OAAO,GAAG,OAAO;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;SACjE,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE3C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEjF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC;YAC5B,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YACrC,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,qEAAqE;YACrE,sDAAsD;YACtD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;QACzD,CAAC;QAED,wEAAwE;QACxE,wEAAwE;QACxE,MAAM,GAAG,GAAqB,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,IAAI,KAAK;gBAAE,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,oEAAoE;QACpE,qDAAqD;QACrD,IAAI,GAAG,YAAY,SAAS;YAAE,MAAM,GAAG,CAAC;QACxC,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC"}
@@ -0,0 +1,29 @@
1
+ import type { Route, RouterReason } from "./types.js";
2
+ /**
3
+ * Triggers from the OpenClaw SDK that mean "not a user-initiated turn".
4
+ *
5
+ * Sourced from `PluginHookAgentContext.trigger` in the OpenClaw plugin SDK:
6
+ * `"user" | "heartbeat" | "cron" | "memory"`.
7
+ */
8
+ export declare const NON_USER_TRIGGERS: Set<string>;
9
+ export interface HeuristicInput {
10
+ /** The extracted user query (already trimmed and length-validated). */
11
+ query: string;
12
+ /** Trigger from `PluginHookAgentContext`. May be undefined on legacy SDKs. */
13
+ trigger?: string;
14
+ /** Whether the sender is the local CLI test harness (id: "cli"). */
15
+ isCli?: boolean;
16
+ }
17
+ export interface HeuristicVerdict {
18
+ route: Route | null;
19
+ reason: RouterReason;
20
+ }
21
+ /**
22
+ * Decide a route from cheap signals only. Returns `route: null` when the
23
+ * input is ambiguous and a classifier (or `ALL` fallback) should take over.
24
+ *
25
+ * The returned `reason` identifies the rule that fired. When no rule fires,
26
+ * `reason` is `"classifier_fallback"` and `route` is `null`, which tells the
27
+ * caller to consult the classifier or fall back to `ALL`.
28
+ */
29
+ export declare function heuristicRoute(input: HeuristicInput): HeuristicVerdict;
@@ -0,0 +1,104 @@
1
+ // Zero-cost router heuristics.
2
+ //
3
+ // Runs BEFORE any Jina call. Three jobs:
4
+ //
5
+ // 1. **Deterministic skip on operational triggers.** When OpenClaw fires
6
+ // `before_prompt_build` with `ctx.trigger ∈ {heartbeat, cron, memory}`,
7
+ // we know the turn is not a real user question — skip retrieval
8
+ // unconditionally. This is the cheapest and most important gain:
9
+ // heartbeats fire continuously and were eating ~95% of the previous
10
+ // Jina quota.
11
+ //
12
+ // 2. **Meta-agent regex matches.** Questions like "what is your session
13
+ // id" or "combien d'agents ici" cannot be answered by the knowledge
14
+ // base, so we skip them deterministically too.
15
+ //
16
+ // 3. **Keyword fast-paths for common business questions.** A small set of
17
+ // regex hints lets the heuristic decide on its own (PGVECTOR_ONLY vs
18
+ // LIGHTRAG_ONLY) without paying for a Jina call. The router falls back
19
+ // to the classifier — or to `ALL` — when nothing matches.
20
+ //
21
+ // Everything in this module is pure (no I/O, no side effects) so the tests
22
+ // can exhaustively cover the matrix.
23
/**
 * Triggers from the OpenClaw SDK that mean "not a user-initiated turn".
 *
 * Sourced from `PluginHookAgentContext.trigger` in the OpenClaw plugin SDK:
 * `"user" | "heartbeat" | "cron" | "memory"`.
 */
export const NON_USER_TRIGGERS = new Set(["heartbeat", "cron", "memory"]);
// ---------------------------------------------------------------------------
// Meta-agent questions — skip entirely
// ---------------------------------------------------------------------------
const META_PATTERNS = [
    // Session / run identifiers: "session id", "runId", "identifiant de session".
    /\b(?:session\s*id|runid|run\s*id|identifiant\s+(?:de\s+)?session|sessions?\s*identifi(?:ant|cation))\b/i,
    // "combien d'agents" / "combien de subagents". Both the straight (')
    // and the typographic (’) apostrophe are accepted.
    /\bcombien\s+d['’e\s]?\s*(?:sub-?)?agent/i,
    // Self-introspection: "qui es-tu", "what model are you", "who are you".
    /\b(?:qui\s+es-tu|what\s+model\s+are\s+you|who\s+are\s+you|what\s+is\s+your\s+name|comment\s+t['’e]appelles?-tu)\b/i,
    // Trivial system pings — the WHOLE prompt must be a status/ping check.
    // Anchored on `^`/`$` so business questions that merely end with the word
    // "status" (e.g. "what is the ACME project status?") are NOT meta.
    /^\s*(?:(?:system|the\s+system)\s+)?(?:status|ping|heartbeat)\s*[?!.]*\s*$/i,
    // Same anchoring for presence checks, EN and FR: "are you there?",
    // "tu es là ?", "t'es la ?", "t'es en ligne ?". The accent on "là" is
    // optional because users frequently omit it.
    /^\s*(?:are\s+you\s+(?:there|alive)|(?:tu\s+es|t['’e]es?)\s+(?:l[aà]|en\s+ligne))\s*[?!.]*\s*$/i,
];
// ---------------------------------------------------------------------------
// CLI test guards — short trivial prompts coming from `id:"cli"`
// ---------------------------------------------------------------------------
const CLI_TRIVIAL_PATTERN = /^\s*(?:test|test\s+de\s+(?:bon\s+)?fonctionnement|ping|hello|hi|salut|coucou|ok|yes|no|oui|non)\W*\s*$/i;
// ---------------------------------------------------------------------------
// Keyword fast-paths
// ---------------------------------------------------------------------------
const PGVECTOR_KEYWORDS = [
    /\bversion\b/i,
    /\brelease\s+notes?\b/i,
    /\bchangelog\b/i,
    /\bsource\s+(?:document|file|pdf|markdown)\b/i,
    /\b\w+\.(?:md|pdf|yaml|yml|json|ts|js|py|sh)\b/i, // file name with extension
];
const LIGHTRAG_KEYWORDS = [
    /\bcompare\w*\b/i,
    /\baudit\b/i,
    // "synthèse", also typed without the accent.
    /\bsynth[eéè]se\b/i,
    /\brelations?\s+entre\b/i,
    /\bqui\s+(?:travaille|collabore|coach|forme)\s+(?:avec|pour|chez)\b/i,
    // "différence entre", "différent de", and the unaccented spellings.
    /\bdiff[ée]ren\w*\s+(?:entre|de)\b/i,
];
/**
 * Decide a route from cheap signals only (no I/O).
 *
 * Rules are evaluated in order and the first hit wins:
 *   1. `trigger` is one of {@link NON_USER_TRIGGERS}  → `NONE` / `"heuristic_trigger"`
 *   2. the query matches a meta-agent pattern          → `NONE` / `"heuristic_meta"`
 *   3. `isCli` and the query is a trivial test prompt  → `NONE` / `"heuristic_short"`
 *   4. a pgvector keyword matches                      → `PGVECTOR_ONLY` / `"heuristic_keyword"`
 *   5. a LightRAG keyword matches                      → `LIGHTRAG_ONLY` / `"heuristic_keyword"`
 *
 * When no rule fires, `route` is `null` and `reason` is
 * `"classifier_fallback"`: the caller must consult the classifier or fall
 * back on its own.
 *
 * @param input `{ query, trigger?, isCli? }`
 * @returns `{ route, reason }` as described above.
 */
export function heuristicRoute(input) {
    const { query, trigger, isCli } = input;
    // 1. Operational trigger → NEVER call any source.
    if (trigger && NON_USER_TRIGGERS.has(trigger)) {
        return { route: "NONE", reason: "heuristic_trigger" };
    }
    // The patterns contain precomposed accented letters (é, è, à); normalize
    // to NFC so decomposed input (letter + combining mark) still matches.
    const text = query.normalize("NFC");
    // 2. Meta-agent question → NEVER call any source.
    if (META_PATTERNS.some((re) => re.test(text))) {
        return { route: "NONE", reason: "heuristic_meta" };
    }
    // 3. CLI trivial prompt → NEVER call any source. Restricted to CLI to
    //    avoid blocking legitimate-but-short collaborator questions.
    if (isCli && CLI_TRIVIAL_PATTERN.test(text)) {
        return { route: "NONE", reason: "heuristic_short" };
    }
    // 4. Keyword fast-paths (pgvector is checked before LightRAG).
    if (PGVECTOR_KEYWORDS.some((re) => re.test(text))) {
        return { route: "PGVECTOR_ONLY", reason: "heuristic_keyword" };
    }
    if (LIGHTRAG_KEYWORDS.some((re) => re.test(text))) {
        return { route: "LIGHTRAG_ONLY", reason: "heuristic_keyword" };
    }
    // 5. Nothing fired — defer to the classifier (or fallback to ALL).
    return { route: null, reason: "classifier_fallback" };
}
104
+ //# sourceMappingURL=heuristic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"heuristic.js","sourceRoot":"","sources":["../../src/router/heuristic.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,yCAAyC;AACzC,EAAE;AACF,2EAA2E;AAC3E,6EAA6E;AAC7E,qEAAqE;AACrE,sEAAsE;AACtE,yEAAyE;AACzE,mBAAmB;AACnB,EAAE;AACF,0EAA0E;AAC1E,yEAAyE;AACzE,oDAAoD;AACpD,EAAE;AACF,4EAA4E;AAC5E,0EAA0E;AAC1E,4EAA4E;AAC5E,+DAA+D;AAC/D,EAAE;AACF,2EAA2E;AAC3E,qCAAqC;AAIrC;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE1E,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,aAAa,GAAa;IAC9B,yDAAyD;IACzD,yGAAyG;IACzG,6BAA6B;IAC7B,yCAAyC;IACzC,sEAAsE;IACtE,mHAAmH;IACnH,uEAAuE;IACvE,sEAAsE;IACtE,wEAAwE;IACxE,qDAAqD;IACrD,4EAA4E;IAC5E,gFAAgF;CACjF,CAAC;AAEF,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAE9E,MAAM,mBAAmB,GACvB,yGAAyG,CAAC;AAE5G,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,iBAAiB,GAAa;IAClC,cAAc;IACd,uBAAuB;IACvB,gBAAgB;IAChB,8CAA8C;IAC9C,gDAAgD,EAAE,2BAA2B;CAC9E,CAAC;AAEF,MAAM,iBAAiB,GAAa;IAClC,iBAAiB;IACjB,YAAY;IACZ,kBAAkB;IAClB,yBAAyB;IACzB,qEAAqE;IACrE,kCAAkC;CACnC,CAAC;AAoBF;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,KAAqB;IAClD,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;IAExC,kDAAkD;IAClD,IAAI,OAAO,IAAI,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9C,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,EAAE,CAAC;IACxD,CAAC;IAED,kDAAkD;IAClD,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,IAAI,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACnB,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;QACrD,CAAC;IACH,CAAC;IAED,sEAAsE;IACtE,iEAAiE;IACjE,IAAI,KAAK,IAAI,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7C,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,iBAAiB,EAAE,CAAC;IACtD,CAAC;IAED,yBAAyB;IACzB,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QACnD,OAAO,EAAE,KAAK,EAAE,eAAe,EAAE,MAAM,EAAE,mBAAmB,EAAE,CAAC;IACjE,CAAC;IACD,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QACnD,OAAO,EAAE,KAAK,EAAE,eAAe,EAAE,MAAM,EAAE,m
BAAmB,EAAE,CAAC;IACjE,CAAC;IAED,mEAAmE;IACnE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC;AACxD,CAAC"}
@@ -0,0 +1,33 @@
1
+ import type { RouterDecision } from "./types.js";
2
+ export interface RouterConfig {
3
+ /** Master switch. When false, every call returns `{route: "ALL"}`. */
4
+ enabled: boolean;
5
+ /** Which engine fills the gap when heuristics are ambiguous. */
6
+ mode: "heuristic" | "jina-classifier";
7
+ /** Jina API key — required when mode === "jina-classifier". */
8
+ jinaApiKey: string;
9
+ /**
10
+ * Optional few-shot classifier ID. When provided, the router calls
11
+ * `/v1/classify` with this ID instead of running zero-shot.
12
+ */
13
+ classifierId?: string;
14
+ /** Labels for zero-shot classification. Defaults to {@link DEFAULT_ROUTER_LABELS}. */
15
+ labels?: readonly string[];
16
+ /** Triggers that bypass retrieval (subset of NON_USER_TRIGGERS). */
17
+ skipTriggers?: readonly string[];
18
+ }
19
+ export interface RouterRuntimeContext {
20
+ query: string;
21
+ trigger?: string;
22
+ /** Whether the sender is the local CLI test harness. */
23
+ isCli?: boolean;
24
+ /** Optional AbortSignal propagated to the classifier HTTP call. */
25
+ signal?: AbortSignal;
26
+ }
27
+ /**
28
+ * Decide the route for one user turn.
29
+ *
30
+ * Returns a {@link RouterDecision}. The plugin caller logs `reason` and
31
+ * uses `route` to gate source calls.
32
+ */
33
+ export declare function decideRoute(cfg: RouterConfig, ctx: RouterRuntimeContext): Promise<RouterDecision>;
@@ -0,0 +1,94 @@
1
+ // Router orchestrator: heuristic → classifier (optional) → fallback.
2
+ //
3
+ // Public entry point is `decideRoute(...)`. It produces a `RouterDecision`
4
+ // the hook handler consumes to gate calls to pgvector / LightRAG.
5
+ //
6
+ // Design contract (fail-open):
7
+ // - Any error in the classifier MUST yield `ALL` so retrieval keeps
8
+ // working. The router never blocks the agent.
9
+ // - The classifier is only called when the heuristic returns `null`
10
+ // (ambiguous input). Heuristic hits are deterministic and free.
11
+ // - Classifier results that don't map to a known label fall back to
12
+ // `ALL` with reason `"classifier_fallback"`.
13
+ import { classifyFewShot, classifyZeroShot, } from "../jina/classifier.js";
14
+ import { JinaError } from "../jina/errors.js";
15
+ import { DEFAULT_ROUTER_LABELS, ROUTER_LABEL_NAMES, extractRouteFromLabel, } from "./labels.js";
16
+ import { heuristicRoute } from "./heuristic.js";
17
// Route used whenever the router cannot (or must not) narrow the sources.
const FALLBACK = "ALL";
/**
 * Decide the route for one user turn.
 *
 * Order of evaluation:
 *   1. Router disabled                → `ALL` / `"router_disabled"`.
 *   2. Heuristic produced a route     → that route, with the heuristic's reason.
 *   3. Mode is `"heuristic"` or no Jina API key is configured
 *                                     → `ALL` / `"classifier_fallback"`.
 *   4. Classifier (few-shot when `cfg.classifierId` is set, zero-shot
 *      otherwise) → `"classifier_hit"` when the label maps to a known route,
 *      `ALL` / `"classifier_fallback"` otherwise.
 *
 * A `JinaError` raised during step 4 yields `ALL` / `"classifier_error"`;
 * any other error is rethrown to the caller.
 *
 * @param cfg Router configuration.
 * @param ctx Per-turn context (`query`, optional `trigger`, `isCli`, `signal`).
 * @returns A `{ route, reason, score }` decision.
 */
export async function decideRoute(cfg, ctx) {
    // 0. Disabled → preserve legacy behavior.
    if (!cfg.enabled) {
        return { route: "ALL", reason: "router_disabled", score: null };
    }
    // Shared shape for every "could not narrow" outcome.
    const openRoute = (reason, score) => ({ route: FALLBACK, reason, score });
    // 1. Heuristic pass — cheap and deterministic.
    const heuristic = heuristicRoute({
        query: ctx.query,
        trigger: ctx.trigger,
        isCli: ctx.isCli,
    });
    if (heuristic.route !== null) {
        return { route: heuristic.route, reason: heuristic.reason, score: null };
    }
    // 2. Classifier pass — only when heuristics were ambiguous.
    // Operator asked for heuristic-only routing; ambiguous → ALL.
    if (cfg.mode === "heuristic") {
        return openRoute("classifier_fallback", null);
    }
    // Misconfiguration safety net — never crash, just fall back.
    if (!cfg.jinaApiKey) {
        return openRoute("classifier_fallback", null);
    }
    try {
        let outcome;
        if (cfg.classifierId) {
            outcome = await classifyFewShot({
                apiKey: cfg.jinaApiKey,
                text: ctx.query,
                classifierId: cfg.classifierId,
                expectedLabels: ROUTER_LABEL_NAMES,
                signal: ctx.signal,
            });
        }
        else {
            outcome = await classifyZeroShot({
                apiKey: cfg.jinaApiKey,
                text: ctx.query,
                labels: (cfg.labels ?? DEFAULT_ROUTER_LABELS),
                signal: ctx.signal,
            });
        }
        if (!outcome) {
            return openRoute("classifier_fallback", null);
        }
        // Few-shot classifiers return the raw label name as trained; zero-shot
        // returns the full descriptive label — strip the colon prefix.
        let routeName;
        if (cfg.classifierId) {
            routeName = outcome.label;
        }
        else {
            routeName = extractRouteFromLabel(outcome.label) ?? outcome.label;
        }
        if (isKnownRoute(routeName)) {
            return {
                route: routeName,
                reason: "classifier_hit",
                score: outcome.score,
            };
        }
        return openRoute("classifier_fallback", outcome.score);
    }
    catch (err) {
        // Jina failures fail open; anything else (programmer errors, type
        // mismatches) is rethrown.
        if (err instanceof JinaError) {
            return openRoute("classifier_error", null);
        }
        throw err;
    }
}
88
/**
 * Tell whether `value` is exactly one of the four route names:
 * `"NONE"`, `"PGVECTOR_ONLY"`, `"LIGHTRAG_ONLY"` or `"ALL"`.
 * The comparison is strict and case-sensitive.
 */
function isKnownRoute(value) {
    switch (value) {
        case "NONE":
        case "PGVECTOR_ONLY":
        case "LIGHTRAG_ONLY":
        case "ALL":
            return true;
        default:
            return false;
    }
}
94
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/router/index.ts"],"names":[],"mappings":"AAAA,qEAAqE;AACrE,EAAE;AACF,2EAA2E;AAC3E,kEAAkE;AAClE,EAAE;AACF,+BAA+B;AAC/B,sEAAsE;AACtE,kDAAkD;AAClD,sEAAsE;AACtE,oEAAoE;AACpE,sEAAsE;AACtE,iDAAiD;AAEjD,OAAO,EACL,eAAe,EACf,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EACL,qBAAqB,EACrB,kBAAkB,EAClB,qBAAqB,GACtB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AA8BhD,MAAM,QAAQ,GAAU,KAAK,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,GAAiB,EACjB,GAAyB;IAEzB,0CAA0C;IAC1C,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;QACjB,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,iBAAiB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IAClE,CAAC;IAED,+CAA+C;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC;QAC7B,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,KAAK,EAAE,GAAG,CAAC,KAAK;KACjB,CAAC,CAAC;IACH,IAAI,OAAO,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;QAC3B,OAAO,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACvE,CAAC;IAED,4DAA4D;IAC5D,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;QAC7B,8DAA8D;QAC9D,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,qBAAqB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACzE,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QACpB,6DAA6D;QAC7D,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,qBAAqB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACzE,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY;YAC9B,CAAC,CAAC,MAAM,eAAe,CAAC;gBACpB,MAAM,EAAE,GAAG,CAAC,UAAU;gBACtB,IAAI,EAAE,GAAG,CAAC,KAAK;gBACf,YAAY,EAAE,GAAG,CAAC,YAAY;gBAC9B,cAAc,EAAE,kBAA8B;gBAC9C,MAAM,EAAE,GAAG,CAAC,MAAM;aACnB,CAAC;YACJ,CAAC,CAAC,MAAM,gBAAgB,CAAC;gBACrB,MAAM,EAAE,GAAG,CAAC,UAAU;gBACtB,IAAI,EAAE,GAAG,CAAC,KAAK;gBACf,MAAM,EAAE,CAAC,GAAG,CAAC,MAAM,IAAI,qBAAqB,CAAa;gBACzD,MAAM,EAAE,GAAG,CAAC,MAAM;aACnB,CAAC,CAAC;QAEP,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,qBAAqB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACzE,CAAC;QAED,MAAM,SAAS;QACb,uEAAuE;QACvE,+DAA+D;QAC/D,GAAG,CAAC,YAAY;YACd,CAAC,CAAC,OAAO,CAAC,KAAK;YAC
f,CAAC,CAAC,CAAC,qBAAqB,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC;QAE9D,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,qBAAqB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;QAClF,CAAC;QAED,OAAO;YACL,KAAK,EAAE,SAAS;YAChB,MAAM,EAAE,gBAAgB;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,sEAAsE;QACtE,sBAAsB;QACtB,IAAI,CAAC,CAAC,GAAG,YAAY,SAAS,CAAC;YAAE,MAAM,GAAG,CAAC;QAC3C,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACtE,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,KAAa;IACjC,OAAO,CACL,KAAK,KAAK,MAAM;QAChB,KAAK,KAAK,eAAe;QACzB,KAAK,KAAK,eAAe;QACzB,KAAK,KAAK,KAAK,CAChB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Public constants — MUST stay in sync with `Route` in `types.js`.
3
+ *
4
+ * The label prefix passed to Jina (zero-shot) and the canonical name a
5
+ * few-shot classifier MUST be trained against share the same literal
6
+ * values as the `Route` union ("NONE" | "PGVECTOR_ONLY" | "LIGHTRAG_ONLY"
7
+ * | "ALL"). If they diverged, the classifier path would always fall back
8
+ * to "ALL" because `isKnownRoute` would reject the predicted label.
9
+ */
10
+ export declare const ROUTE_NONE = "NONE";
11
+ export declare const ROUTE_PGVECTOR_ONLY = "PGVECTOR_ONLY";
12
+ export declare const ROUTE_LIGHTRAG_ONLY = "LIGHTRAG_ONLY";
13
+ export declare const ROUTE_ALL = "ALL";
14
+ /**
15
+ * Default labels handed to Jina `/v1/classify` in zero-shot mode when the
16
+ * operator does not supply a few-shot `classifierId`.
17
+ *
18
+ * Order does not matter for correctness, but we keep `NONE` first by
19
+ * convention so test output is stable.
20
+ */
21
+ export declare const DEFAULT_ROUTER_LABELS: readonly string[];
22
+ /**
23
+ * The label names that the classifier may legitimately return.
24
+ * Used by the defensive parser to refuse hallucinated classes.
25
+ */
26
+ export declare const ROUTER_LABEL_NAMES: readonly string[];
27
+ /**
28
+ * Extract the route name from a full label string: the text before the
29
+ * first colon, with surrounding whitespace trimmed. Returns `null` when the label has no colon, starts with a colon, or has only whitespace before the colon.
30
+ *
31
+ * @internal exported for unit testing
32
+ */
33
+ export declare function extractRouteFromLabel(label: string): string | null;
@@ -0,0 +1,67 @@
1
+ // Default zero-shot labels for the Jina Classifier.
2
+ //
3
+ // The labels are intentionally bilingual: most prompts in the observed
4
+ // traces are French (Ataraxis is a French-speaking deployment) but the
5
+ // system prompts and agent meta-questions are often English. Jina
6
+ // `jina-embeddings-v3` handles both languages natively.
7
+ //
8
+ // Each label is a short sentence describing the kind of question that
9
+ // belongs to that route. Jina embeds both the label and the input, then
10
+ // picks the closest one — so the more discriminative the labels, the
11
+ // better the routing. Empirically, "describe the kind of question"
12
+ // outperforms "give a category name" by ~25% on small samples
13
+ // (https://jina.ai/news/rephrased-labels-improve-zero-shot-text-classification-30/).
14
+ /**
15
+ * Public constants — MUST stay in sync with `Route` in `types.js`.
16
+ *
17
+ * The label prefix passed to Jina (zero-shot) and the canonical name a
18
+ * few-shot classifier MUST be trained against share the same literal
19
+ * values as the `Route` union ("NONE" | "PGVECTOR_ONLY" | "LIGHTRAG_ONLY"
20
+ * | "ALL"). If they diverged, the classifier path would always fall back
21
+ * to "ALL" because `isKnownRoute` would reject the predicted label.
22
+ */
23
+ export const ROUTE_NONE = "NONE";
24
+ export const ROUTE_PGVECTOR_ONLY = "PGVECTOR_ONLY";
25
+ export const ROUTE_LIGHTRAG_ONLY = "LIGHTRAG_ONLY";
26
+ export const ROUTE_ALL = "ALL";
27
+ /**
28
+ * Default labels handed to Jina `/v1/classify` in zero-shot mode when the
29
+ * operator does not supply a few-shot `classifierId`. Each entry has the form `<ROUTE>: <description>`, where `<ROUTE>` is one of the `ROUTE_*` constants; that prefix is the part `extractRouteFromLabel` returns.
30
+ *
31
+ * Order does not matter for correctness, but we keep `NONE` first by
32
+ * convention so test output is stable.
33
+ */
34
+ export const DEFAULT_ROUTER_LABELS = [
35
+ // NONE — agent meta, smalltalk, test pings
36
+ `${ROUTE_NONE}: meta-question about the agent itself, session identifier, system test, simple greeting, weather, or trivial smalltalk that does not depend on the knowledge base`,
37
+ // PGVECTOR_ONLY — single-document factual lookup
38
+ `${ROUTE_PGVECTOR_ONLY}: factual lookup that can be answered by a single document excerpt — version numbers, file names, dates, configuration values, raw quotes`,
39
+ // LIGHTRAG_ONLY — entity-relation traversal
40
+ `${ROUTE_LIGHTRAG_ONLY}: knowledge graph question about entities and their relationships — which client, which coach, which mission, which programme links to which livrable`,
41
+ // ALL — broad / synthesizing / unclear
42
+ `${ROUTE_ALL}: broad synthesis, multi-hop reasoning, audit, comparison, or any question whose scope is unclear and benefits from both vector search and knowledge graph context`,
43
+ ];
44
+ /**
45
+ * The bare route names (no `: description` suffix) that the classifier may legitimately return.
46
+ * Used by the defensive parser to refuse hallucinated classes.
47
+ */
48
+ export const ROUTER_LABEL_NAMES = [
49
+ ROUTE_NONE,
50
+ ROUTE_PGVECTOR_ONLY,
51
+ ROUTE_LIGHTRAG_ONLY,
52
+ ROUTE_ALL,
53
+ ];
54
+ /**
55
+ * Extract the route name from a full label string: the text before the
56
+ * first colon, with surrounding whitespace trimmed. Returns `null` when the label has no colon, starts with a colon, or has only whitespace before the colon. The name is NOT checked against `ROUTER_LABEL_NAMES`; callers must validate it themselves.
57
+ *
58
+ * @internal exported for unit testing
59
+ */
60
+ export function extractRouteFromLabel(label) {
61
+ const colonIndex = label.indexOf(":"); // position of the FIRST colon; later colons stay in the description
62
+ if (colonIndex <= 0) // -1: no colon at all; 0: nothing before the colon
63
+ return null;
64
+ const name = label.slice(0, colonIndex).trim();
65
+ return name.length > 0 ? name : null; // whitespace-only prefix counts as malformed
66
+ }
67
+ //# sourceMappingURL=labels.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"labels.js","sourceRoot":"","sources":["../../src/router/labels.ts"],"names":[],"mappings":"AAAA,oDAAoD;AACpD,EAAE;AACF,uEAAuE;AACvE,uEAAuE;AACvE,kEAAkE;AAClE,wDAAwD;AACxD,EAAE;AACF,sEAAsE;AACtE,wEAAwE;AACxE,qEAAqE;AACrE,mEAAmE;AACnE,8DAA8D;AAC9D,qFAAqF;AAErF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,UAAU,GAAG,MAAM,CAAC;AACjC,MAAM,CAAC,MAAM,mBAAmB,GAAG,eAAe,CAAC;AACnD,MAAM,CAAC,MAAM,mBAAmB,GAAG,eAAe,CAAC;AACnD,MAAM,CAAC,MAAM,SAAS,GAAG,KAAK,CAAC;AAE/B;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAsB;IACtD,2CAA2C;IAC3C,GAAG,UAAU,oKAAoK;IAEjL,iDAAiD;IACjD,GAAG,mBAAmB,2IAA2I;IAEjK,4CAA4C;IAC5C,GAAG,mBAAmB,uJAAuJ;IAE7K,uCAAuC;IACvC,GAAG,SAAS,oKAAoK;CACjL,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,UAAU;IACV,mBAAmB;IACnB,mBAAmB;IACnB,SAAS;CACV,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAa;IACjD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,UAAU,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/C,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACvC,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * The four mutually exclusive routes the router can pick.
3
+ *
4
+ * - `NONE` — skip every source. Used for heartbeats, cron,
5
+ * memory triggers, agent meta-questions, system
6
+ * tests. Saves the cost of an irrelevant retrieval.
7
+ * - `PGVECTOR_ONLY` — vector search only (no graph). Cheap factual
8
+ * lookups: file names, version numbers, simple
9
+ * excerpts.
10
+ * - `LIGHTRAG_ONLY` — knowledge graph only (no vectors). Multi-hop
11
+ * reasoning, entity-relationship queries. NOTE(review): the default zero-shot label in `labels.js` lists "multi-hop reasoning" under `ALL`, not `LIGHTRAG_ONLY` — confirm which is intended.
12
+ * - `ALL` — both sources in parallel. The current default
13
+ * behavior; preserved when the router is disabled.
14
+ */
15
+ export type Route = "NONE" | "PGVECTOR_ONLY" | "LIGHTRAG_ONLY" | "ALL";
16
+ /** Source of a router decision — used in logs to trace the reasoning path. */
17
+ export type RouterReason = "router_disabled" | "heuristic_trigger" | "heuristic_meta" | "heuristic_short" | "heuristic_keyword" | "classifier_hit" | "classifier_fallback" | "classifier_error";
18
+ export interface RouterDecision { // one routing decision: the chosen route, why it was chosen, and an optional classifier score
19
+ route: Route; // which knowledge sources to query (see `Route`)
20
+ reason: RouterReason; // which path produced the decision; used in logs
21
+ /** Confidence in [0, 1] when the classifier produced a score, else null. */
22
+ score: number | null;
23
+ }
@@ -0,0 +1,7 @@
1
+ // Router types — shared by heuristic + classifier paths.
2
+ //
3
+ // The router decides which knowledge sources should be queried for a given
4
+ // user turn. The decision is consumed by the `before_prompt_build` handler
5
+ // to gate calls to pgvector and LightRAG.
6
+ export {};
7
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/router/types.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,0CAA0C"}