@lacneu/openclaw-knowledge 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +264 -1
  2. package/README.md +131 -0
  3. package/dist/config.d.ts +4 -0
  4. package/dist/config.js +26 -0
  5. package/dist/config.js.map +1 -1
  6. package/dist/index.d.ts +25 -4
  7. package/dist/index.js +295 -46
  8. package/dist/index.js.map +1 -1
  9. package/dist/jina/classifier.d.ts +55 -0
  10. package/dist/jina/classifier.js +170 -0
  11. package/dist/jina/classifier.js.map +1 -0
  12. package/dist/jina/client.d.ts +30 -0
  13. package/dist/jina/client.js +131 -0
  14. package/dist/jina/client.js.map +1 -0
  15. package/dist/jina/errors.d.ts +42 -0
  16. package/dist/jina/errors.js +113 -0
  17. package/dist/jina/errors.js.map +1 -0
  18. package/dist/jina/reranker.d.ts +34 -0
  19. package/dist/jina/reranker.js +95 -0
  20. package/dist/jina/reranker.js.map +1 -0
  21. package/dist/jina/types.d.ts +78 -0
  22. package/dist/jina/types.js +12 -0
  23. package/dist/jina/types.js.map +1 -0
  24. package/dist/pgvector.d.ts +29 -0
  25. package/dist/pgvector.js +68 -0
  26. package/dist/pgvector.js.map +1 -1
  27. package/dist/router/heuristic.d.ts +29 -0
  28. package/dist/router/heuristic.js +104 -0
  29. package/dist/router/heuristic.js.map +1 -0
  30. package/dist/router/index.d.ts +33 -0
  31. package/dist/router/index.js +94 -0
  32. package/dist/router/index.js.map +1 -0
  33. package/dist/router/labels.d.ts +33 -0
  34. package/dist/router/labels.js +67 -0
  35. package/dist/router/labels.js.map +1 -0
  36. package/dist/router/types.d.ts +23 -0
  37. package/dist/router/types.js +7 -0
  38. package/dist/router/types.js.map +1 -0
  39. package/dist/tracing/events.d.ts +83 -0
  40. package/dist/tracing/events.js +86 -0
  41. package/dist/tracing/events.js.map +1 -0
  42. package/dist/types.d.ts +57 -0
  43. package/openclaw.plugin.json +97 -4
  44. package/package.json +3 -3
@@ -0,0 +1,83 @@
1
+ import type { Route, RouterReason } from "../router/types.js";
2
+ /**
3
+ * Minimal logger surface used by this module. Matches the relevant subset of
4
+ * `PluginLogger` from the OpenClaw SDK so it can be unit-tested without
5
+ * importing the full SDK type graph.
6
+ */
7
+ export interface TracingLogger {
8
+ info: (message: string) => void;
9
+ debug?: (message: string) => void;
10
+ }
11
+ /**
12
+ * Marker prefix for every structured line emitted by this module. Pick a
13
+ * value that is unlikely to clash with other plugins and stable across
14
+ * versions — log scrapers and Opik rules depend on it.
15
+ */
16
+ export declare const EVENT_PREFIX = "[knowledge.event]";
17
+ export interface RouterEvent {
18
+ type: "router";
19
+ route: Route;
20
+ reason: RouterReason;
21
+ score: number | null;
22
+ queryLength: number;
23
+ trigger?: string;
24
+ }
25
+ export interface PgvectorEvent {
26
+ type: "pgvector";
27
+ collections: string[];
28
+ rawCount: number;
29
+ rerankedCount: number | null;
30
+ topScore: number | null;
31
+ durationMs: number;
32
+ }
33
+ export interface LightRAGEvent {
34
+ type: "lightrag";
35
+ mode: string;
36
+ contextChars: number;
37
+ truncatedChars: number;
38
+ durationMs: number;
39
+ }
40
+ export interface JinaUsageEvent {
41
+ type: "jina";
42
+ endpoint: "classify" | "rerank";
43
+ model: string;
44
+ durationMs: number;
45
+ inputCount: number;
46
+ }
47
+ export interface CooldownEvent {
48
+ type: "cooldown";
49
+ scope: "global" | "router" | "pgvector_reranker";
50
+ consecutiveErrors: number;
51
+ }
52
+ export type KnowledgeEvent = RouterEvent | PgvectorEvent | LightRAGEvent | JinaUsageEvent | CooldownEvent;
53
+ /**
54
+ * Emit a structured event line through `logger.info`: `EVENT_PREFIX`, one space, then `JSON.stringify(event)`.
55
+ *
56
+ * Never throws. If JSON serialization or the logger itself fails (e.g.
57
+ * upstream broke the contract), we silently swallow — the plugin must keep
58
+ * working even if tracing breaks.
59
+ */
60
+ export declare function emitEvent(logger: TracingLogger, event: KnowledgeEvent): void;
61
+ /**
62
+ * Optional debug-level emission of turn metadata for correlation. No-op when `logger.debug` is not provided.
63
+ *
64
+ * What goes into the log line:
65
+ * - `runId`: the OpenClaw SDK's runId for this agent turn (or
66
+ * `"unknown"` when it is undefined or an empty string). This is the
67
+ * ONLY correlation key we expose — it is non-query-derived
68
+ * by construction, so it cannot be dictionary-recovered
69
+ * from the log line. It is written verbatim, without escaping.
70
+ * - `qlen`: character length of the query (a count, not content).
71
+ *
72
+ * What does NOT go in: any portion of the query text, AND no hash of it.
73
+ * An earlier iteration of this plugin emitted `SHA-256(query)` truncated
74
+ * to 12 hex chars under the assumption it was "non-reversible". Code
75
+ * review (2026-05-23) correctly pointed out that for short or low-entropy
76
+ * prompts (the hook accepts ≥ 3 chars), the hash is dictionary-recoverable
77
+ * offline. We removed the hash entirely and rely on `runId` instead.
78
+ *
79
+ * Operators who want CONTENT correlation across turns must instrument
80
+ * Opik / LangFuse at the SDK layer with their own keyed scheme (HMAC
81
+ * with a deployment secret); the plugin will not do it for them.
82
+ */
83
+ export declare function emitTurnMetadata(logger: TracingLogger, runId: string | undefined, queryLength: number): void;
@@ -0,0 +1,86 @@
1
+ // Structured event emission for downstream observability tools.
2
+ //
3
+ // The plugin already runs inside an OpenClaw deployment that includes Opik
4
+ // (https://www.comet.com/docs/opik/) for tracing — but the plugin itself
5
+ // MUST NOT depend on the Opik SDK directly. Two reasons:
6
+ //
7
+ // 1. Deps. The plugin proudly ships with a single runtime dep (`pg`).
8
+ // Adding `opik` would force every consumer to install it.
9
+ // 2. Coupling. Operators may swap Opik for LangFuse or pure OTLP. The
10
+ // plugin should not care.
11
+ //
12
+ // Solution: emit structured JSON lines through OpenClaw's logger. The
13
+ // upstream gateway already forwards `logger.info(...)` to Opik (when
14
+ // configured) and to stdout in any case. A grep-friendly prefix
15
+ // (`[knowledge.event]`) lets a downstream scraper or Opik rule pick the
16
+ // records out without ambiguity.
17
+ //
18
+ // Privacy invariant: NO event in this module ever logs the raw user
19
+ // query, query excerpts, retrieved chunk content, OR ANY HASH OF THEM.
20
+ // We log metadata only (lengths, scores, counts, durations) plus the
21
+ // `runId` provided by the OpenClaw SDK when turn-level correlation is
22
+ // needed. The runId is non-query-derived by construction, so it cannot
23
+ // be reversed offline against a dictionary of likely prompts.
24
+ //
25
+ // The events module is intentionally tiny and synchronous — emitting a log
26
+ // line must NEVER throw, NEVER consume noticeable CPU, and NEVER hold the
27
+ // agent turn open.
28
+ /**
29
+ * Marker prefix for every structured line emitted by this module. Pick a
30
+ * value that is unlikely to clash with other plugins and stable across
31
+ * versions — log scrapers and Opik rules depend on it.
32
+ */
33
+ export const EVENT_PREFIX = "[knowledge.event]";
34
+ // ---------------------------------------------------------------------------
35
+ // Emitters
36
+ // ---------------------------------------------------------------------------
37
+ /**
38
+ * Emit a structured event line through `logger.info`: `EVENT_PREFIX`, one space, then `JSON.stringify(event)`.
39
+ *
40
+ * Never throws. If JSON serialization or the logger itself fails (e.g.
41
+ * upstream broke the contract), we silently swallow — the plugin must keep
42
+ * working even if tracing breaks.
43
+ */
44
+ export function emitEvent(logger, event) {
45
+ try {
46
+ const payload = JSON.stringify(event);
47
+ logger.info(`${EVENT_PREFIX} ${payload}`);
48
+ }
49
+ catch {
50
+ // intentional swallow: covers both a JSON.stringify failure and a throwing logger.info — tracing must never crash the plugin
51
+ }
52
+ }
53
+ /**
54
+ * Optional debug-level emission of turn metadata for correlation. No-op when `logger.debug` is not provided.
55
+ *
56
+ * What goes into the log line:
57
+ * - `runId`: the OpenClaw SDK's runId for this agent turn (or
58
+ * `"unknown"` when it is undefined or an empty string). This is the
59
+ * ONLY correlation key we expose — it is non-query-derived
60
+ * by construction, so it cannot be dictionary-recovered
61
+ * from the log line. It is written verbatim, without escaping.
62
+ * - `qlen`: character length of the query (a count, not content).
63
+ *
64
+ * What does NOT go in: any portion of the query text, AND no hash of it.
65
+ * An earlier iteration of this plugin emitted `SHA-256(query)` truncated
66
+ * to 12 hex chars under the assumption it was "non-reversible". Code
67
+ * review (2026-05-23) correctly pointed out that for short or low-entropy
68
+ * prompts (the hook accepts ≥ 3 chars), the hash is dictionary-recoverable
69
+ * offline. We removed the hash entirely and rely on `runId` instead.
70
+ *
71
+ * Operators who want CONTENT correlation across turns must instrument
72
+ * Opik / LangFuse at the SDK layer with their own keyed scheme (HMAC
73
+ * with a deployment secret); the plugin will not do it for them.
74
+ */
75
+ export function emitTurnMetadata(logger, runId, queryLength) {
76
+ if (!logger.debug)
77
+ return;
78
+ try {
79
+ const id = runId && runId.length > 0 ? runId : "unknown";
80
+ logger.debug(`${EVENT_PREFIX} turn.metadata runId=${id} qlen=${queryLength}`);
81
+ }
82
+ catch {
83
+ // swallow a throwing logger.debug — tracing must never crash the plugin
84
+ }
85
+ }
86
+ //# sourceMappingURL=events.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"events.js","sourceRoot":"","sources":["../../src/tracing/events.ts"],"names":[],"mappings":"AAAA,gEAAgE;AAChE,EAAE;AACF,2EAA2E;AAC3E,yEAAyE;AACzE,yDAAyD;AACzD,EAAE;AACF,wEAAwE;AACxE,+DAA+D;AAC/D,wEAAwE;AACxE,+BAA+B;AAC/B,EAAE;AACF,sEAAsE;AACtE,qEAAqE;AACrE,gEAAgE;AAChE,wEAAwE;AACxE,iCAAiC;AACjC,EAAE;AACF,oEAAoE;AACpE,uEAAuE;AACvE,qEAAqE;AACrE,sEAAsE;AACtE,uEAAuE;AACvE,8DAA8D;AAC9D,EAAE;AACF,2EAA2E;AAC3E,0EAA0E;AAC1E,mBAAmB;AAcnB;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,mBAAmB,CAAC;AAqDhD,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CAAC,MAAqB,EAAE,KAAqB;IACpE,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,IAAI,OAAO,EAAE,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,4DAA4D;IAC9D,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,UAAU,gBAAgB,CAC9B,MAAqB,EACrB,KAAyB,EACzB,WAAmB;IAEnB,IAAI,CAAC,MAAM,CAAC,KAAK;QAAE,OAAO;IAC1B,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,GAAG,YAAY,wBAAwB,EAAE,SAAS,WAAW,EAAE,CAAC,CAAC;IAChF,CAAC;IAAC,MAAM,CAAC;QACP,gDAAgD;IAClD,CAAC;AACH,CAAC"}
package/dist/types.d.ts CHANGED
@@ -1,3 +1,25 @@
1
+ import type { RerankerModel } from "./jina/types.js";
2
+ /**
3
+ * Subset of `PluginHookAgentContext` from the OpenClaw plugin SDK that this
4
+ * plugin actually consumes. Declared locally to keep the test suite free of
5
+ * SDK runtime imports.
6
+ *
7
+ * Fields beyond this subset (workspaceDir, modelProviderId, ...) are
8
+ * deliberately omitted — the handler does not depend on them.
9
+ *
10
+ * @see https://github.com/openclaw/openclaw plugin-sdk types.d.ts
11
+ */
12
+ export interface PluginHookAgentContext {
13
+ /** What initiated this agent run. The literals list the known values; the trailing `| string` means any other string is accepted too (the type is effectively `string`). */
14
+ trigger?: "user" | "heartbeat" | "cron" | "memory" | string;
15
+ /** Channel-derived sender id. The plugin currently only uses `"cli"`. */
16
+ messageProvider?: string;
17
+ channelId?: string;
18
+ agentId?: string;
19
+ sessionId?: string;
20
+ sessionKey?: string;
21
+ runId?: string;
22
+ }
1
23
  /**
2
24
  * Runtime configuration as it appears in `plugins.entries.openclaw-knowledge.config`.
3
25
  * All fields are optional — defaults are applied in {@link resolveConfig}.
@@ -16,6 +38,30 @@ export interface KnowledgePluginConfig {
16
38
  lightragQueryMode?: LightRAGQueryMode;
17
39
  lightragMaxChars?: number;
18
40
  lightragEnabled?: boolean;
41
+ jina?: JinaPluginConfig;
42
+ }
43
+ export interface JinaPluginConfig {
44
+ /** Jina API key. Required for `router.mode=jina-classifier` or `pgvectorReranker.enabled`. Supports `${ENV_VAR}` substitution. */
45
+ apiKey?: string;
46
+ router?: RouterPluginConfig;
47
+ pgvectorReranker?: PgvectorRerankerPluginConfig;
48
+ }
49
+ export interface RouterPluginConfig {
50
+ enabled?: boolean;
51
+ mode?: "heuristic" | "jina-classifier";
52
+ /**
53
+ * Optional pre-trained Jina classifier_id. When set, the router calls
54
+ * `/v1/classify` with this ID (few-shot mode). Train it out-of-band via
55
+ * `POST /v1/train` — the plugin does NOT implement training.
56
+ */
57
+ classifierId?: string;
58
+ }
59
+ export interface PgvectorRerankerPluginConfig {
60
+ enabled?: boolean;
61
+ /** Reranker model. Default: `jina-reranker-v2-base-multilingual` (best French coverage). */
62
+ model?: RerankerModel;
63
+ /** Cap on results returned post-rerank. Default: `5` (the plugin schema allows 1–100). */
64
+ topN?: number;
19
65
  }
20
66
  export type LightRAGQueryMode = "naive" | "local" | "global" | "hybrid";
21
67
  /**
@@ -36,6 +82,13 @@ export interface ResolvedKnowledgeConfig {
36
82
  lightragQueryMode: LightRAGQueryMode;
37
83
  lightragMaxChars: number;
38
84
  lightragEnabled: boolean;
85
+ jinaApiKey: string;
86
+ routerEnabled: boolean;
87
+ routerMode: "heuristic" | "jina-classifier";
88
+ routerClassifierId: string;
89
+ pgvectorRerankerEnabled: boolean;
90
+ pgvectorRerankerModel: RerankerModel;
91
+ pgvectorRerankerTopN: number;
39
92
  }
40
93
  /**
41
94
  * One search hit from the PostgreSQL `knowledge_vectors` table, after score
@@ -86,6 +139,10 @@ export interface PgvectorRow {
86
139
  /**
87
140
  * Shape of the `before_prompt_build` event payload as consumed by this plugin.
88
141
  * We only rely on `messages`; the SDK may add other fields that we ignore.
142
+ *
143
+ * The full SDK type also exposes `prompt: string` (the raw user text). The
144
+ * handler keeps using `extractQueryFromMessages` to stay compatible with the
145
+ * existing tests; `prompt` is left to the SDK without being read here.
89
146
  */
90
147
  export interface BeforePromptBuildEvent {
91
148
  messages?: PromptMessage[];
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "openclaw-knowledge",
3
3
  "name": "Knowledge Base",
4
- "description": "Multi-source knowledge search (pgvector + LightRAG) — injects relevant documents and knowledge graph context before each turn via the before_prompt_build hook",
5
- "version": "3.1.2",
4
+ "description": "Multi-source knowledge search (pgvector + LightRAG) with optional Jina-powered router & reranker — injects relevant documents and knowledge graph context before each turn via the before_prompt_build hook",
5
+ "version": "3.2.0",
6
6
  "activation": {
7
7
  "onStartup": true
8
8
  },
@@ -34,7 +34,7 @@
34
34
  "minimum": 1,
35
35
  "maximum": 100,
36
36
  "default": 5,
37
- "description": "Maximum number of results returned per collection"
37
+ "description": "Maximum number of results returned per collection (raw recall stage)"
38
38
  },
39
39
  "scoreThreshold": {
40
40
  "type": "number",
@@ -76,6 +76,63 @@
76
76
  "lightragEnabled": {
77
77
  "type": "boolean",
78
78
  "description": "Disable LightRAG search while keeping pgvector. Defaults to true when lightragUrl is set."
79
+ },
80
+ "jina": {
81
+ "type": "object",
82
+ "additionalProperties": false,
83
+ "description": "Optional Jina-powered enhancements: router (skip irrelevant retrievals) and pgvector reranker (re-order vector results by cross-encoder relevance).",
84
+ "properties": {
85
+ "apiKey": {
86
+ "type": "string",
87
+ "description": "Jina API key shared by router and reranker. Required when router.mode=jina-classifier or pgvectorReranker.enabled. Supports ${ENV_VAR} substitution."
88
+ },
89
+ "router": {
90
+ "type": "object",
91
+ "additionalProperties": false,
92
+ "description": "Adaptive routing: classify each user turn and skip retrieval when irrelevant.",
93
+ "properties": {
94
+ "enabled": {
95
+ "type": "boolean",
96
+ "default": false,
97
+ "description": "Enable the router. When false (default), every eligible turn calls every configured source — pre-3.2.0 behavior."
98
+ },
99
+ "mode": {
100
+ "type": "string",
101
+ "enum": ["heuristic", "jina-classifier"],
102
+ "default": "heuristic",
103
+ "description": "heuristic: zero-cost regex + trigger rules only (safe start). jina-classifier: same heuristics first, then Jina /v1/classify for ambiguous queries."
104
+ },
105
+ "classifierId": {
106
+ "type": "string",
107
+ "description": "Optional pre-trained Jina classifier_id (few-shot mode). Train it out-of-band via POST /v1/train then paste the ID here. When omitted, the router uses zero-shot with built-in labels."
108
+ }
109
+ }
110
+ },
111
+ "pgvectorReranker": {
112
+ "type": "object",
113
+ "additionalProperties": false,
114
+ "description": "Re-order pgvector results with a Jina cross-encoder. Boosts precision when topK candidates contain noise.",
115
+ "properties": {
116
+ "enabled": {
117
+ "type": "boolean",
118
+ "default": false,
119
+ "description": "Enable cross-encoder rerank on pgvector results. Requires jina.apiKey."
120
+ },
121
+ "model": {
122
+ "type": "string",
123
+ "default": "jina-reranker-v2-base-multilingual",
124
+ "description": "Jina reranker model. v2-base-multilingual is recommended for French content (v3 is English-biased)."
125
+ },
126
+ "topN": {
127
+ "type": "number",
128
+ "minimum": 1,
129
+ "maximum": 100,
130
+ "default": 5,
131
+ "description": "Max number of results returned after rerank. Recommendation: keep topK ≥ topN × 2 so the cross-encoder has room to re-order."
132
+ }
133
+ }
134
+ }
135
+ }
79
136
  }
80
137
  },
81
138
  "required": []
@@ -105,7 +162,7 @@
105
162
  "topK": {
106
163
  "label": "Top-K per collection",
107
164
  "advanced": true,
108
- "help": "Maximum number of results returned per collection (default: 5)"
165
+ "help": "Maximum number of results returned per collection (default: 5). When the reranker is enabled, recommended ≥ 2× rerank topN."
109
166
  },
110
167
  "scoreThreshold": {
111
168
  "label": "Score threshold",
@@ -147,6 +204,42 @@
147
204
  "label": "Enable LightRAG source",
148
205
  "advanced": true,
149
206
  "help": "Disable LightRAG while keeping pgvector. Defaults to true when lightragUrl is set."
207
+ },
208
+ "jina.apiKey": {
209
+ "label": "Jina API Key",
210
+ "placeholder": "${JINA_API_KEY}",
211
+ "sensitive": true,
212
+ "help": "Shared by router and reranker. Use ${JINA_API_KEY} for env var substitution."
213
+ },
214
+ "jina.router.enabled": {
215
+ "label": "Enable router",
216
+ "advanced": true,
217
+ "help": "Adaptive routing that skips retrieval on heartbeats and meta-questions. Default: false (pre-3.2.0 behavior)."
218
+ },
219
+ "jina.router.mode": {
220
+ "label": "Router mode",
221
+ "advanced": true,
222
+ "help": "heuristic: zero-cost rules only. jina-classifier: heuristics + Jina /v1/classify fallback."
223
+ },
224
+ "jina.router.classifierId": {
225
+ "label": "Few-shot classifier ID",
226
+ "advanced": true,
227
+ "help": "Optional. When set, the router uses your pre-trained classifier instead of zero-shot labels."
228
+ },
229
+ "jina.pgvectorReranker.enabled": {
230
+ "label": "Enable pgvector reranker",
231
+ "advanced": true,
232
+ "help": "Cross-encoder re-ordering of pgvector results. Requires Jina API key."
233
+ },
234
+ "jina.pgvectorReranker.model": {
235
+ "label": "Reranker model",
236
+ "advanced": true,
237
+ "help": "Default: jina-reranker-v2-base-multilingual (best for French)."
238
+ },
239
+ "jina.pgvectorReranker.topN": {
240
+ "label": "Reranker top-N",
241
+ "advanced": true,
242
+ "help": "Max results returned after rerank. Keep topK ≥ topN × 2."
150
243
  }
151
244
  }
152
245
  }
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@lacneu/openclaw-knowledge",
3
- "version": "3.1.2",
3
+ "version": "3.2.0",
4
4
  "type": "module",
5
- "description": "Multi-source knowledge plugin for OpenClaw — pgvector + LightRAG injection via before_prompt_build hook",
5
+ "description": "Multi-source knowledge plugin for OpenClaw — pgvector + LightRAG injection with optional Jina-powered router & reranker, via before_prompt_build hook",
6
6
  "license": "MIT",
7
7
  "author": "Olivier Neu",
8
8
  "homepage": "https://github.com/OlivierNeu/openclaw-knowledge-plugin#readme",
@@ -40,7 +40,7 @@
40
40
  "build:test": "tsc -p tsconfig.test-build.json",
41
41
  "clean": "rm -rf dist dist-test",
42
42
  "typecheck": "tsc -p tsconfig.test.json",
43
- "test": "npm run build:test && node --test dist-test/test/*.test.js",
43
+ "test": "npm run build:test && node --test $(find dist-test/test -name '*.test.js' -print)",
44
44
  "prepublishOnly": "npm run clean && npm run build"
45
45
  },
46
46
  "openclaw": {