@lacneu/openclaw-knowledge 3.1.2 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +368 -1
- package/README.md +131 -0
- package/dist/config.d.ts +4 -0
- package/dist/config.js +26 -0
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +61 -4
- package/dist/index.js +463 -50
- package/dist/index.js.map +1 -1
- package/dist/jina/classifier.d.ts +55 -0
- package/dist/jina/classifier.js +170 -0
- package/dist/jina/classifier.js.map +1 -0
- package/dist/jina/client.d.ts +30 -0
- package/dist/jina/client.js +131 -0
- package/dist/jina/client.js.map +1 -0
- package/dist/jina/errors.d.ts +42 -0
- package/dist/jina/errors.js +113 -0
- package/dist/jina/errors.js.map +1 -0
- package/dist/jina/reranker.d.ts +34 -0
- package/dist/jina/reranker.js +95 -0
- package/dist/jina/reranker.js.map +1 -0
- package/dist/jina/types.d.ts +78 -0
- package/dist/jina/types.js +12 -0
- package/dist/jina/types.js.map +1 -0
- package/dist/pgvector.d.ts +29 -0
- package/dist/pgvector.js +68 -0
- package/dist/pgvector.js.map +1 -1
- package/dist/router/heuristic.d.ts +29 -0
- package/dist/router/heuristic.js +104 -0
- package/dist/router/heuristic.js.map +1 -0
- package/dist/router/index.d.ts +33 -0
- package/dist/router/index.js +94 -0
- package/dist/router/index.js.map +1 -0
- package/dist/router/labels.d.ts +33 -0
- package/dist/router/labels.js +67 -0
- package/dist/router/labels.js.map +1 -0
- package/dist/router/types.d.ts +23 -0
- package/dist/router/types.js +7 -0
- package/dist/router/types.js.map +1 -0
- package/dist/tracing/events.d.ts +83 -0
- package/dist/tracing/events.js +86 -0
- package/dist/tracing/events.js.map +1 -0
- package/dist/types.d.ts +61 -1
- package/openclaw.plugin.json +97 -4
- package/package.json +3 -3
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// Default zero-shot labels for the Jina Classifier.
|
|
2
|
+
//
|
|
3
|
+
// The labels are intentionally bilingual: most prompts in the observed
|
|
4
|
+
// traces are French (Ataraxis is a French-speaking deployment) but the
|
|
5
|
+
// system prompts and agent meta-questions are often English. Jina
|
|
6
|
+
// `jina-embeddings-v3` handles both languages natively.
|
|
7
|
+
//
|
|
8
|
+
// Each label is a short sentence describing the kind of question that
|
|
9
|
+
// belongs to that route. Jina embeds both the label and the input, then
|
|
10
|
+
// picks the closest one — so the more discriminative the labels, the
|
|
11
|
+
// better the routing. Empirically, "describe the kind of question"
|
|
12
|
+
// outperforms "give a category name" by ~25% on small samples
|
|
13
|
+
// (https://jina.ai/news/rephrased-labels-improve-zero-shot-text-classification-30/).
|
|
14
|
+
/**
|
|
15
|
+
* Public constants — MUST stay in sync with `Route` in `types.js`.
|
|
16
|
+
*
|
|
17
|
+
* The label prefix passed to Jina (zero-shot) and the canonical name a
|
|
18
|
+
* few-shot classifier MUST be trained against share the same literal
|
|
19
|
+
* values as the `Route` union ("NONE" | "PGVECTOR_ONLY" | "LIGHTRAG_ONLY"
|
|
20
|
+
* | "ALL"). If they diverged, the classifier path would always fall back
|
|
21
|
+
* to "ALL" because `isKnownRoute` would reject the predicted label.
|
|
22
|
+
*/
|
|
23
|
+
export const ROUTE_NONE = "NONE";
|
|
24
|
+
export const ROUTE_PGVECTOR_ONLY = "PGVECTOR_ONLY";
|
|
25
|
+
export const ROUTE_LIGHTRAG_ONLY = "LIGHTRAG_ONLY";
|
|
26
|
+
export const ROUTE_ALL = "ALL";
|
|
27
|
+
/**
|
|
28
|
+
* Default labels handed to Jina `/v1/classify` in zero-shot mode when the
|
|
29
|
+
* operator does not supply a few-shot `classifierId`.
|
|
30
|
+
*
|
|
31
|
+
* Order does not matter for correctness, but we keep `NONE` first by
|
|
32
|
+
* convention so test output is stable.
|
|
33
|
+
*/
|
|
34
|
+
export const DEFAULT_ROUTER_LABELS = [
|
|
35
|
+
// NONE — agent meta, smalltalk, test pings
|
|
36
|
+
`${ROUTE_NONE}: meta-question about the agent itself, session identifier, system test, simple greeting, weather, or trivial smalltalk that does not depend on the knowledge base`,
|
|
37
|
+
// PGVECTOR_ONLY — single-document factual lookup
|
|
38
|
+
`${ROUTE_PGVECTOR_ONLY}: factual lookup that can be answered by a single document excerpt — version numbers, file names, dates, configuration values, raw quotes`,
|
|
39
|
+
// LIGHTRAG_ONLY — entity-relation traversal
|
|
40
|
+
`${ROUTE_LIGHTRAG_ONLY}: knowledge graph question about entities and their relationships — which client, which coach, which mission, which programme links to which livrable`,
|
|
41
|
+
// ALL — broad / synthesizing / unclear
|
|
42
|
+
`${ROUTE_ALL}: broad synthesis, multi-hop reasoning, audit, comparison, or any question whose scope is unclear and benefits from both vector search and knowledge graph context`,
|
|
43
|
+
];
|
|
44
|
+
/**
|
|
45
|
+
* The label names that the classifier may legitimately return.
|
|
46
|
+
* Used by the defensive parser to refuse hallucinated classes.
|
|
47
|
+
*/
|
|
48
|
+
export const ROUTER_LABEL_NAMES = [
|
|
49
|
+
ROUTE_NONE,
|
|
50
|
+
ROUTE_PGVECTOR_ONLY,
|
|
51
|
+
ROUTE_LIGHTRAG_ONLY,
|
|
52
|
+
ROUTE_ALL,
|
|
53
|
+
];
|
|
54
|
+
/**
 * Extract the route name from a full label string: the text that precedes
 * the first colon, with surrounding whitespace removed.
 *
 * Never throws. Returns `null` for every malformed input:
 *   - a value that is not a string (`undefined`, `null`, a number, ...),
 *   - a label with no colon, or whose first character is the colon,
 *   - a label whose prefix contains only whitespace.
 *
 * The returned name is NOT validated against the known route names here;
 * this function only parses.
 *
 * @param {string} label - Full label, e.g. `"NONE: meta-question about the agent"`.
 * @returns {string | null} The trimmed prefix, or `null` when malformed.
 *
 * @internal exported for unit testing
 */
export function extractRouteFromLabel(label) {
    // Guard first: calling `indexOf` on a non-string would throw a TypeError,
    // which contradicts the "malformed → null" contract of this parser.
    if (typeof label !== "string")
        return null;
    const colonIndex = label.indexOf(":");
    // -1 → no colon at all; 0 → empty prefix. Both are malformed.
    if (colonIndex <= 0)
        return null;
    const name = label.slice(0, colonIndex).trim();
    // A whitespace-only prefix (e.g. "  : text") trims down to nothing.
    return name.length > 0 ? name : null;
}
|
|
67
|
+
//# sourceMappingURL=labels.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"labels.js","sourceRoot":"","sources":["../../src/router/labels.ts"],"names":[],"mappings":"AAAA,oDAAoD;AACpD,EAAE;AACF,uEAAuE;AACvE,uEAAuE;AACvE,kEAAkE;AAClE,wDAAwD;AACxD,EAAE;AACF,sEAAsE;AACtE,wEAAwE;AACxE,qEAAqE;AACrE,mEAAmE;AACnE,8DAA8D;AAC9D,qFAAqF;AAErF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,UAAU,GAAG,MAAM,CAAC;AACjC,MAAM,CAAC,MAAM,mBAAmB,GAAG,eAAe,CAAC;AACnD,MAAM,CAAC,MAAM,mBAAmB,GAAG,eAAe,CAAC;AACnD,MAAM,CAAC,MAAM,SAAS,GAAG,KAAK,CAAC;AAE/B;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAsB;IACtD,2CAA2C;IAC3C,GAAG,UAAU,oKAAoK;IAEjL,iDAAiD;IACjD,GAAG,mBAAmB,2IAA2I;IAEjK,4CAA4C;IAC5C,GAAG,mBAAmB,uJAAuJ;IAE7K,uCAAuC;IACvC,GAAG,SAAS,oKAAoK;CACjL,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,UAAU;IACV,mBAAmB;IACnB,mBAAmB;IACnB,SAAS;CACV,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAa;IACjD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACtC,IAAI,UAAU,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/C,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The four mutually exclusive routes the router can pick.
|
|
3
|
+
*
|
|
4
|
+
* - `NONE` — skip every source. Used for heartbeats, cron,
|
|
5
|
+
* memory triggers, agent meta-questions, system
|
|
6
|
+
* tests. Saves the cost of an irrelevant retrieval.
|
|
7
|
+
* - `PGVECTOR_ONLY` — vector search only (no graph). Cheap factual
|
|
8
|
+
* lookups: file names, version numbers, simple
|
|
9
|
+
* excerpts.
|
|
10
|
+
* - `LIGHTRAG_ONLY` — knowledge graph only (no vectors). Multi-hop
|
|
11
|
+
* reasoning, entity-relationship queries.
|
|
12
|
+
* - `ALL` — both sources in parallel. The current default
|
|
13
|
+
* behavior; preserved when the router is disabled.
|
|
14
|
+
*/
|
|
15
|
+
export type Route = "NONE" | "PGVECTOR_ONLY" | "LIGHTRAG_ONLY" | "ALL";
|
|
16
|
+
/** Source of a router decision — used in logs to trace the reasoning path. */
|
|
17
|
+
export type RouterReason = "router_disabled" | "heuristic_trigger" | "heuristic_meta" | "heuristic_short" | "heuristic_keyword" | "classifier_hit" | "classifier_fallback" | "classifier_error";
|
|
18
|
+
export interface RouterDecision {
|
|
19
|
+
route: Route;
|
|
20
|
+
reason: RouterReason;
|
|
21
|
+
/** Confidence in [0, 1] when the classifier produced a score, else null. */
|
|
22
|
+
score: number | null;
|
|
23
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
// Router types — shared by heuristic + classifier paths.
|
|
2
|
+
//
|
|
3
|
+
// The router decides which knowledge sources should be queried for a given
|
|
4
|
+
// user turn. The decision is consumed by the `before_prompt_build` handler
|
|
5
|
+
// to gate calls to pgvector and LightRAG.
|
|
6
|
+
export {};
|
|
7
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/router/types.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,0CAA0C"}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import type { Route, RouterReason } from "../router/types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Minimal logger surface used by this module. Matches the relevant subset of
|
|
4
|
+
* `PluginLogger` from the OpenClaw SDK so it can be unit-tested without
|
|
5
|
+
* importing the full SDK type graph.
|
|
6
|
+
*/
|
|
7
|
+
export interface TracingLogger {
|
|
8
|
+
info: (message: string) => void;
|
|
9
|
+
debug?: (message: string) => void;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Marker prefix for every structured line emitted by this module. Pick a
|
|
13
|
+
* value that is unlikely to clash with other plugins and stable across
|
|
14
|
+
* versions — log scrapers and Opik rules depend on it.
|
|
15
|
+
*/
|
|
16
|
+
export declare const EVENT_PREFIX = "[knowledge.event]";
|
|
17
|
+
export interface RouterEvent {
|
|
18
|
+
type: "router";
|
|
19
|
+
route: Route;
|
|
20
|
+
reason: RouterReason;
|
|
21
|
+
score: number | null;
|
|
22
|
+
queryLength: number;
|
|
23
|
+
trigger?: string;
|
|
24
|
+
}
|
|
25
|
+
export interface PgvectorEvent {
|
|
26
|
+
type: "pgvector";
|
|
27
|
+
collections: string[];
|
|
28
|
+
rawCount: number;
|
|
29
|
+
rerankedCount: number | null;
|
|
30
|
+
topScore: number | null;
|
|
31
|
+
durationMs: number;
|
|
32
|
+
}
|
|
33
|
+
export interface LightRAGEvent {
|
|
34
|
+
type: "lightrag";
|
|
35
|
+
mode: string;
|
|
36
|
+
contextChars: number;
|
|
37
|
+
truncatedChars: number;
|
|
38
|
+
durationMs: number;
|
|
39
|
+
}
|
|
40
|
+
export interface JinaUsageEvent {
|
|
41
|
+
type: "jina";
|
|
42
|
+
endpoint: "classify" | "rerank";
|
|
43
|
+
model: string;
|
|
44
|
+
durationMs: number;
|
|
45
|
+
inputCount: number;
|
|
46
|
+
}
|
|
47
|
+
export interface CooldownEvent {
|
|
48
|
+
type: "cooldown";
|
|
49
|
+
scope: "global" | "router" | "pgvector_reranker";
|
|
50
|
+
consecutiveErrors: number;
|
|
51
|
+
}
|
|
52
|
+
export type KnowledgeEvent = RouterEvent | PgvectorEvent | LightRAGEvent | JinaUsageEvent | CooldownEvent;
|
|
53
|
+
/**
|
|
54
|
+
* Emit a structured event line through `logger.info`.
|
|
55
|
+
*
|
|
56
|
+
* Never throws. If JSON serialization or the logger itself fails (e.g.
|
|
57
|
+
* upstream broke the contract), we silently swallow — the plugin must keep
|
|
58
|
+
* working even if tracing breaks.
|
|
59
|
+
*/
|
|
60
|
+
export declare function emitEvent(logger: TracingLogger, event: KnowledgeEvent): void;
|
|
61
|
+
/**
|
|
62
|
+
* Optional debug-level emission of turn metadata for correlation.
|
|
63
|
+
*
|
|
64
|
+
* What goes into the log line:
|
|
65
|
+
* - `runId`: the OpenClaw SDK's runId for this agent turn (or
|
|
66
|
+
* `"unknown"` when the SDK did not supply one). This is the
|
|
67
|
+
* ONLY correlation key we expose — it is non-query-derived
|
|
68
|
+
* by construction, so it cannot be dictionary-recovered
|
|
69
|
+
* from the log line.
|
|
70
|
+
* - `qlen`: character length of the query (a count, not content).
|
|
71
|
+
*
|
|
72
|
+
* What does NOT go in: any portion of the query text, AND no hash of it.
|
|
73
|
+
* An earlier iteration of this plugin emitted `SHA-256(query)` truncated
|
|
74
|
+
* to 12 hex chars under the assumption it was "non-reversible". Code
|
|
75
|
+
* review (2026-05-23) correctly pointed out that for short or low-entropy
|
|
76
|
+
* prompts (the hook accepts ≥ 3 chars), the hash is dictionary-recoverable
|
|
77
|
+
* offline. We removed the hash entirely and rely on `runId` instead.
|
|
78
|
+
*
|
|
79
|
+
* Operators who want CONTENT correlation across turns must instrument
|
|
80
|
+
* Opik / LangFuse at the SDK layer with their own keyed scheme (HMAC
|
|
81
|
+
* with a deployment secret); the plugin will not do it for them.
|
|
82
|
+
*/
|
|
83
|
+
export declare function emitTurnMetadata(logger: TracingLogger, runId: string | undefined, queryLength: number): void;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// Structured event emission for downstream observability tools.
|
|
2
|
+
//
|
|
3
|
+
// The plugin already runs inside an OpenClaw deployment that includes Opik
|
|
4
|
+
// (https://www.comet.com/docs/opik/) for tracing — but the plugin itself
|
|
5
|
+
// MUST NOT depend on the Opik SDK directly. Two reasons:
|
|
6
|
+
//
|
|
7
|
+
// 1. Deps. The plugin proudly ships with a single runtime dep (`pg`).
|
|
8
|
+
// Adding `opik` would force every consumer to install it.
|
|
9
|
+
// 2. Coupling. Operators may swap Opik for LangFuse or pure OTLP. The
|
|
10
|
+
// plugin should not care.
|
|
11
|
+
//
|
|
12
|
+
// Solution: emit structured JSON lines through OpenClaw's logger. The
|
|
13
|
+
// upstream gateway already forwards `logger.info(...)` to Opik (when
|
|
14
|
+
// configured) and to stdout in any case. A grep-friendly prefix
|
|
15
|
+
// (`[knowledge.event]`) lets a downstream scraper or Opik rule pick the
|
|
16
|
+
// records out without ambiguity.
|
|
17
|
+
//
|
|
18
|
+
// Privacy invariant: NO event in this module ever logs the raw user
|
|
19
|
+
// query, query excerpts, retrieved chunk content, OR ANY HASH OF THEM.
|
|
20
|
+
// We log metadata only (lengths, scores, counts, durations) plus the
|
|
21
|
+
// `runId` provided by the OpenClaw SDK when turn-level correlation is
|
|
22
|
+
// needed. The runId is non-query-derived by construction, so it cannot
|
|
23
|
+
// be reversed offline against a dictionary of likely prompts.
|
|
24
|
+
//
|
|
25
|
+
// The events module is intentionally tiny and synchronous — emitting a log
|
|
26
|
+
// line must NEVER throw, NEVER consume noticeable CPU, and NEVER hold the
|
|
27
|
+
// agent turn open.
|
|
28
|
+
/**
|
|
29
|
+
* Marker prefix for every structured line emitted by this module. Pick a
|
|
30
|
+
* value that is unlikely to clash with other plugins and stable across
|
|
31
|
+
* versions — log scrapers and Opik rules depend on it.
|
|
32
|
+
*/
|
|
33
|
+
export const EVENT_PREFIX = "[knowledge.event]";
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Emitters
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Write one structured event to `logger.info` as a single line made of
 * `EVENT_PREFIX`, a space, and the JSON encoding of `event`.
 *
 * This function never throws. Any failure — JSON serialization or the
 * logger call itself — is deliberately discarded, because a tracing
 * problem must not interrupt the plugin.
 *
 * @param logger - Object exposing an `info(message)` method.
 * @param event - The event object to serialize.
 */
export function emitEvent(logger, event) {
    try {
        const serialized = JSON.stringify(event);
        const line = `${EVENT_PREFIX} ${serialized}`;
        logger.info(line);
    }
    catch {
        // deliberately ignored — a tracing failure must not crash the plugin
    }
}
|
|
53
|
+
/**
 * Emit turn-level correlation metadata at debug level. Does nothing when
 * the logger has no `debug` method.
 *
 * Fields written to the log line:
 *   - `runId`: the run identifier passed in by the caller, or the literal
 *              `"unknown"` when it is missing or empty. It is the only
 *              correlation key exposed, and it is not derived from the
 *              query, so it cannot be matched offline against a
 *              dictionary of likely prompts.
 *   - `qlen`:  character length of the query — a count, never content.
 *
 * Deliberately excluded: any part of the query text and any hash of it.
 * An earlier iteration emitted `SHA-256(query)` truncated to 12 hex chars
 * on the assumption that it was "non-reversible". Code review (2026-05-23)
 * pointed out that for short or low-entropy prompts (the hook accepts
 * ≥ 3 chars) such a hash is dictionary-recoverable offline, so the hash
 * was removed entirely in favour of `runId`.
 *
 * Operators who need CONTENT correlation across turns must instrument
 * Opik / LangFuse at the SDK layer with their own keyed scheme (HMAC with
 * a deployment secret); this plugin will not do it for them.
 *
 * Failures inside the debug call are swallowed: tracing must never crash
 * the plugin.
 *
 * @param logger - Object that may expose a `debug(message)` method.
 * @param runId - Run identifier for this turn, possibly undefined or empty.
 * @param queryLength - Character length of the query.
 */
export function emitTurnMetadata(logger, runId, queryLength) {
    if (logger.debug) {
        try {
            let correlationId = "unknown";
            if (runId && runId.length > 0) {
                correlationId = runId;
            }
            const fields = `runId=${correlationId} qlen=${queryLength}`;
            logger.debug(`${EVENT_PREFIX} turn.metadata ${fields}`);
        }
        catch {
            // deliberately ignored — a tracing failure must not crash the plugin
        }
    }
}
|
|
86
|
+
//# sourceMappingURL=events.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"events.js","sourceRoot":"","sources":["../../src/tracing/events.ts"],"names":[],"mappings":"AAAA,gEAAgE;AAChE,EAAE;AACF,2EAA2E;AAC3E,yEAAyE;AACzE,yDAAyD;AACzD,EAAE;AACF,wEAAwE;AACxE,+DAA+D;AAC/D,wEAAwE;AACxE,+BAA+B;AAC/B,EAAE;AACF,sEAAsE;AACtE,qEAAqE;AACrE,gEAAgE;AAChE,wEAAwE;AACxE,iCAAiC;AACjC,EAAE;AACF,oEAAoE;AACpE,uEAAuE;AACvE,qEAAqE;AACrE,sEAAsE;AACtE,uEAAuE;AACvE,8DAA8D;AAC9D,EAAE;AACF,2EAA2E;AAC3E,0EAA0E;AAC1E,mBAAmB;AAcnB;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,mBAAmB,CAAC;AAqDhD,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CAAC,MAAqB,EAAE,KAAqB;IACpE,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,IAAI,OAAO,EAAE,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,4DAA4D;IAC9D,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,UAAU,gBAAgB,CAC9B,MAAqB,EACrB,KAAyB,EACzB,WAAmB;IAEnB,IAAI,CAAC,MAAM,CAAC,KAAK;QAAE,OAAO;IAC1B,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,GAAG,YAAY,wBAAwB,EAAE,SAAS,WAAW,EAAE,CAAC,CAAC;IAChF,CAAC;IAAC,MAAM,CAAC;QACP,gDAAgD;IAClD,CAAC;AACH,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,3 +1,25 @@
|
|
|
1
|
+
import type { RerankerModel } from "./jina/types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Subset of `PluginHookAgentContext` from the OpenClaw plugin SDK that this
|
|
4
|
+
* plugin actually consumes. Declared locally to keep the test suite free of
|
|
5
|
+
* SDK runtime imports.
|
|
6
|
+
*
|
|
7
|
+
* Fields beyond this subset (workspaceDir, modelProviderId, ...) are
|
|
8
|
+
* deliberately omitted — the handler does not depend on them.
|
|
9
|
+
*
|
|
10
|
+
* @see https://github.com/openclaw/openclaw plugin-sdk types.d.ts
|
|
11
|
+
*/
|
|
12
|
+
export interface PluginHookAgentContext {
|
|
13
|
+
/** What initiated this agent run. */
|
|
14
|
+
trigger?: "user" | "heartbeat" | "cron" | "memory" | string;
|
|
15
|
+
/** Channel-derived sender id. The plugin currently only uses `"cli"`. */
|
|
16
|
+
messageProvider?: string;
|
|
17
|
+
channelId?: string;
|
|
18
|
+
agentId?: string;
|
|
19
|
+
sessionId?: string;
|
|
20
|
+
sessionKey?: string;
|
|
21
|
+
runId?: string;
|
|
22
|
+
}
|
|
1
23
|
/**
|
|
2
24
|
* Runtime configuration as it appears in `plugins.entries.openclaw-knowledge.config`.
|
|
3
25
|
* All fields are optional — defaults are applied in {@link resolveConfig}.
|
|
@@ -16,6 +38,30 @@ export interface KnowledgePluginConfig {
|
|
|
16
38
|
lightragQueryMode?: LightRAGQueryMode;
|
|
17
39
|
lightragMaxChars?: number;
|
|
18
40
|
lightragEnabled?: boolean;
|
|
41
|
+
jina?: JinaPluginConfig;
|
|
42
|
+
}
|
|
43
|
+
export interface JinaPluginConfig {
|
|
44
|
+
/** Jina API key. Required for `router.mode=jina-classifier` or `pgvectorReranker.enabled`. Supports `${ENV_VAR}` substitution. */
|
|
45
|
+
apiKey?: string;
|
|
46
|
+
router?: RouterPluginConfig;
|
|
47
|
+
pgvectorReranker?: PgvectorRerankerPluginConfig;
|
|
48
|
+
}
|
|
49
|
+
export interface RouterPluginConfig {
|
|
50
|
+
enabled?: boolean;
|
|
51
|
+
mode?: "heuristic" | "jina-classifier";
|
|
52
|
+
/**
|
|
53
|
+
* Optional pre-trained Jina classifier_id. When set, the router calls
|
|
54
|
+
* `/v1/classify` with this ID (few-shot mode). Train it out-of-band via
|
|
55
|
+
* `POST /v1/train` — the plugin does NOT implement training.
|
|
56
|
+
*/
|
|
57
|
+
classifierId?: string;
|
|
58
|
+
}
|
|
59
|
+
export interface PgvectorRerankerPluginConfig {
|
|
60
|
+
enabled?: boolean;
|
|
61
|
+
/** Reranker model. Default: `jina-reranker-v2-base-multilingual` (best FR coverage). */
|
|
62
|
+
model?: RerankerModel;
|
|
63
|
+
/** Cap on results returned post-rerank. Default: `5`. */
|
|
64
|
+
topN?: number;
|
|
19
65
|
}
|
|
20
66
|
export type LightRAGQueryMode = "naive" | "local" | "global" | "hybrid";
|
|
21
67
|
/**
|
|
@@ -36,6 +82,13 @@ export interface ResolvedKnowledgeConfig {
|
|
|
36
82
|
lightragQueryMode: LightRAGQueryMode;
|
|
37
83
|
lightragMaxChars: number;
|
|
38
84
|
lightragEnabled: boolean;
|
|
85
|
+
jinaApiKey: string;
|
|
86
|
+
routerEnabled: boolean;
|
|
87
|
+
routerMode: "heuristic" | "jina-classifier";
|
|
88
|
+
routerClassifierId: string;
|
|
89
|
+
pgvectorRerankerEnabled: boolean;
|
|
90
|
+
pgvectorRerankerModel: RerankerModel;
|
|
91
|
+
pgvectorRerankerTopN: number;
|
|
39
92
|
}
|
|
40
93
|
/**
|
|
41
94
|
* One search hit from the PostgreSQL `knowledge_vectors` table, after score
|
|
@@ -85,9 +138,16 @@ export interface PgvectorRow {
|
|
|
85
138
|
}
|
|
86
139
|
/**
|
|
87
140
|
* Shape of the `before_prompt_build` event payload as consumed by this plugin.
|
|
88
|
-
*
|
|
141
|
+
*
|
|
142
|
+
* As of v3.2.1, `prompt` is the PRIMARY source for the user query — it is
|
|
143
|
+
* the raw user text surfaced by the SDK, distinct from `messages` which may
|
|
144
|
+
* aggregate the full conversation window (with summaries, system prompt
|
|
145
|
+
* fragments, etc.). The handler reads `prompt` first; `messages` remains
|
|
146
|
+
* as a legacy fallback for SDK versions that do not populate it.
|
|
89
147
|
*/
|
|
90
148
|
export interface BeforePromptBuildEvent {
|
|
149
|
+
/** Raw user prompt for this turn. SDK >= 2026.5.0. */
|
|
150
|
+
prompt?: string;
|
|
91
151
|
messages?: PromptMessage[];
|
|
92
152
|
}
|
|
93
153
|
export interface PromptMessage {
|
package/openclaw.plugin.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "openclaw-knowledge",
|
|
3
3
|
"name": "Knowledge Base",
|
|
4
|
-
"description": "Multi-source knowledge search (pgvector + LightRAG) — injects relevant documents and knowledge graph context before each turn via the before_prompt_build hook",
|
|
5
|
-
"version": "3.1
|
|
4
|
+
"description": "Multi-source knowledge search (pgvector + LightRAG) with optional Jina-powered router & reranker — injects relevant documents and knowledge graph context before each turn via the before_prompt_build hook",
|
|
5
|
+
"version": "3.2.1",
|
|
6
6
|
"activation": {
|
|
7
7
|
"onStartup": true
|
|
8
8
|
},
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"minimum": 1,
|
|
35
35
|
"maximum": 100,
|
|
36
36
|
"default": 5,
|
|
37
|
-
"description": "Maximum number of results returned per collection"
|
|
37
|
+
"description": "Maximum number of results returned per collection (raw recall stage)"
|
|
38
38
|
},
|
|
39
39
|
"scoreThreshold": {
|
|
40
40
|
"type": "number",
|
|
@@ -76,6 +76,63 @@
|
|
|
76
76
|
"lightragEnabled": {
|
|
77
77
|
"type": "boolean",
|
|
78
78
|
"description": "Disable LightRAG search while keeping pgvector. Defaults to true when lightragUrl is set."
|
|
79
|
+
},
|
|
80
|
+
"jina": {
|
|
81
|
+
"type": "object",
|
|
82
|
+
"additionalProperties": false,
|
|
83
|
+
"description": "Optional Jina-powered enhancements: router (skip irrelevant retrievals) and pgvector reranker (re-order vector results by cross-encoder relevance).",
|
|
84
|
+
"properties": {
|
|
85
|
+
"apiKey": {
|
|
86
|
+
"type": "string",
|
|
87
|
+
"description": "Jina API key shared by router and reranker. Required when router.mode=jina-classifier or pgvectorReranker.enabled. Supports ${ENV_VAR} substitution."
|
|
88
|
+
},
|
|
89
|
+
"router": {
|
|
90
|
+
"type": "object",
|
|
91
|
+
"additionalProperties": false,
|
|
92
|
+
"description": "Adaptive routing: classify each user turn and skip retrieval when irrelevant.",
|
|
93
|
+
"properties": {
|
|
94
|
+
"enabled": {
|
|
95
|
+
"type": "boolean",
|
|
96
|
+
"default": false,
|
|
97
|
+
"description": "Enable the router. When false (default), every eligible turn calls every configured source — pre-3.2.0 behavior."
|
|
98
|
+
},
|
|
99
|
+
"mode": {
|
|
100
|
+
"type": "string",
|
|
101
|
+
"enum": ["heuristic", "jina-classifier"],
|
|
102
|
+
"default": "heuristic",
|
|
103
|
+
"description": "heuristic: zero-cost regex + trigger rules only (safe start). jina-classifier: same heuristics first, then Jina /v1/classify for ambiguous queries."
|
|
104
|
+
},
|
|
105
|
+
"classifierId": {
|
|
106
|
+
"type": "string",
|
|
107
|
+
"description": "Optional pre-trained Jina classifier_id (few-shot mode). Train it out-of-band via POST /v1/train then paste the ID here. When omitted, the router uses zero-shot with built-in labels."
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
},
|
|
111
|
+
"pgvectorReranker": {
|
|
112
|
+
"type": "object",
|
|
113
|
+
"additionalProperties": false,
|
|
114
|
+
"description": "Re-order pgvector results with a Jina cross-encoder. Boosts precision when topK candidates contain noise.",
|
|
115
|
+
"properties": {
|
|
116
|
+
"enabled": {
|
|
117
|
+
"type": "boolean",
|
|
118
|
+
"default": false,
|
|
119
|
+
"description": "Enable cross-encoder rerank on pgvector results. Requires jina.apiKey."
|
|
120
|
+
},
|
|
121
|
+
"model": {
|
|
122
|
+
"type": "string",
|
|
123
|
+
"default": "jina-reranker-v2-base-multilingual",
|
|
124
|
+
"description": "Jina reranker model. v2-base-multilingual is recommended for French content (v3 is English-biased)."
|
|
125
|
+
},
|
|
126
|
+
"topN": {
|
|
127
|
+
"type": "number",
|
|
128
|
+
"minimum": 1,
|
|
129
|
+
"maximum": 100,
|
|
130
|
+
"default": 5,
|
|
131
|
+
"description": "Max number of results returned after rerank. Recommendation: keep topK ≥ topN × 2 so the cross-encoder has room to re-order."
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
79
136
|
}
|
|
80
137
|
},
|
|
81
138
|
"required": []
|
|
@@ -105,7 +162,7 @@
|
|
|
105
162
|
"topK": {
|
|
106
163
|
"label": "Top-K per collection",
|
|
107
164
|
"advanced": true,
|
|
108
|
-
"help": "Maximum number of results returned per collection (default: 5)"
|
|
165
|
+
"help": "Maximum number of results returned per collection (default: 5). When the reranker is enabled, recommended ≥ 2× rerank topN."
|
|
109
166
|
},
|
|
110
167
|
"scoreThreshold": {
|
|
111
168
|
"label": "Score threshold",
|
|
@@ -147,6 +204,42 @@
|
|
|
147
204
|
"label": "Enable LightRAG source",
|
|
148
205
|
"advanced": true,
|
|
149
206
|
"help": "Disable LightRAG while keeping pgvector. Defaults to true when lightragUrl is set."
|
|
207
|
+
},
|
|
208
|
+
"jina.apiKey": {
|
|
209
|
+
"label": "Jina API Key",
|
|
210
|
+
"placeholder": "${JINA_API_KEY}",
|
|
211
|
+
"sensitive": true,
|
|
212
|
+
"help": "Shared by router and reranker. Use ${JINA_API_KEY} for env var substitution."
|
|
213
|
+
},
|
|
214
|
+
"jina.router.enabled": {
|
|
215
|
+
"label": "Enable router",
|
|
216
|
+
"advanced": true,
|
|
217
|
+
"help": "Adaptive routing that skips retrieval on heartbeats and meta-questions. Default: false (pre-3.2.0 behavior)."
|
|
218
|
+
},
|
|
219
|
+
"jina.router.mode": {
|
|
220
|
+
"label": "Router mode",
|
|
221
|
+
"advanced": true,
|
|
222
|
+
"help": "heuristic: zero-cost rules only. jina-classifier: heuristics + Jina /v1/classify fallback."
|
|
223
|
+
},
|
|
224
|
+
"jina.router.classifierId": {
|
|
225
|
+
"label": "Few-shot classifier ID",
|
|
226
|
+
"advanced": true,
|
|
227
|
+
"help": "Optional. When set, the router uses your pre-trained classifier instead of zero-shot labels."
|
|
228
|
+
},
|
|
229
|
+
"jina.pgvectorReranker.enabled": {
|
|
230
|
+
"label": "Enable pgvector reranker",
|
|
231
|
+
"advanced": true,
|
|
232
|
+
"help": "Cross-encoder re-ordering of pgvector results. Requires Jina API key."
|
|
233
|
+
},
|
|
234
|
+
"jina.pgvectorReranker.model": {
|
|
235
|
+
"label": "Reranker model",
|
|
236
|
+
"advanced": true,
|
|
237
|
+
"help": "Default: jina-reranker-v2-base-multilingual (best for French)."
|
|
238
|
+
},
|
|
239
|
+
"jina.pgvectorReranker.topN": {
|
|
240
|
+
"label": "Reranker top-N",
|
|
241
|
+
"advanced": true,
|
|
242
|
+
"help": "Max results returned after rerank. Keep topK ≥ topN × 2."
|
|
150
243
|
}
|
|
151
244
|
}
|
|
152
245
|
}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lacneu/openclaw-knowledge",
|
|
3
|
-
"version": "3.1
|
|
3
|
+
"version": "3.2.1",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"description": "Multi-source knowledge plugin for OpenClaw — pgvector + LightRAG injection via before_prompt_build hook",
|
|
5
|
+
"description": "Multi-source knowledge plugin for OpenClaw — pgvector + LightRAG injection with optional Jina-powered router & reranker, via before_prompt_build hook",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Olivier Neu",
|
|
8
8
|
"homepage": "https://github.com/OlivierNeu/openclaw-knowledge-plugin#readme",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"build:test": "tsc -p tsconfig.test-build.json",
|
|
41
41
|
"clean": "rm -rf dist dist-test",
|
|
42
42
|
"typecheck": "tsc -p tsconfig.test.json",
|
|
43
|
-
"test": "npm run build:test && node --test dist-test/test
|
|
43
|
+
"test": "npm run build:test && node --test $(find dist-test/test -name '*.test.js' -print)",
|
|
44
44
|
"prepublishOnly": "npm run clean && npm run build"
|
|
45
45
|
},
|
|
46
46
|
"openclaw": {
|