@objectstack/knowledge-ragflow 6.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,307 @@
1
+ // Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license.
2
+
3
+ import type { Plugin, PluginContext } from '@objectstack/core';
4
+ import type {
5
+ IKnowledgeAdapter,
6
+ IKnowledgeService,
7
+ AdapterContext,
8
+ AdapterSearchOptions,
9
+ } from '@objectstack/spec/contracts';
10
+ import type {
11
+ KnowledgeDocument,
12
+ KnowledgeHit,
13
+ KnowledgeSource,
14
+ } from '@objectstack/spec/ai';
15
+ import { KNOWLEDGE_SERVICE } from '@objectstack/spec/contracts';
16
+
17
/** Request settings the adapter hands to the fetch implementation. */
interface FetchLikeInit {
  method?: string;
  headers?: Record<string, string>;
  body?: string;
  signal?: AbortSignal;
}

/** The parts of a fetch response the adapter reads. */
interface FetchLikeResponse {
  ok: boolean;
  status: number;
  statusText: string;
  text(): Promise<string>;
  json(): Promise<unknown>;
}

/**
 * Minimal structural slice of `fetch` that the adapter depends on.
 * Supply your own implementation in tests; when none is given the
 * adapter falls back to the global `fetch`.
 */
export type FetchLike = (input: string, init?: FetchLikeInit) => Promise<FetchLikeResponse>;
36
+
37
/**
 * Construction options for the RAGFlow adapter. `endpoint` and `apiKey`
 * are mandatory; everything else has a default.
 */
export interface KnowledgeRagflowAdapterOptions {
  /** Base URL of the RAGFlow server, e.g. `http://localhost:9380`. */
  endpoint: string;

  /** RAGFlow API key; sent as a Bearer token. */
  apiKey: string;

  /** Identifier the adapter reports. @default 'ragflow' */
  id?: string;

  /** Per-request timeout, in milliseconds. @default 30000 */
  timeoutMs?: number;

  /** Replacement for the global `fetch`, mainly for tests. */
  fetch?: FetchLike;
}
49
+
50
/**
 * RAGFlow-specific settings read from a knowledge source's `options`
 * bag. Only `datasetId` is required.
 */
interface RagflowSourceOptions {
  /** Id of the RAGFlow dataset that backs the source. */
  datasetId: string;

  /** Rerank model id (overrides dataset default); optional. */
  rerankModel?: string;

  /** Similarity threshold passed through to RAGFlow; optional. */
  similarityThreshold?: number;

  /** Vector vs keyword weight in [0,1]; optional. */
  vectorSimilarityWeight?: number;
}
59
+
60
/**
 * Reads the RAGFlow-specific settings out of `source.options`.
 *
 * `datasetId` must be a non-empty string. The optional settings are kept
 * only when they have the expected type; numeric settings must also be
 * finite, because `NaN`/`Infinity` would be serialised as `null` by
 * `JSON.stringify` when the retrieval request body is built.
 *
 * @param source Knowledge source whose `options` bag is inspected.
 * @returns The validated options; optional members that are absent or
 *   invalid are present with the value `undefined`.
 * @throws Error when `options.datasetId` is missing, empty, or not a string.
 */
function extractRagflowOptions(source: KnowledgeSource): RagflowSourceOptions {
  // `options` is not part of the visible KnowledgeSource typing, hence the cast.
  const opts = ((source as unknown as { options?: Record<string, unknown> }).options ?? {}) as
    Record<string, unknown>;

  const datasetId = opts.datasetId;
  if (typeof datasetId !== 'string' || !datasetId) {
    throw new Error(
      `RAGFlow adapter requires source.options.datasetId on source '${source.id}'`,
    );
  }

  // Accept only real, finite numbers; anything else is treated as "not set".
  const finiteNumber = (value: unknown): number | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? value : undefined;

  return {
    datasetId,
    rerankModel: typeof opts.rerankModel === 'string' ? opts.rerankModel : undefined,
    similarityThreshold: finiteNumber(opts.similarityThreshold),
    vectorSimilarityWeight: finiteNumber(opts.vectorSimilarityWeight),
  };
}
80
+
81
/**
 * RAGFlow adapter. Maps {@link KnowledgeDocument} upserts to the
 * dataset's chunk API, delegates retrieval to `/api/v1/retrieval`, and
 * returns {@link KnowledgeHit}s with `sourceRecordId` preserved so the
 * orchestrator can run a permission re-check.
 *
 * Every request is JSON, carries the API key as a Bearer token, and is
 * aborted after `timeoutMs` milliseconds.
 */
export class KnowledgeRagflowAdapter implements IKnowledgeAdapter {
  readonly id: string;
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly fetchImpl: FetchLike;
  private readonly timeoutMs: number;

  /**
   * @param opts Adapter options; `endpoint` and `apiKey` are required.
   * @throws Error when `endpoint` or `apiKey` is empty, or when no fetch
   *   implementation is available (neither `opts.fetch` nor a global `fetch`).
   */
  constructor(opts: KnowledgeRagflowAdapterOptions) {
    if (!opts.endpoint) throw new Error('RAGFlow adapter: endpoint required');
    if (!opts.apiKey) throw new Error('RAGFlow adapter: apiKey required');
    this.id = opts.id ?? 'ragflow';
    // Strip trailing slashes so `${endpoint}${path}` never contains `//`.
    this.endpoint = opts.endpoint.replace(/\/+$/, '');
    this.apiKey = opts.apiKey;
    this.timeoutMs = opts.timeoutMs ?? 30000;

    // Wrap the global fetch in a plain function so it is never invoked with
    // a foreign `this`; some runtimes reject that with "Illegal invocation".
    const globalFetch = (globalThis as { fetch?: FetchLike }).fetch;
    const wrappedGlobal: FetchLike | undefined = globalFetch
      ? (input, init) => globalFetch(input, init)
      : undefined;
    const fetchImpl = opts.fetch ?? wrappedGlobal;
    if (!fetchImpl) {
      throw new Error('RAGFlow adapter: no fetch available; pass options.fetch');
    }
    this.fetchImpl = fetchImpl;
  }

  /**
   * Writes each document as one chunk in the source's dataset, after
   * removing chunks previously stamped with the same document id.
   * Documents are processed sequentially; the first failing request
   * rejects the call and leaves later documents untouched.
   *
   * @param docs Documents to write.
   * @param ctx Adapter context; `ctx.source.options.datasetId` is required.
   * @throws Error when the dataset id is missing or a request fails.
   */
  async upsert(docs: KnowledgeDocument[], ctx: AdapterContext): Promise<void> {
    const { datasetId } = extractRagflowOptions(ctx.source);
    const chunksPath = KnowledgeRagflowAdapter.chunksPath(datasetId);
    // RAGFlow models documents in two layers: documents (file-like) and
    // chunks (text blocks). We treat each KnowledgeDocument as a single
    // chunk-set: delete existing chunks with the same external id, then
    // upload as `content` chunks tagged with our document id.
    for (const doc of docs) {
      await this.deleteChunksByDocumentId(datasetId, doc.id);
      await this.request(chunksPath, {
        method: 'POST',
        body: JSON.stringify({
          content: doc.content,
          important_keywords: doc.title ? [doc.title] : undefined,
          // The `objectstack_*` keys are spread last so caller metadata can
          // never overwrite them; `objectstack_doc_id` is what delete()
          // later filters on.
          metadata: {
            ...(doc.metadata ?? {}),
            objectstack_doc_id: doc.id,
            objectstack_source_id: doc.sourceId,
            objectstack_record_id: doc.sourceRecordId,
            title: doc.title,
          },
        }),
      });
    }
  }

  /**
   * Removes the chunks stamped with each of the given document ids,
   * one id at a time.
   *
   * @param documentIds Ids of the documents whose chunks should go.
   * @param ctx Adapter context; `ctx.source.options.datasetId` is required.
   * @throws Error when the dataset id is missing or a request fails.
   */
  async delete(documentIds: string[], ctx: AdapterContext): Promise<void> {
    const { datasetId } = extractRagflowOptions(ctx.source);
    for (const id of documentIds) {
      await this.deleteChunksByDocumentId(datasetId, id);
    }
  }

  /**
   * Runs a retrieval request against the source's dataset and maps the
   * returned chunks to {@link KnowledgeHit}s.
   *
   * The request always sends `question`, `dataset_ids`, `top_k` and
   * `keyword: true`; `rerank_id`, `similarity_threshold`,
   * `vector_similarity_weight` and `metadata_condition` are added only
   * when the corresponding option or filter is present.
   *
   * @param query Free-text question.
   * @param opts Search options; `opts.source.options.datasetId` is required.
   * @returns At most `opts.topK` hits, in the order the server returned them.
   * @throws Error when the dataset id is missing or the request fails.
   */
  async search(query: string, opts: AdapterSearchOptions): Promise<KnowledgeHit[]> {
    const { datasetId, rerankModel, similarityThreshold, vectorSimilarityWeight } =
      extractRagflowOptions(opts.source);
    const body: Record<string, unknown> = {
      question: query,
      dataset_ids: [datasetId],
      top_k: opts.topK,
      keyword: true,
    };
    if (rerankModel) body.rerank_id = rerankModel;
    if (typeof similarityThreshold === 'number') body.similarity_threshold = similarityThreshold;
    if (typeof vectorSimilarityWeight === 'number')
      body.vector_similarity_weight = vectorSimilarityWeight;
    if (opts.filter) body.metadata_condition = { ...opts.filter };

    const res = await this.request('/api/v1/retrieval', {
      method: 'POST',
      body: JSON.stringify(body),
    });
    const data = (res?.data ?? {}) as { chunks?: unknown };
    // Anything other than an array is treated as "no hits" instead of crashing.
    const chunks: RagflowChunkHit[] = Array.isArray(data.chunks)
      ? (data.chunks as RagflowChunkHit[])
      : [];

    return chunks.slice(0, opts.topK).map<KnowledgeHit>((c) => {
      const md = (c.metadata ?? {}) as Record<string, unknown>;
      // Prefer the id stamped at upsert time, then RAGFlow's own ids.
      const documentId =
        KnowledgeRagflowAdapter.asText(md.objectstack_doc_id) ??
        c.document_id ??
        c.doc_id ??
        c.id ??
        'unknown';
      return {
        // Built from the resolved document id so a hit without any id
        // yields 'unknown#0' rather than the literal string 'undefined#0'.
        chunkId: c.id ?? `${documentId}#${c.position ?? 0}`,
        documentId,
        sourceId: opts.source.id,
        sourceRecordId: KnowledgeRagflowAdapter.asText(md.objectstack_record_id),
        score: c.similarity ?? c.score ?? 0,
        snippet: c.content ?? c.content_with_weight ?? '',
        title: KnowledgeRagflowAdapter.asText(md.title) ?? c.document_name,
        metadata: md,
      };
    });
  }

  /**
   * Probes the server by listing a single dataset.
   *
   * @returns `{ ok: true }` on success; otherwise `{ ok: false, message }`
   *   with the failure text. This method never rejects.
   */
  async healthCheck(): Promise<{ ok: boolean; message?: string }> {
    try {
      await this.request('/api/v1/datasets?page=1&page_size=1', { method: 'GET' });
      return { ok: true };
    } catch (err) {
      return { ok: false, message: err instanceof Error ? err.message : String(err) };
    }
  }

  /** Chunk collection path for a dataset, with the id escaped for use in a URL. */
  private static chunksPath(datasetId: string): string {
    return `/api/v1/datasets/${encodeURIComponent(datasetId)}/chunks`;
  }

  /**
   * Normalises a metadata value to text: strings pass through, finite
   * numbers are stringified, everything else becomes `undefined`.
   */
  private static asText(value: unknown): string | undefined {
    if (typeof value === 'string') return value;
    if (typeof value === 'number' && Number.isFinite(value)) return String(value);
    return undefined;
  }

  /**
   * Looks up the chunks stamped with `docId` and deletes them by chunk id.
   * Does nothing when the lookup returns no chunk ids.
   *
   * NOTE(review): the lookup asks for `top_k: 256`, so a document with more
   * chunks than that presumably needs more than one pass — confirm.
   */
  private async deleteChunksByDocumentId(datasetId: string, docId: string): Promise<void> {
    // Find chunks with our stamped metadata, then delete by chunk id.
    const found = await this.request('/api/v1/retrieval', {
      method: 'POST',
      body: JSON.stringify({
        question: docId,
        dataset_ids: [datasetId],
        top_k: 256,
        keyword: false,
        metadata_condition: { objectstack_doc_id: docId },
      }),
    });
    const data = (found.data ?? {}) as { chunks?: unknown };
    const listed: Array<{ id?: unknown } | null> = Array.isArray(data.chunks) ? data.chunks : [];
    const ids = listed
      .map((c) => c?.id)
      .filter((x): x is string => typeof x === 'string');
    if (ids.length === 0) return;
    await this.request(KnowledgeRagflowAdapter.chunksPath(datasetId), {
      method: 'DELETE',
      body: JSON.stringify({ chunk_ids: ids }),
    });
  }

  /**
   * Performs one HTTP exchange and returns the status line plus the raw
   * body text. The abort timer covers both the request and the body read.
   *
   * @throws Error with a "timed out" message when the timer fired before
   *   the body was read; any other transport failure is rethrown as-is.
   */
  private async send(
    path: string,
    init: { method: string; body?: string },
  ): Promise<{ ok: boolean; status: number; statusText: string; raw: string }> {
    const controller = new AbortController();
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      controller.abort();
    }, this.timeoutMs);
    // Call through a local so the implementation is not invoked as a method
    // of this adapter (see the constructor note about `this`).
    const doFetch = this.fetchImpl;
    try {
      const res = await doFetch(`${this.endpoint}${path}`, {
        method: init.method,
        headers: {
          'content-type': 'application/json',
          authorization: `Bearer ${this.apiKey}`,
        },
        body: init.body,
        signal: controller.signal,
      });
      const raw = await res.text();
      return { ok: res.ok, status: res.status, statusText: res.statusText, raw };
    } catch (err) {
      if (timedOut) {
        // Replace the opaque abort error with one that names the request.
        throw new Error(`RAGFlow ${init.method} ${path} timed out after ${this.timeoutMs}ms`);
      }
      throw err;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Sends a request and returns the decoded JSON envelope.
   *
   * An empty body, a non-JSON body on a successful status, or a JSON body
   * that is not an object all yield `{}`.
   *
   * @throws Error when the HTTP status is not ok, when the envelope carries
   *   a numeric `code` other than 0 or 200, or when the request times out.
   */
  private async request(
    path: string,
    init: { method: string; body?: string },
  ): Promise<{ data?: unknown; code?: number; message?: string }> {
    const { ok, status, statusText, raw } = await this.send(path, init);

    let parsed: { data?: unknown; code?: number; message?: string } = {};
    if (raw) {
      let decoded: unknown;
      let isJson = true;
      try {
        decoded = JSON.parse(raw);
      } catch {
        isJson = false;
      }
      if (!isJson && !ok) {
        // Non-JSON error page: surface the start of the body for diagnosis.
        throw new Error(
          `RAGFlow ${init.method} ${path} → ${status} ${statusText}: ${raw.slice(0, 200)}`,
        );
      }
      // `JSON.parse` may legally return null, a primitive or an array; only
      // a plain object is a usable envelope.
      if (typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded)) {
        parsed = decoded as typeof parsed;
      }
    }

    const badCode = typeof parsed.code === 'number' && parsed.code !== 0 && parsed.code !== 200;
    if (!ok || badCode) {
      throw new Error(
        `RAGFlow ${init.method} ${path} → ${status} ${statusText}${
          parsed.message ? ` (${parsed.message})` : ''
        }`,
      );
    }
    return parsed;
  }
}
257
+
258
/**
 * Loosely-typed view of one chunk in a retrieval response. Every field
 * is optional; the adapter falls back across the alternative spellings
 * (`document_id`/`doc_id`, `content`/`content_with_weight`,
 * `similarity`/`score`) when mapping a chunk to a hit.
 */
interface RagflowChunkHit {
  // Identity
  id?: string;
  document_id?: string;
  doc_id?: string;
  document_name?: string;

  // Text
  content?: string;
  content_with_weight?: string;

  // Ranking
  similarity?: number;
  score?: number;
  position?: number;

  // Metadata attached to the chunk
  metadata?: Record<string, unknown>;
}
270
+
271
+ /* ---------------------------------------------------------------- */
272
+ /* Kernel plugin glue */
273
+ /* ---------------------------------------------------------------- */
274
+
275
/**
 * Options accepted by the kernel plugin. Adds nothing of its own: the
 * plugin forwards them unchanged to the adapter it constructs.
 */
export interface KnowledgeRagflowPluginOptions
  extends KnowledgeRagflowAdapterOptions {}
276
+
277
/**
 * Kernel plugin that builds a {@link KnowledgeRagflowAdapter} and
 * registers it with the knowledge service when the plugin starts.
 */
export class KnowledgeRagflowPlugin implements Plugin {
  name = 'com.objectstack.plugin.knowledge-ragflow';
  version = '0.1.0';
  type = 'standard';

  private readonly adapter: KnowledgeRagflowAdapter;
  /** Endpoint without trailing slashes, kept only for the start-up log line. */
  private readonly endpoint: string;

  /**
   * @param opts Adapter options, forwarded unchanged to the adapter.
   * @throws Error whatever the adapter constructor throws for invalid options.
   */
  constructor(opts: KnowledgeRagflowPluginOptions) {
    // Construct the adapter first: it validates `endpoint` before we use it.
    this.adapter = new KnowledgeRagflowAdapter(opts);
    // Same trailing-slash normalisation the adapter applies, so the log
    // line shows the base URL requests are actually sent to without
    // reaching into the adapter's private state.
    this.endpoint = opts.endpoint.replace(/\/+$/, '');
  }

  /** No-op: actual registration happens in start() once service is available. */
  async init(_ctx: PluginContext): Promise<void> {
    // Intentionally empty.
  }

  /**
   * Registers the adapter under its id with the knowledge service.
   *
   * When the service cannot be obtained — the lookup throws or yields
   * nothing — a warning is logged and the plugin starts without
   * registering anything.
   *
   * @param ctx Plugin context providing `getService` and `logger`.
   */
  async start(ctx: PluginContext): Promise<void> {
    let svc: IKnowledgeService | undefined;
    try {
      svc = ctx.getService<IKnowledgeService>(KNOWLEDGE_SERVICE);
    } catch {
      svc = undefined;
    }
    // Guard the "returned nothing" case too, not just the throwing one.
    if (!svc) {
      ctx.logger.warn?.(
        'KnowledgeRagflowPlugin: IKnowledgeService not registered — install KnowledgeServicePlugin first.',
      );
      return;
    }
    svc.registerAdapter(this.adapter.id, this.adapter);
    ctx.logger.info?.(
      `KnowledgeRagflowPlugin: adapter '${this.adapter.id}' registered (endpoint=${this.endpoint}).`,
    );
  }
}
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "extends": "../../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "target": "ES2020",
5
+ "module": "ES2020",
6
+ "moduleResolution": "bundler",
7
+ "declaration": true,
8
+ "outDir": "./dist",
9
+ "strict": true,
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true,
12
+ "noUnusedLocals": false,
13
+ "noUnusedParameters": false,
14
+ "forceConsistentCasingInFileNames": true,
15
+ "types": ["node"],
16
+ "rootDir": "./src"
17
+ },
18
+ "include": ["src/**/*"],
19
+ "exclude": ["node_modules", "dist"]
20
+ }
@@ -0,0 +1,23 @@
1
+ // Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license.
2
+
3
+ import { defineConfig } from 'vitest/config';
4
+ import path from 'path';
5
+
6
// Resolves a path relative to the directory containing this config file.
const fromHere = (relativePath: string): string => path.resolve(__dirname, relativePath);

// Vitest configuration: Node environment with global test APIs, and the
// workspace packages aliased to their TypeScript sources.
export default defineConfig({
  test: {
    globals: true,
    environment: 'node',
  },
  resolve: {
    // Sub-path entries are listed before the bare '@objectstack/spec' entry.
    alias: {
      '@objectstack/core': fromHere('../../core/src/index.ts'),
      '@objectstack/service-knowledge': fromHere('../../services/service-knowledge/src/index.ts'),
      '@objectstack/spec/ai': fromHere('../../spec/src/ai/index.ts'),
      '@objectstack/spec/contracts': fromHere('../../spec/src/contracts/index.ts'),
      '@objectstack/spec/data': fromHere('../../spec/src/data/index.ts'),
      '@objectstack/spec/kernel': fromHere('../../spec/src/kernel/index.ts'),
      '@objectstack/spec/system': fromHere('../../spec/src/system/index.ts'),
      '@objectstack/spec': fromHere('../../spec/src/index.ts'),
    },
  },
});