graphile-llm 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +23 -0
- package/README.md +193 -0
- package/__tests__/graphile-llm.test.d.ts +1 -0
- package/__tests__/graphile-llm.test.js +721 -0
- package/chat.d.ts +37 -0
- package/chat.js +105 -0
- package/embedder.d.ts +35 -0
- package/embedder.js +79 -0
- package/esm/__tests__/graphile-llm.test.d.ts +1 -0
- package/esm/__tests__/graphile-llm.test.js +683 -0
- package/esm/chat.d.ts +37 -0
- package/esm/chat.js +97 -0
- package/esm/embedder.d.ts +35 -0
- package/esm/embedder.js +71 -0
- package/esm/index.d.ts +39 -0
- package/esm/index.js +42 -0
- package/esm/plugins/llm-module-plugin.d.ts +38 -0
- package/esm/plugins/llm-module-plugin.js +82 -0
- package/esm/plugins/rag-plugin.d.ts +36 -0
- package/esm/plugins/rag-plugin.js +341 -0
- package/esm/plugins/text-mutation-plugin.d.ts +44 -0
- package/esm/plugins/text-mutation-plugin.js +191 -0
- package/esm/plugins/text-search-plugin.d.ts +41 -0
- package/esm/plugins/text-search-plugin.js +163 -0
- package/esm/preset.d.ts +55 -0
- package/esm/preset.js +74 -0
- package/esm/types.d.ts +173 -0
- package/esm/types.js +6 -0
- package/index.d.ts +39 -0
- package/index.js +56 -0
- package/package.json +76 -0
- package/plugins/llm-module-plugin.d.ts +38 -0
- package/plugins/llm-module-plugin.js +85 -0
- package/plugins/rag-plugin.d.ts +36 -0
- package/plugins/rag-plugin.js +344 -0
- package/plugins/text-mutation-plugin.d.ts +44 -0
- package/plugins/text-mutation-plugin.js +194 -0
- package/plugins/text-search-plugin.d.ts +41 -0
- package/plugins/text-search-plugin.js +166 -0
- package/preset.d.ts +55 -0
- package/preset.js +77 -0
- package/types.d.ts +173 -0
- package/types.js +7 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LlmModulePlugin
|
|
4
|
+
*
|
|
5
|
+
* Detects and loads the `llm_module` configuration from `services_public.api_modules`.
|
|
6
|
+
* Makes the resolved embedder available to other plugins via the build context.
|
|
7
|
+
*
|
|
8
|
+
* This plugin is the foundation that enables per-database LLM configuration.
|
|
9
|
+
* When an API has an `llm_module` configured, the embedder is resolved and
|
|
10
|
+
* stored on the build object for other plugins (text search, text mutations)
|
|
11
|
+
* to consume.
|
|
12
|
+
*
|
|
13
|
+
* Resolution order for the embedder:
|
|
14
|
+
* 1. `llm_module` from api_modules (per-database; resolved at request time, not by this plugin's build hook)
|
|
15
|
+
* 2. `defaultEmbedder` from preset options (dev/testing fallback)
|
|
16
|
+
* 3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
|
|
17
|
+
* 4. null — LLM features are disabled
|
|
18
|
+
*/
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
exports.createLlmModulePlugin = createLlmModulePlugin;
|
|
21
|
+
const embedder_1 = require("../embedder");
|
|
22
|
+
const chat_1 = require("../chat");
|
|
23
|
+
/**
 * Build the LlmModulePlugin for the supplied preset options.
 *
 * The returned plugin registers a `build` schema hook that resolves an
 * embedder and a chat completer and exposes them on the build object as
 * `llmEmbedder` and `llmChatCompleter`. For each of the two, the explicit
 * preset option is tried first and the environment-based factory is used
 * when that yields nothing.
 *
 * @param {object} [options] - Preset options.
 * @param {object} [options.defaultEmbedder] - Config handed to `buildEmbedder`.
 * @param {object} [options.defaultChatCompleter] - Config handed to `buildChatCompleter`.
 * @returns {object} Plugin object (`name`, `version`, `description`, `schema.hooks.build`).
 */
function createLlmModulePlugin(options = {}) {
    const embedderConfig = options.defaultEmbedder;
    const chatConfig = options.defaultChatCompleter;

    /** Schema `build` hook: resolve both providers and attach them to the build. */
    function attachLlmProviders(build) {
        // Embedder: preset option first, environment variables second.
        const presetEmbedder = embedderConfig
            ? (0, embedder_1.buildEmbedder)(embedderConfig)
            : null;
        const llmEmbedder = presetEmbedder || (0, embedder_1.buildEmbedderFromEnv)();
        console.log(llmEmbedder
            ? '[graphile-llm] Embedder configured — LLM text fields will be enabled'
            : '[graphile-llm] No embedder configured. Set defaultEmbedder in preset options ' +
                'or EMBEDDER_PROVIDER env var to enable text-to-vector fields.');

        // Chat completer: same two-step resolution.
        const presetChat = chatConfig
            ? (0, chat_1.buildChatCompleter)(chatConfig)
            : null;
        const llmChatCompleter = presetChat || (0, chat_1.buildChatCompleterFromEnv)();
        console.log(llmChatCompleter
            ? '[graphile-llm] Chat completer configured — RAG queries will be enabled'
            : '[graphile-llm] No chat completer configured. Set defaultChatCompleter in preset ' +
                'options or CHAT_PROVIDER env var to enable RAG queries.');

        return build.extend(
            build,
            { llmEmbedder, llmChatCompleter },
            'LlmModulePlugin adding llmEmbedder and llmChatCompleter to build',
        );
    }

    return {
        name: 'LlmModulePlugin',
        version: '0.1.0',
        description: 'Resolves LLM embedder and chat completer configuration and makes them available to other plugins',
        schema: {
            hooks: {
                build(build) {
                    return attachLlmProviders(build);
                },
            },
        },
    };
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LlmRagPlugin
|
|
3
|
+
*
|
|
4
|
+
* Adds RAG (Retrieval-Augmented Generation) query support to PostGraphile v5.
|
|
5
|
+
*
|
|
6
|
+
* When enabled, this plugin:
|
|
7
|
+
* 1. Discovers tables with @hasChunks smart tag during schema build
|
|
8
|
+
* 2. Adds a `ragQuery` root query field that orchestrates:
|
|
9
|
+
* embed prompt → pgvector search chunks → assemble context → call chat LLM → return answer
|
|
10
|
+
* 3. Adds an `embedText` root query field for standalone text-to-vector conversion
|
|
11
|
+
*
|
|
12
|
+
* Uses the extendSchema + grafast lambda pattern (same as bucket-provisioner
|
|
13
|
+
* and presigned-url plugins) for async operations at execution time.
|
|
14
|
+
*
|
|
15
|
+
* RAG is a consumer of graphile-search's pgvector adapter — it uses the existing
|
|
16
|
+
* chunk-aware tables but orchestrates the full LLM synthesis pipeline.
|
|
17
|
+
*
|
|
18
|
+
* Resolution order for embedder and chat completer:
|
|
19
|
+
* 1. build.llmEmbedder / build.llmChatCompleter (from LlmModulePlugin)
|
|
20
|
+
* 2. Falls back to error if not configured
|
|
21
|
+
*/
|
|
22
|
+
import type { GraphileConfig } from 'graphile-config';
|
|
23
|
+
import type { RagDefaults } from '../types';
|
|
24
|
+
declare global {
|
|
25
|
+
namespace GraphileConfig {
|
|
26
|
+
interface Plugins {
|
|
27
|
+
LlmRagPlugin: true;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Creates the LlmRagPlugin.
|
|
33
|
+
*
|
|
34
|
+
* @param ragDefaults - Default configuration for RAG queries
|
|
35
|
+
*/
|
|
36
|
+
export declare function createLlmRagPlugin(ragDefaults?: RagDefaults): GraphileConfig.Plugin;
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LlmRagPlugin
|
|
4
|
+
*
|
|
5
|
+
* Adds RAG (Retrieval-Augmented Generation) query support to PostGraphile v5.
|
|
6
|
+
*
|
|
7
|
+
* When enabled, this plugin:
|
|
8
|
+
* 1. Discovers tables with @hasChunks smart tag during schema build
|
|
9
|
+
* 2. Adds a `ragQuery` root query field that orchestrates:
|
|
10
|
+
* embed prompt → pgvector search chunks → assemble context → call chat LLM → return answer
|
|
11
|
+
* 3. Adds an `embedText` root query field for standalone text-to-vector conversion
|
|
12
|
+
*
|
|
13
|
+
* Uses the extendSchema + grafast lambda pattern (same as bucket-provisioner
|
|
14
|
+
* and presigned-url plugins) for async operations at execution time.
|
|
15
|
+
*
|
|
16
|
+
* RAG is a consumer of graphile-search's pgvector adapter — it uses the existing
|
|
17
|
+
* chunk-aware tables but orchestrates the full LLM synthesis pipeline.
|
|
18
|
+
*
|
|
19
|
+
* Resolution order for embedder and chat completer:
|
|
20
|
+
* 1. build.llmEmbedder / build.llmChatCompleter (from LlmModulePlugin)
|
|
21
|
+
* 2. Falls back to error if not configured
|
|
22
|
+
*/
|
|
23
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
24
|
+
exports.createLlmRagPlugin = createLlmRagPlugin;
|
|
25
|
+
const grafast_1 = require("grafast");
|
|
26
|
+
const graphile_utils_1 = require("graphile-utils");
|
|
27
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
28
|
+
const DEFAULT_CONTEXT_LIMIT = 5;
|
|
29
|
+
const DEFAULT_MAX_TOKENS = 4000;
|
|
30
|
+
const DEFAULT_MIN_SIMILARITY = 0;
|
|
31
|
+
const DEFAULT_SYSTEM_PROMPT = 'You are a helpful assistant. Answer the user\'s question based ONLY on the ' +
|
|
32
|
+
'following context. If the context does not contain enough information to ' +
|
|
33
|
+
'answer, say so. Do not make up information.\n\n' +
|
|
34
|
+
'--- CONTEXT ---\n';
|
|
35
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
36
|
+
/**
 * Parse a `@hasChunks` smart tag value into a chunk-table descriptor.
 *
 * The tag may be a JSON string or an already-parsed object. Anything that
 * does not resolve to an object carrying a `chunksTable` property yields
 * `null`, including strings that are valid JSON but not objects (for
 * example `'null'`), which previously caused a TypeError.
 *
 * @param {unknown} raw - Raw tag value taken from the codec's tags.
 * @param {object} [codec] - Codec owning the tag; its `name` and
 *   `extensions.pg.schemaName` supply defaults when present.
 * @returns {object|null} Descriptor with `parentCodecName`, `chunksSchema`,
 *   `chunksTableName`, `parentFkField`, `parentPkField`, `embeddingField`
 *   and `contentField`, or `null` when the tag is unusable.
 */
function parseHasChunksTag(raw, codec) {
    let parsed = raw;
    if (typeof raw === 'string') {
        try {
            parsed = JSON.parse(raw);
        }
        catch {
            // Malformed JSON in the tag: treat the table as not chunk-aware.
            return null;
        }
    }
    // JSON.parse can legitimately return null or a primitive; only objects
    // can carry the configuration keys read below.
    if (typeof parsed !== 'object' || parsed === null) {
        return null;
    }
    if (!parsed.chunksTable) {
        return null;
    }
    return {
        // The codec is treated as optional throughout, matching the optional
        // chaining already used for the schema-name lookup.
        parentCodecName: codec?.name || 'unknown',
        chunksSchema: parsed.chunksSchema || codec?.extensions?.pg?.schemaName || null,
        chunksTableName: parsed.chunksTable,
        parentFkField: parsed.parentFk || 'parent_id',
        parentPkField: parsed.parentPk || 'id',
        embeddingField: parsed.embeddingField || 'embedding',
        contentField: parsed.contentField || 'content',
    };
}
|
|
71
|
+
/**
 * Collect a chunk-table descriptor for every codec tagged with `@hasChunks`.
 *
 * Walks `build.pgRegistry.pgResources`, keeps codecs that have attributes
 * and a truthy `hasChunks` tag, and parses that tag with
 * `parseHasChunksTag`. Each codec is inspected at most once, so a codec
 * shared by several resources contributes a single descriptor instead of
 * one per resource.
 *
 * @param {object} build - Build object; `pgRegistry` may be absent.
 * @returns {object[]} Descriptors in first-seen resource order (possibly empty).
 */
function discoverChunkTables(build) {
    const chunkTables = [];
    const resources = build.pgRegistry?.pgResources;
    if (!resources) {
        return chunkTables;
    }
    // Tracks codecs already handled so shared codecs are not reported twice.
    const seenCodecs = new Set();
    for (const resource of Object.values(resources)) {
        const codec = resource?.codec;
        if (!codec?.attributes || seenCodecs.has(codec)) {
            continue;
        }
        seenCodecs.add(codec);
        const rawTag = codec.extensions?.tags?.hasChunks;
        if (!rawTag) {
            continue;
        }
        const info = parseHasChunksTag(rawTag, codec);
        if (info) {
            chunkTables.push(info);
        }
    }
    return chunkTables;
}
|
|
94
|
+
/**
 * Build a parameterized query that ranks rows of a chunks table by `<=>`
 * distance to a query vector.
 *
 * Identifiers come from smart-tag configuration and are quoted with
 * embedded double quotes doubled, so a stray `"` cannot terminate the
 * identifier. Rows whose embedding is NULL are excluded: their distance
 * would be NULL and surface downstream as NaN.
 *
 * @param {object} table - Descriptor with `chunksSchema`, `chunksTableName`,
 *   `embeddingField`, `contentField` and `parentFkField`.
 * @param {string} vectorString - Query vector literal bound as `$1`.
 * @param {number} limit - Maximum number of rows, bound as the last parameter.
 * @param {number|null} [maxDistance] - Optional upper bound on distance;
 *   `null` or `undefined` disables the bound.
 * @returns {{text: string, values: Array}} Query text and its bound values.
 */
function buildChunkSearchSql(table, vectorString, limit, maxDistance) {
    const quoteIdent = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`;
    const qualifiedTable = table.chunksSchema
        ? `${quoteIdent(table.chunksSchema)}.${quoteIdent(table.chunksTableName)}`
        : quoteIdent(table.chunksTableName);
    const embeddingCol = quoteIdent(table.embeddingField);
    const contentCol = quoteIdent(table.contentField);
    const parentFkCol = quoteIdent(table.parentFkField);
    const distanceExpr = `${embeddingCol} <=> $1::vector`;

    const values = [vectorString];
    const conditions = [`${embeddingCol} IS NOT NULL`];
    // `!= null` deliberately matches both null and undefined so an omitted
    // bound is never pushed as a query parameter.
    if (maxDistance != null) {
        values.push(maxDistance);
        conditions.push(`(${distanceExpr}) <= $${values.length}`);
    }
    values.push(limit);

    const text = [
        'SELECT',
        `  ${contentCol} AS content,`,
        `  ${parentFkCol}::text AS parent_id,`,
        `  (${distanceExpr}) AS distance`,
        `FROM ${qualifiedTable}`,
        `WHERE ${conditions.join(' AND ')}`,
        `ORDER BY ${distanceExpr}`,
        `LIMIT $${values.length}`,
    ].join('\n');
    return { text, values };
}
|
|
121
|
+
/**
 * Render retrieved chunks as one context string for the LLM prompt.
 *
 * Each chunk becomes a numbered `[Source N]` entry annotated with
 * `1 - distance` formatted to three decimals, followed by the chunk text.
 * Entries are separated by a `---` rule surrounded by blank lines.
 *
 * @param {Array<{content: string, distance: number}>} chunks - Chunks in display order.
 * @returns {string} The assembled context (empty string for no chunks).
 */
function assembleContext(chunks) {
    const separator = '\n\n---\n\n';
    const renderEntry = (entry, position) => {
        const similarity = (1 - entry.distance).toFixed(3);
        const header = `[Source ${position + 1}] (similarity: ${similarity})`;
        return `${header}\n${entry.content}`;
    };
    const rendered = chunks.map(renderEntry);
    return rendered.join(separator);
}
|
|
129
|
+
// ─── Plugin Factory ─────────────────────────────────────────────────────────
|
|
130
|
+
/**
 * Creates the LlmRagPlugin, which adds the `ragQuery` and `embedText` root
 * query fields.
 *
 * `ragQuery` embeds the prompt, searches every discovered chunk table,
 * keeps the closest chunks, assembles them into context and asks the chat
 * completer for an answer. `embedText` returns the raw embedding of a text.
 *
 * The chunk tables, embedder and chat completer are captured per schema
 * build, so one plugin instance can back several schemas without a later
 * build replacing the providers used by an earlier schema's plans.
 *
 * @param {object} [ragDefaults] - Default configuration for RAG queries
 *   (`contextLimit`, `minSimilarity`, `systemPrompt`, `maxTokens`).
 * @returns {object} The plugin: the `extendSchema` result plus `name`,
 *   `version`, `description` and `after` ordering hints.
 */
function createLlmRagPlugin(ragDefaults = {}) {
    const schemaExtension = (0, graphile_utils_1.extendSchema)((build) => {
        // Build-local state: the plan closures below read these bindings, and
        // keeping them local stops a second schema build from overwriting them.
        const chunkTables = discoverChunkTables(build);
        const embedder = build.llmEmbedder || null;
        const chatCompleter = build.llmChatCompleter || null;
        if (chunkTables.length > 0) {
            console.log(`[graphile-llm] RAG plugin discovered ${chunkTables.length} chunk-aware table(s): ` +
                chunkTables.map((t) => t.parentCodecName).join(', '));
        }
        else {
            console.log('[graphile-llm] RAG plugin found no @hasChunks tables. ' +
                'ragQuery will still work if chunks tables are queried directly.');
        }
        return {
            typeDefs: (0, graphile_utils_1.gql) `
        """A source chunk retrieved during RAG context assembly."""
        type RagSource {
          """The text content of the retrieved chunk."""
          content: String!
          """Cosine similarity score (0..1, higher = more similar)."""
          similarity: Float!
          """The parent table this chunk belongs to."""
          tableName: String
          """The parent row ID this chunk belongs to."""
          parentId: String
        }

        """Response from a RAG (Retrieval-Augmented Generation) query."""
        type RagResponse {
          """The LLM-generated answer based on retrieved context."""
          answer: String!
          """The source chunks used as context for the answer."""
          sources: [RagSource!]!
          """Approximate token count for the request (logging only, not metered)."""
          tokensUsed: Int
        }

        """Response from an embedText query."""
        type EmbedTextResponse {
          """The resulting vector embedding."""
          vector: [Float!]!
          """Number of dimensions in the vector."""
          dimensions: Int!
        }

        extend type Query {
          """
          RAG (Retrieval-Augmented Generation) query.
          Embeds the prompt, searches chunk-aware tables for similar content,
          assembles context, and calls the chat LLM to generate an answer.
          Requires both an embedding provider and a chat provider to be configured.
          """
          ragQuery(
            """The natural language question or prompt."""
            prompt: String!
            """Maximum number of context chunks to include (default: 5)."""
            contextLimit: Int
            """Minimum similarity threshold (0..1). Chunks below this are excluded."""
            minSimilarity: Float
            """Custom system prompt. Overrides the default RAG system prompt."""
            systemPrompt: String
          ): RagResponse

          """
          Convert text to a vector embedding using the configured embedding provider.
          Useful for client-side vector operations when you need the raw vector.
          """
          embedText(
            """The text to embed."""
            text: String!
          ): EmbedTextResponse
        }
      `,
            plans: {
                Query: {
                    ragQuery(_$root, fieldArgs) {
                        const $prompt = fieldArgs.getRaw('prompt');
                        const $contextLimit = fieldArgs.getRaw('contextLimit');
                        const $minSimilarity = fieldArgs.getRaw('minSimilarity');
                        const $systemPrompt = fieldArgs.getRaw('systemPrompt');
                        const $withPgClient = (0, grafast_1.context)().get('withPgClient');
                        const $pgSettings = (0, grafast_1.context)().get('pgSettings');
                        const $combined = (0, grafast_1.object)({
                            prompt: $prompt,
                            contextLimit: $contextLimit,
                            minSimilarity: $minSimilarity,
                            systemPrompt: $systemPrompt,
                            withPgClient: $withPgClient,
                            pgSettings: $pgSettings,
                        });
                        return (0, grafast_1.lambda)($combined, async (input) => {
                            const { prompt, contextLimit: queryContextLimit, minSimilarity: queryMinSimilarity, systemPrompt: querySystemPrompt, withPgClient, pgSettings, } = input;
                            if (!prompt || typeof prompt !== 'string') {
                                throw new Error('RAG_INVALID_PROMPT: prompt is required');
                            }
                            if (!embedder) {
                                throw new Error('RAG_EMBEDDER_NOT_CONFIGURED: An embedding provider must be configured ' +
                                    'to use ragQuery. Set defaultEmbedder in GraphileLlmPreset options.');
                            }
                            if (!chatCompleter) {
                                throw new Error('RAG_CHAT_NOT_CONFIGURED: A chat completion provider must be configured ' +
                                    'to use ragQuery. Set defaultChatCompleter in GraphileLlmPreset options.');
                            }
                            // Resolve parameters: query argument, then plugin default, then constant.
                            const limit = queryContextLimit ?? ragDefaults.contextLimit ?? DEFAULT_CONTEXT_LIMIT;
                            // Reject an unusable limit before the embedding call is made,
                            // instead of failing later inside the SQL LIMIT clause.
                            if (!Number.isInteger(limit) || limit < 0) {
                                throw new Error('RAG_INVALID_CONTEXT_LIMIT: contextLimit must be a non-negative integer');
                            }
                            const minSim = queryMinSimilarity ?? ragDefaults.minSimilarity ?? DEFAULT_MIN_SIMILARITY;
                            // The search is bounded by distance; similarity is 1 - distance.
                            const maxDistance = minSim > 0 ? (1 - minSim) : null;
                            const systemPromptTemplate = querySystemPrompt ?? ragDefaults.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
                            // Step 1: Embed the prompt
                            const startEmbed = Date.now();
                            const vector = await embedder(prompt);
                            const embedLatency = Date.now() - startEmbed;
                            const vectorString = `[${vector.join(',')}]`;
                            console.log(`[graphile-llm] RAG embed: dims=${vector.length}, latency=${embedLatency}ms`);
                            // Step 2: Search chunks tables for similar content
                            const allChunks = [];
                            if (chunkTables.length > 0) {
                                await withPgClient(pgSettings, async (pgClient) => {
                                    // Queries run one after another on the single client.
                                    for (const table of chunkTables) {
                                        const query = buildChunkSearchSql(table, vectorString, limit, maxDistance);
                                        const result = await pgClient.query(query);
                                        for (const row of result.rows) {
                                            const distance = parseFloat(row.distance);
                                            // A NULL or unparsable distance becomes NaN, which would
                                            // break the sort below and the non-null `similarity`
                                            // field, so such rows are dropped.
                                            if (!Number.isFinite(distance)) {
                                                continue;
                                            }
                                            allChunks.push({
                                                content: row.content,
                                                parent_id: row.parent_id,
                                                distance,
                                                table_name: table.parentCodecName,
                                            });
                                        }
                                    }
                                });
                            }
                            // Sort by distance (ascending) and take top N across all tables
                            allChunks.sort((a, b) => a.distance - b.distance);
                            const topChunks = allChunks.slice(0, limit);
                            if (topChunks.length === 0) {
                                return {
                                    answer: 'No relevant context found for your query. ' +
                                        'Try broadening your search or lowering the minimum similarity threshold.',
                                    sources: [],
                                    tokensUsed: null,
                                };
                            }
                            // Step 3: Assemble context
                            const contextText = assembleContext(topChunks);
                            // Step 4: Call chat completion
                            const startChat = Date.now();
                            const answer = await chatCompleter([
                                { role: 'system', content: systemPromptTemplate + contextText },
                                { role: 'user', content: prompt },
                            ], {
                                maxTokens: ragDefaults.maxTokens ?? DEFAULT_MAX_TOKENS,
                            });
                            const chatLatency = Date.now() - startChat;
                            console.log(`[graphile-llm] RAG chat: sources=${topChunks.length}, latency=${chatLatency}ms`);
                            // Step 5: Return response
                            return {
                                answer,
                                sources: topChunks.map((chunk) => ({
                                    content: chunk.content,
                                    similarity: 1 - chunk.distance,
                                    tableName: chunk.table_name,
                                    parentId: chunk.parent_id,
                                })),
                                tokensUsed: null, // Deferred to metering system
                            };
                        });
                    },
                    embedText(_$root, fieldArgs) {
                        const $text = fieldArgs.getRaw('text');
                        return (0, grafast_1.lambda)($text, async (text) => {
                            if (!text || typeof text !== 'string') {
                                throw new Error('EMBED_INVALID_TEXT: text is required');
                            }
                            if (!embedder) {
                                throw new Error('EMBED_NOT_CONFIGURED: An embedding provider must be configured ' +
                                    'to use embedText. Set defaultEmbedder in GraphileLlmPreset options.');
                            }
                            const startTime = Date.now();
                            const vector = await embedder(text);
                            const latencyMs = Date.now() - startTime;
                            console.log(`[graphile-llm] embedText: dims=${vector.length}, latency=${latencyMs}ms`);
                            return {
                                vector,
                                dimensions: vector.length,
                            };
                        });
                    },
                },
            },
        };
    });
    return {
        ...schemaExtension,
        name: 'LlmRagPlugin',
        version: '0.1.0',
        description: 'RAG (Retrieval-Augmented Generation) query support — ' +
            'detects @hasChunks tables and adds ragQuery/embedText fields',
        after: [
            'LlmModulePlugin',
            'UnifiedSearchPlugin',
            'VectorCodecPlugin',
        ],
    };
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LlmTextMutationPlugin
|
|
3
|
+
*
|
|
4
|
+
* Adds `{columnName}Text: String` companion fields on create/update mutation
|
|
5
|
+
* inputs for every vector column. When the client provides a text string in
|
|
6
|
+
* the companion field, the plugin embeds it server-side and injects the
|
|
7
|
+
* resulting vector into the actual column.
|
|
8
|
+
*
|
|
9
|
+
* Example:
|
|
10
|
+
* mutation { createArticle(input: { embeddingText: "Machine learning concepts" }) }
|
|
11
|
+
*
|
|
12
|
+
* This is the mutation counterpart to LlmTextSearchPlugin (which handles
|
|
13
|
+
* filter/query-side text-to-vector). Together they let clients work entirely
|
|
14
|
+
* with text/prompts instead of raw float vectors.
|
|
15
|
+
*
|
|
16
|
+
* Runtime embedding uses the v4-style resolver wrapping approach (same as
|
|
17
|
+
* graphile-upload-plugin and graphile-bucket-provisioner-plugin). grafserv v5
|
|
18
|
+
* supports this through its backwards-compatibility layer.
|
|
19
|
+
*
|
|
20
|
+
* The companion fields are only added when the LLM plugin is loaded.
|
|
21
|
+
* If no embedder is configured, the fields are still registered for schema
|
|
22
|
+
* stability but return a clear error at execution time.
|
|
23
|
+
*/
|
|
24
|
+
import 'graphile-build';
|
|
25
|
+
import 'graphile-build-pg';
|
|
26
|
+
import type { GraphileConfig } from 'graphile-config';
|
|
27
|
+
declare global {
|
|
28
|
+
namespace GraphileConfig {
|
|
29
|
+
interface Plugins {
|
|
30
|
+
LlmTextMutationPlugin: true;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Creates the LlmTextMutationPlugin.
|
|
36
|
+
*
|
|
37
|
+
* Hooks into GraphQLInputObjectType_fields for create/update input types
|
|
38
|
+
* and adds `{columnName}Text: String` for each vector column.
|
|
39
|
+
*
|
|
40
|
+
* Also wraps mutation resolvers via GraphQLObjectType_fields_field to
|
|
41
|
+
* intercept `*Text` companion field values, embed them, and inject the
|
|
42
|
+
* resulting vectors before the mutation executes.
|
|
43
|
+
*/
|
|
44
|
+
export declare function createLlmTextMutationPlugin(): GraphileConfig.Plugin;
|