npm - graphile-llm - Versions diffs - 0.7.3 → 0.9.0 - Mend

graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/__tests__/graphile-llm.test.js +87 -71
package/chat.d.ts +5 -5
package/chat.js +45 -43
package/config-cache.d.ts +77 -0
package/config-cache.js +148 -0
package/embedder.d.ts +5 -5
package/embedder.js +11 -17
package/env.d.ts +31 -0
package/env.js +52 -0
package/esm/__tests__/graphile-llm.test.js +87 -71
package/esm/chat.d.ts +5 -5
package/esm/chat.js +45 -40
package/esm/config-cache.d.ts +77 -0
package/esm/config-cache.js +143 -0
package/esm/embedder.d.ts +5 -5
package/esm/embedder.js +11 -17
package/esm/env.d.ts +31 -0
package/esm/env.js +49 -0
package/esm/index.d.ts +14 -5
package/esm/index.js +11 -5
package/esm/metering.d.ts +114 -0
package/esm/metering.js +352 -0
package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
package/esm/plugins/agent-discovery-plugin.js +65 -0
package/esm/plugins/llm-module-plugin.d.ts +11 -2
package/esm/plugins/llm-module-plugin.js +15 -7
package/esm/plugins/metering-plugin.d.ts +42 -0
package/esm/plugins/metering-plugin.js +175 -0
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.d.ts +4 -0
package/esm/plugins/text-mutation-plugin.js +23 -13
package/esm/plugins/text-search-plugin.d.ts +4 -0
package/esm/plugins/text-search-plugin.js +23 -11
package/esm/preset.d.ts +21 -1
package/esm/preset.js +33 -6
package/esm/types.d.ts +86 -10
package/index.d.ts +14 -5
package/index.js +25 -8
package/metering.d.ts +114 -0
package/metering.js +359 -0
package/package.json +15 -15
package/plugins/agent-discovery-plugin.d.ts +29 -0
package/plugins/agent-discovery-plugin.js +69 -0
package/plugins/llm-module-plugin.d.ts +11 -2
package/plugins/llm-module-plugin.js +15 -7
package/plugins/metering-plugin.d.ts +42 -0
package/plugins/metering-plugin.js +178 -0
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.d.ts +4 -0
package/plugins/text-mutation-plugin.js +23 -13
package/plugins/text-search-plugin.d.ts +4 -0
package/plugins/text-search-plugin.js +23 -11
package/preset.d.ts +21 -1
package/preset.js +33 -6
package/types.d.ts +86 -10

package/esm/config-cache.js ADDED Viewed

@@ -0,0 +1,143 @@
+/**
+ * config-cache — Per-database LLM billing configuration cache
+ *
+ * Caches resolved billing function names per database_id.
+ * Uses an LRU cache with TTL so config changes propagate within a bounded window
+ * without requiring a server restart.
+ *
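+ * Resolution flow:
+ *   Billing config from `metaschema_modules_public.billing_module`
+ *   (public/private schema names + the record_usage function name; the
+ *   check_billing_quota function name is fixed, not read from the table)
+ *   Inference log config from `metaschema_modules_public.inference_log_module`
+ *   (schema name + inference log table name)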
+ *
+ * All queries run through the Graphile `withPgClient` callback, which gives us
+ * a client connected to the tenant database with proper role settings.
+ *
+ * The LLM module config (provider, model, etc.) is already resolved by the
+ * LlmModulePlugin at schema-build time. This cache handles the runtime-only
+ * billing piece.
+ */
+import { ModuleConfigCache } from 'graphile-cache';
+// ─── SQL Queries ────────────────────────────────────────────────────────────
+/**
+ * Look up the billing module row for one database ($1 = database_id).
+ *
+ * Selects the public and private schema names (joined from
+ * metaschema_public.schema) and the record_usage function name; LIMIT 1
+ * caps the result at a single row.
+ *
+ * This query does not itself check that billing_module exists. On databases
+ * that don't have the billing module provisioned (the
+ * metaschema_modules_public schema or the billing_module table might not
+ * exist at all) it fails, so callers guard it with a SCHEMA_EXISTS_SQL
+ * check and catch query errors.
+ */
+const BILLING_MODULE_SQL = `
+  SELECT
+    s.schema_name AS public_schema,
+    ps.schema_name AS private_schema,
+    bm.record_usage_function
+  FROM metaschema_modules_public.billing_module bm
+  JOIN metaschema_public.schema s ON bm.schema_id = s.id
+  JOIN metaschema_public.schema ps ON bm.private_schema_id = ps.id
+  WHERE bm.database_id = $1
+  LIMIT 1
+`;
+/**
+ * Resolve the inference log module's schema and table name for one
+ * database ($1 = database_id). The result columns are aliased to
+ * schema and table_name, and LIMIT 1 caps the result at a single row.
+ */
+const INFERENCE_LOG_MODULE_SQL = `
+  SELECT
+    s.schema_name AS schema,
+    ilm.inference_log_table_name AS table_name
+  FROM metaschema_modules_public.inference_log_module ilm
+  JOIN metaschema_public.schema s ON ilm.schema_id = s.id
+  WHERE ilm.database_id = $1
+  LIMIT 1
+`;
+// ─── Cache ──────────────────────────────────────────────────────────────────
+const billingCache = new ModuleConfigCache({
+    name: 'billing-config',
+    ttlMs: 5 * 60 * 1000, // 5 minutes
+    max: 50 // entry limit; getLlmBillingCacheStats reports this same literal, so keep the two in sync
+});
+// ─── Resolution Functions ───────────────────────────────────────────────────
+/**
+ * SQL to check if a schema exists ($1 = schema name): yields one row when
+ * information_schema.schemata lists the schema and no rows otherwise.
+ * Used as a guard before querying metaschema tables that may not be
+ * provisioned.
+ */
+const SCHEMA_EXISTS_SQL = `
+  SELECT 1 FROM information_schema.schemata WHERE schema_name = $1 LIMIT 1
+`;
+async function resolveInferenceLogConfig(pgClient, databaseId) {
+    try {
+        const schemaCheck = await pgClient.query(SCHEMA_EXISTS_SQL, ['metaschema_modules_public']);
+        if (schemaCheck.rows.length === 0)
+            return null;
+        const result = await pgClient.query(INFERENCE_LOG_MODULE_SQL, [databaseId]);
+        const row = result.rows[0];
+        if (!row?.schema || !row?.table_name)
+            return null;
+        return {
+            schema: row.schema,
+            tableName: row.table_name
+        };
+    }
+    catch {
+        return null;
+    }
+}
+async function resolveBillingConfig(pgClient, databaseId) {
+    try {
+        // Guard: check if the metaschema_modules_public schema exists.
+        // If the database doesn't have the billing module provisioned,
+        // this schema (or the billing_module table) won't exist.
+        const schemaCheck = await pgClient.query(SCHEMA_EXISTS_SQL, ['metaschema_modules_public']);
+        if (schemaCheck.rows.length === 0)
+            return null;
+        const result = await pgClient.query(BILLING_MODULE_SQL, [databaseId]);
+        const row = result.rows[0];
+        if (!row?.record_usage_function)
+            return null;
+        return {
+            publicSchema: row.public_schema,
+            privateSchema: row.private_schema,
+            recordUsageFunction: row.record_usage_function,
+            // The check_billing_quota function name follows the inflection pattern
+            checkBillingQuotaFunction: 'check_billing_quota'
+        };
+    }
+    catch {
+        // Schema/table doesn't exist or query failed — billing not available
+        return null;
+    }
+}
+// ─── Public API ─────────────────────────────────────────────────────────────
+/**
+ * Resolve billing config for a database.
+ * Results are cached per database_id with a 5-minute TTL.
+ *
+ * @param pgClient - A client connected to the tenant database (from withPgClient)
+ * @param databaseId - The database UUID
+ */
+export async function getLlmBillingConfig(pgClient, databaseId) {
+    const cached = billingCache.get(databaseId);
+    if (cached)
+        return cached;
+    const [billing, inferenceLog] = await Promise.all([
+        resolveBillingConfig(pgClient, databaseId),
+        resolveInferenceLogConfig(pgClient, databaseId)
+    ]);
+    const entry = { billing, inferenceLog };
+    billingCache.set(databaseId, entry);
+    return entry;
+}
+/**
+ * Invalidate the cached config for a specific database (or all).
+ */
+export function invalidateLlmBillingConfig(databaseId) {
+    if (databaseId) {
+        billingCache.delete(databaseId);
+    }
+    else {
+        billingCache.clear();
+    }
+}
+/**
+ * Get cache stats for diagnostics.
+ */
+export function getLlmBillingCacheStats() {
+    return { size: billingCache.size, max: 50 };
+}

package/esm/embedder.d.ts CHANGED Viewed

@@ -24,12 +24,12 @@ export declare function buildEmbedder(config: EmbedderConfig): EmbedderFunction
  */
 export declare function buildEmbedderFromModule(data: LlmModuleData): EmbedderFunction | null;
 /**
- * Resolve an embedder from environment variables via getEnvOptions().
+ * Resolve an embedder from environment variables.
  * This is a fallback for development when no llm_module or defaultEmbedder is configured.
  *
- * Environment variables (parsed by @constructive-io/graphql-env):
- *   EMBEDDER_PROVIDER - Provider name ('ollama')
- *   EMBEDDER_MODEL    - Model identifier
- *   EMBEDDER_BASE_URL - Provider base URL
+ * Environment variables (with defaults from env.ts):
+ *   EMBEDDER_PROVIDER  - Provider name (default: 'ollama')
+ *   EMBEDDER_MODEL     - Model identifier (default: 'nomic-embed-text')
+ *   EMBEDDER_BASE_URL  - Provider base URL (default: 'http://localhost:11434')
  */
 export declare function buildEmbedderFromEnv(): EmbedderFunction | null;

package/esm/embedder.js CHANGED Viewed

@@ -10,10 +10,12 @@
  *   3. Environment variables (EMBEDDER_PROVIDER, EMBEDDER_MODEL, EMBEDDER_BASE_URL)
  */
 import OllamaClient from '@agentic-kit/ollama';
-import { getEnvOptions } from '@constructive-io/graphql-env';
+import { getLlmEnvOptions } from './env';
 // ─── Built-in Providers ─────────────────────────────────────────────────────
 /**
  * Create an Ollama-based embedder function.
+ *
+ * Uses the /api/embed endpoint which returns prompt_eval_count (real token count).
  */
 function createOllamaEmbedder(baseUrl = 'http://localhost:11434', model = 'nomic-embed-text') {
     const client = new OllamaClient(baseUrl);
@@ -47,27 +49,19 @@ export function buildEmbedderFromModule(data) {
     return buildEmbedder({
         provider: data.embedding_provider,
         model: data.embedding_model,
-        baseUrl: data.embedding_base_url,
-        apiKey: data.api_key_ref,
+        baseUrl: data.embedding_base_url
     });
 }
 /**
- * Resolve an embedder from environment variables via getEnvOptions().
+ * Resolve an embedder from environment variables.
  * This is a fallback for development when no llm_module or defaultEmbedder is configured.
  *
- * Environment variables (parsed by @constructive-io/graphql-env):
- *   EMBEDDER_PROVIDER - Provider name ('ollama')
- *   EMBEDDER_MODEL    - Model identifier
- *   EMBEDDER_BASE_URL - Provider base URL
+ * Environment variables (with defaults from env.ts):
+ *   EMBEDDER_PROVIDER  - Provider name (default: 'ollama')
+ *   EMBEDDER_MODEL     - Model identifier (default: 'nomic-embed-text')
+ *   EMBEDDER_BASE_URL  - Provider base URL (default: 'http://localhost:11434')
  */
 export function buildEmbedderFromEnv() {
-    const { llm } = getEnvOptions();
-    const provider = llm?.embedder?.provider;
-    if (!provider)
-        return null;
-    return buildEmbedder({
-        provider,
-        model: llm?.embedder?.model,
-        baseUrl: llm?.embedder?.baseUrl,
-    });
+    const { embedding } = getLlmEnvOptions();
+    return buildEmbedder(embedding);
 }

package/esm/env.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * LLM Environment Configuration
+ *
+ * Single source of truth for all LLM-related environment variables and defaults.
+ * Every other module in graphile-llm imports from here — no direct process.env
+ * reads or scattered null coalescing elsewhere.
+ *
+ * Environment variables:
+ *   EMBEDDER_PROVIDER  - Embedding provider name (default: 'ollama')
+ *   EMBEDDER_MODEL     - Embedding model (default: 'nomic-embed-text')
+ *   EMBEDDER_BASE_URL  - Embedding provider URL (default: 'http://localhost:11434')
+ *   CHAT_PROVIDER      - Chat provider name (default: 'ollama')
+ *   CHAT_MODEL         - Chat model (default: 'llama3')
+ *   CHAT_BASE_URL      - Chat provider URL (default: 'http://localhost:11434')
+ */
+export interface LlmProviderConfig { // settings for one provider (embedding or chat)
+    provider: string; // provider name, e.g. 'ollama'
+    model: string; // model identifier, e.g. 'nomic-embed-text' or 'llama3'
+    baseUrl: string; // provider base URL, e.g. 'http://localhost:11434'
+}
+export interface LlmEnvOptions { // result of getLlmEnvOptions()
+    embedding: LlmProviderConfig; // from EMBEDDER_PROVIDER / EMBEDDER_MODEL / EMBEDDER_BASE_URL
+    chat: LlmProviderConfig; // from CHAT_PROVIDER / CHAT_MODEL / CHAT_BASE_URL
+}
+/**
+ * Resolve LLM configuration from environment variables with sensible defaults.
+ *
+ * Both `embedding` and `chat` are always fully populated: a variable that
+ * is not set falls back to its documented default.
+ *
+ * Call this once and pass the result around — never read process.env directly
+ * in plugin code.
+ */
+export declare function getLlmEnvOptions(): LlmEnvOptions;

package/esm/env.js ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * LLM Environment Configuration
+ *
+ * Single source of truth for all LLM-related environment variables and defaults.
+ * Every other module in graphile-llm imports from here — no direct process.env
+ * reads or scattered null coalescing elsewhere.
+ *
+ * Environment variables:
+ *   EMBEDDER_PROVIDER  - Embedding provider name (default: 'ollama')
+ *   EMBEDDER_MODEL     - Embedding model (default: 'nomic-embed-text')
+ *   EMBEDDER_BASE_URL  - Embedding provider URL (default: 'http://localhost:11434')
+ *   CHAT_PROVIDER      - Chat provider name (default: 'ollama')
+ *   CHAT_MODEL         - Chat model (default: 'llama3')
+ *   CHAT_BASE_URL      - Chat provider URL (default: 'http://localhost:11434')
+ */
+// ─── Defaults ───────────────────────────────────────────────────────────────
+const LLM_DEFAULTS = {
+    embedding: {
+        provider: 'ollama',
+        model: 'nomic-embed-text',
+        baseUrl: 'http://localhost:11434'
+    },
+    chat: {
+        provider: 'ollama',
+        model: 'llama3',
+        baseUrl: 'http://localhost:11434'
+    }
+};
+// ─── Resolution ─────────────────────────────────────────────────────────────
+/**
+ * Resolve LLM configuration from environment variables with sensible defaults.
+ *
+ * Call this once and pass the result around — never read process.env directly
+ * in plugin code.
+ */
+export function getLlmEnvOptions() {
+    return {
+        embedding: {
+            provider: process.env.EMBEDDER_PROVIDER ?? LLM_DEFAULTS.embedding.provider,
+            model: process.env.EMBEDDER_MODEL ?? LLM_DEFAULTS.embedding.model,
+            baseUrl: process.env.EMBEDDER_BASE_URL ?? LLM_DEFAULTS.embedding.baseUrl
+        },
+        chat: {
+            provider: process.env.CHAT_PROVIDER ?? LLM_DEFAULTS.chat.provider,
+            model: process.env.CHAT_MODEL ?? LLM_DEFAULTS.chat.model,
+            baseUrl: process.env.CHAT_BASE_URL ?? LLM_DEFAULTS.chat.baseUrl
+        }
+    };
+}

package/esm/index.d.ts CHANGED Viewed

@@ -29,11 +29,20 @@
  * };
  * ```
  */
+export type { LlmEnvOptions, LlmProviderConfig } from './env';
+export { getLlmEnvOptions } from './env';
 export { GraphileLlmPreset } from './preset';
 export { createLlmModulePlugin } from './plugins/llm-module-plugin';
-export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
 export { createLlmRagPlugin } from './plugins/rag-plugin';
-export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
-export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
-export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, RagDefaults, ChunkTableInfo, } from './types';
+export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
+export { createLlmMeteringPlugin } from './plugins/metering-plugin';
+export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
+export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
+export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
+export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
+export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
+export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
+export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
+export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
+export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';

package/esm/index.js CHANGED Viewed

@@ -29,14 +29,20 @@
  * };
  * ```
  */
+export { getLlmEnvOptions } from './env';
 // Preset (recommended entry point)
 export { GraphileLlmPreset } from './preset';
-// Individual plugins
+// Individual plugins (pure — no billing dependency)
 export { createLlmModulePlugin } from './plugins/llm-module-plugin';
-export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
 export { createLlmRagPlugin } from './plugins/rag-plugin';
+export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
+// Metering plugin (opt-in billing integration)
+export { createLlmMeteringPlugin } from './plugins/metering-plugin';
+export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
 // Embedder utilities
-export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
+export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
 // Chat completion utilities
-export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
+export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
+export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
+export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';

package/esm/metering.d.ts ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * metering — Billing-aware wrappers for embedder and chat functions
+ *
+ * Wraps EmbedderFunction and ChatFunction with:
+ *   1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
+ *   2. Execute the underlying function
+ *   3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
+ *
+ * When the quota check fails, the wrapper does not throw: it resolves to a
+ * MeterResult with `result: null` and `quotaExceeded: true` (graceful
+ * degradation), so the search pipeline can fall back to text-only.
+ *
+ * Token counts:
+ *   - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
+ *   - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
+ *
+ * The billing functions live in the tenant database and are called via the
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
+ */
+import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
+import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
+/**
+ * Callback matching Graphile's withPgClient signature.
+ * Acquires a pg client, calls the callback, then releases the client.
+ *
+ * Both the callback and the wrapper resolve to void, so any value the
+ * callback computes has to be handed back through an enclosing variable.
+ */
+export type WithPgClient = (pgSettings: Record<string, string>, callback: (pgClient: PgClient) => Promise<void>) => Promise<void>;
+export interface MeteringContext { // per-request state taken by meteredEmbed, meteredChat and logInferenceUsage
+    /** Callback to acquire a tenant database client */
+    withPgClient: WithPgClient;
+    /** pgSettings from the GraphQL context (for role/claims) */
+    pgSettings: Record<string, string>;
+    /** Billing function references from the billing_module */
+    billing: BillingConfig;
+    /** Entity ID to meter against (from JWT claims) */
+    entityId: string;
+    /** Per-request correlation ID (from request.id pgSetting) */
+    requestId: string | null;
+    /** Database UUID from JWT claims */
+    databaseId: string;
+    /** Actor (user) ID from JWT claims */
+    actorId: string | null;
+    /** Inference log table config (null if inference_log_module not provisioned) */
+    inferenceLog: InferenceLogConfig | null;
+}
+export interface MeteringOptions { // optional settings accepted by meteredEmbed and meteredChat; every field may be omitted
+    /** Meter slug for embedding operations (default: model name from build config) */
+    embeddingMeterSlug?: string;
+    /** Meter slug for chat completion operations (default: model name from build config) */
+    chatMeterSlug?: string;
+    /** Whether to skip metering entirely (e.g. for local dev). Default: false */
+    skipMetering?: boolean;
+    /** Embedding model name (for inference log) */
+    embeddingModel?: string;
+    /** Chat model name (for inference log) */
+    chatModel?: string;
+    /** Provider name (for inference log) */
+    provider?: string;
+}
+export interface MeterResult<T> { // outcome wrapper resolved by meteredEmbed (T = number[]) and meteredChat (T = string)
+    /** The result from the underlying function, or null if quota exceeded */
+    result: T | null;
+    /** Whether the call was metered (NOTE(review): presumably false when ctx is null or skipMetering is set — confirm in metering.js) */
+    metered: boolean;
+    /** Whether the call was skipped due to quota limits */
+    quotaExceeded: boolean;
+    /** Latency of the underlying function call in ms */
+    latencyMs: number;
+}
+export interface InferenceLogEntry { // one inference log record, as passed to logInferenceUsage
+    databaseId: string;
+    entityId: string;
+    actorId: string | null;
+    model: string;
+    provider: string | null;
+    service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
+    operation: string;
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number; // NOTE(review): presumably inputTokens + outputTokens — confirm against the writer
+    cacheReadTokens: number | null;
+    cacheWriteTokens: number | null;
+    latencyMs: number;
+    ragEnabled: boolean;
+    chunksRetrieved: number | null;
+    embeddingModel: string | null;
+    embeddingLatencyMs: number | null;
+    status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
+    errorType: string | null;
+    rawUsage: Record<string, unknown> | null; // NOTE(review): presumably the provider's unmodified usage payload — confirm
+}
+/**
+ * Write a row to the usage_log_inference table.
+ * Gracefully skips if the inference_log_module is not provisioned
+ * (MeteringContext.inferenceLog is null in that case).
+ *
+ * NOTE(review): the target schema/table presumably come from
+ * ctx.inferenceLog rather than a fixed table name — confirm in metering.js.
+ *
+ * TODO: Also write to child (generated) database when dual-write is needed.
+ */
+export declare function logInferenceUsage(ctx: MeteringContext, entry: InferenceLogEntry): Promise<void>;
+/**
+ * Wrap an embedder with billing quota check + usage recording.
+ *
+ * The returned MeterResult contains `quotaExceeded: true` when the pre-check
+ * fails, enabling the caller to fall back to text-only search.
+ *
+ * `ctx` may be null (NOTE(review): presumably the embedder then runs
+ * unmetered — confirm in metering.js). `options` is optional.
+ */
+export declare function meteredEmbed(embedder: EmbedderFunction, text: string, ctx: MeteringContext | null, options?: MeteringOptions): Promise<MeterResult<number[]>>;
+/**
+ * Wrap a chat completion call with billing quota check + usage recording.
+ *
+ * Resolves to a MeterResult<string>, the same wrapper shape meteredEmbed
+ * uses. `ctx` may be null (NOTE(review): presumably the chat call then runs
+ * unmetered — confirm in metering.js). `chatOptions` and `meteringOptions`
+ * are optional.
+ */
+export declare function meteredChat(chat: ChatFunction, messages: ChatMessage[], ctx: MeteringContext | null, chatOptions?: ChatOptions, meteringOptions?: MeteringOptions): Promise<MeterResult<string>>;
+export declare class QuotaExceededError extends Error { // NOTE(review): the file header says the wrappers degrade instead of throwing; where this is thrown is not visible here — confirm
+    readonly code = "QUOTA_EXCEEDED"; // fixed code string callers can match on
+    readonly meterSlug: string; // meter slug given to the constructor
+    readonly entityId: string; // entity id given to the constructor
+    constructor(meterSlug: string, entityId: string);
+}