npm - graphile-llm - Versions diffs - 0.7.3 → 0.9.0 - Mend

graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/__tests__/graphile-llm.test.js +87 -71
package/chat.d.ts +5 -5
package/chat.js +45 -43
package/config-cache.d.ts +77 -0
package/config-cache.js +148 -0
package/embedder.d.ts +5 -5
package/embedder.js +11 -17
package/env.d.ts +31 -0
package/env.js +52 -0
package/esm/__tests__/graphile-llm.test.js +87 -71
package/esm/chat.d.ts +5 -5
package/esm/chat.js +45 -40
package/esm/config-cache.d.ts +77 -0
package/esm/config-cache.js +143 -0
package/esm/embedder.d.ts +5 -5
package/esm/embedder.js +11 -17
package/esm/env.d.ts +31 -0
package/esm/env.js +49 -0
package/esm/index.d.ts +14 -5
package/esm/index.js +11 -5
package/esm/metering.d.ts +114 -0
package/esm/metering.js +352 -0
package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
package/esm/plugins/agent-discovery-plugin.js +65 -0
package/esm/plugins/llm-module-plugin.d.ts +11 -2
package/esm/plugins/llm-module-plugin.js +15 -7
package/esm/plugins/metering-plugin.d.ts +42 -0
package/esm/plugins/metering-plugin.js +175 -0
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.d.ts +4 -0
package/esm/plugins/text-mutation-plugin.js +23 -13
package/esm/plugins/text-search-plugin.d.ts +4 -0
package/esm/plugins/text-search-plugin.js +23 -11
package/esm/preset.d.ts +21 -1
package/esm/preset.js +33 -6
package/esm/types.d.ts +86 -10
package/index.d.ts +14 -5
package/index.js +25 -8
package/metering.d.ts +114 -0
package/metering.js +359 -0
package/package.json +15 -15
package/plugins/agent-discovery-plugin.d.ts +29 -0
package/plugins/agent-discovery-plugin.js +69 -0
package/plugins/llm-module-plugin.d.ts +11 -2
package/plugins/llm-module-plugin.js +15 -7
package/plugins/metering-plugin.d.ts +42 -0
package/plugins/metering-plugin.js +178 -0
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.d.ts +4 -0
package/plugins/text-mutation-plugin.js +23 -13
package/plugins/text-search-plugin.d.ts +4 -0
package/plugins/text-search-plugin.js +23 -11
package/preset.d.ts +21 -1
package/preset.js +33 -6
package/types.d.ts +86 -10

package/esm/types.d.ts CHANGED Viewed

@@ -4,9 +4,18 @@
  * Shared type definitions for the LLM plugin.
  */
 /**
- * A function that converts text into a vector embedding.
+ * Result from an embedding call, including real token usage from the provider.
  */
-export type EmbedderFunction = (text: string) => Promise<number[]>;
+export interface EmbeddingResult {
+    /** The vector embedding */
+    embedding: number[];
+    /** Number of prompt tokens consumed (from provider; 0 if unavailable) */
+    promptTokens: number;
+}
+/**
+ * A function that converts text into a vector embedding with token usage.
+ */
+export type EmbedderFunction = (text: string) => Promise<EmbeddingResult>;
 /**
  * Configuration for an embedding provider.
  */
@@ -17,8 +26,24 @@ export interface EmbedderConfig {
     model?: string;
     /** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
     baseUrl?: string;
-    /** API key for providers that require authentication (e.g. OpenAI) */
-    apiKey?: string;
+}
+/**
+ * Token usage metadata returned by LLM providers.
+ * Maps to the billing schema's inference_log columns.
+ */
+export interface LlmUsage {
+    /** Prompt / input tokens consumed */
+    input: number;
+    /** Completion / output tokens generated (includes reasoning for providers that count it) */
+    output: number;
+    /** Reasoning tokens (subset of output — not additive) */
+    reasoning: number;
+    /** Tokens served from prompt cache (zero cost) */
+    cacheRead: number;
+    /** Tokens written to prompt cache */
+    cacheWrite: number;
+    /** input + output + cacheRead + cacheWrite */
+    totalTokens: number;
 }
 /**
  * A single message in a chat conversation.
@@ -37,9 +62,17 @@ export interface ChatOptions {
     temperature?: number;
 }
 /**
- * A function that sends messages to a chat completion provider and returns the response.
+ * Result from a chat completion call, including real token usage.
  */
-export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<string>;
+export interface ChatResult {
+    content: string;
+    usage: LlmUsage;
+}
+/**
+ * A function that sends messages to a chat completion provider
+ * and returns the response with token usage metadata.
+ */
+export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<ChatResult>;
 /**
  * Configuration for a chat completion provider.
  */
@@ -50,8 +83,6 @@ export interface ChatConfig {
     model?: string;
     /** Base URL for the provider */
     baseUrl?: string;
-    /** API key for providers that require authentication */
-    apiKey?: string;
 }
 /**
  * The shape of the `llm_module` data stored in `services_public.api_modules`.
@@ -74,8 +105,6 @@ export interface LlmModuleData {
     chat_model?: string;
     /** Base URL for the chat provider */
     chat_base_url?: string;
-    /** API key reference (e.g. 'vault://openai-key' or env var name) */
-    api_key_ref?: string;
     /** Rate limit: requests per minute */
     rate_limit_rpm?: number;
     /** Maximum tokens per request */
@@ -131,6 +160,41 @@ export interface ChunkTableInfo {
     /** Text content column on chunks table (the actual chunk text) */
     contentField: string;
 }
+/**
+ * Configuration for billing/metering integration.
+ * When provided, embedding and chat calls are wrapped with quota checks
+ * and usage recording via the billing_module functions.
+ */
+export interface MeteringConfig {
+    /**
+     * Meter slug for embedding operations.
+     * Must match a slug in the billing_module meters table.
+     *
+     * @default the embedding model name (e.g. 'text-embedding-3-small')
+     * — meter slug = model name, so each model has its own meter
+     * in the three-level waterfall (per-model → inference pool → universal).
+     */
+    embeddingMeterSlug?: string;
+    /**
+     * Meter slug for chat completion operations.
+     *
+     * @default the chat model name (e.g. 'gpt-4o-mini')
+     */
+    chatMeterSlug?: string;
+    /**
+     * Disable metering entirely (e.g. for local dev).
+     * When true, billing functions are never called.
+     * @default false
+     */
+    skipMetering?: boolean;
+    /**
+     * Resolve the billing entity_id from pgSettings.
+     * The entity_id identifies who gets billed (user, org, etc.).
+     *
+     * @default reads jwt.claims.user_id
+     */
+    resolveEntityId?: (pgSettings: Record<string, string>) => string | null;
+}
 /**
  * Options for the GraphileLlmPreset.
  */
@@ -170,4 +234,16 @@ export interface GraphileLlmOptions {
      * Individual queries can override these values.
      */
     ragDefaults?: RagDefaults;
+    /**
+     * Billing/metering configuration (opt-in).
+     * When truthy, loads the LlmMeteringPlugin which wraps the embedder
+     * with billing quota checks + usage recording.
+     *
+     * Set to `true` to enable metering with defaults (entity_id from jwt.claims.user_id).
+     * Provide a MeteringConfig object for fine-grained control (custom entity_id, meter slugs).
+     * Set to `false` or omit to disable metering entirely.
+     *
+     * @default undefined (metering disabled)
+     */
+    metering?: boolean | MeteringConfig;
 }

package/index.d.ts CHANGED Viewed

@@ -29,11 +29,20 @@
  * };
  * ```
  */
+export type { LlmEnvOptions, LlmProviderConfig } from './env';
+export { getLlmEnvOptions } from './env';
 export { GraphileLlmPreset } from './preset';
 export { createLlmModulePlugin } from './plugins/llm-module-plugin';
-export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
-export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
 export { createLlmRagPlugin } from './plugins/rag-plugin';
-export { buildEmbedder, buildEmbedderFromModule, buildEmbedderFromEnv, } from './embedder';
-export { buildChatCompleter, buildChatCompleterFromModule, buildChatCompleterFromEnv, } from './chat';
-export type { EmbedderFunction, EmbedderConfig, ChatFunction, ChatConfig, ChatMessage, ChatOptions, LlmModuleData, GraphileLlmOptions, RagDefaults, ChunkTableInfo, } from './types';
+export { createLlmTextMutationPlugin } from './plugins/text-mutation-plugin';
+export { createLlmTextSearchPlugin } from './plugins/text-search-plugin';
+export { createLlmMeteringPlugin } from './plugins/metering-plugin';
+export type { AgentDiscovery, AgentTableInfo } from './plugins/agent-discovery-plugin';
+export { clearAgentDiscoveryCache, getAgentDiscovery } from './plugins/agent-discovery-plugin';
+export { buildEmbedder, buildEmbedderFromEnv, buildEmbedderFromModule } from './embedder';
+export { buildChatCompleter, buildChatCompleterFromEnv, buildChatCompleterFromModule } from './chat';
+export type { InferenceLogEntry, MeteringContext, MeteringOptions, MeterResult, WithPgClient } from './metering';
+export { logInferenceUsage, meteredChat, meteredEmbed, QuotaExceededError } from './metering';
+export type { BillingConfig, InferenceLogConfig, LlmBillingCacheEntry, PgClient } from './config-cache';
+export { getLlmBillingCacheStats, getLlmBillingConfig, invalidateLlmBillingConfig } from './config-cache';
+export type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, ChatResult, ChunkTableInfo, EmbedderConfig, EmbedderFunction, EmbeddingResult, GraphileLlmOptions, LlmModuleData, LlmUsage, MeteringConfig, RagDefaults } from './types';

package/index.js CHANGED Viewed

@@ -31,26 +31,43 @@
  * ```
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.buildChatCompleterFromEnv = exports.buildChatCompleterFromModule = exports.buildChatCompleter = exports.buildEmbedderFromEnv = exports.buildEmbedderFromModule = exports.buildEmbedder = exports.createLlmRagPlugin = exports.createLlmTextMutationPlugin = exports.createLlmTextSearchPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = void 0;
+exports.invalidateLlmBillingConfig = exports.getLlmBillingConfig = exports.getLlmBillingCacheStats = exports.QuotaExceededError = exports.meteredEmbed = exports.meteredChat = exports.logInferenceUsage = exports.buildChatCompleterFromModule = exports.buildChatCompleterFromEnv = exports.buildChatCompleter = exports.buildEmbedderFromModule = exports.buildEmbedderFromEnv = exports.buildEmbedder = exports.getAgentDiscovery = exports.clearAgentDiscoveryCache = exports.createLlmMeteringPlugin = exports.createLlmTextSearchPlugin = exports.createLlmTextMutationPlugin = exports.createLlmRagPlugin = exports.createLlmModulePlugin = exports.GraphileLlmPreset = exports.getLlmEnvOptions = void 0;
+var env_1 = require("./env");
+Object.defineProperty(exports, "getLlmEnvOptions", { enumerable: true, get: function () { return env_1.getLlmEnvOptions; } });
 // Preset (recommended entry point)
 var preset_1 = require("./preset");
 Object.defineProperty(exports, "GraphileLlmPreset", { enumerable: true, get: function () { return preset_1.GraphileLlmPreset; } });
-// Individual plugins
+// Individual plugins (pure — no billing dependency)
 var llm_module_plugin_1 = require("./plugins/llm-module-plugin");
 Object.defineProperty(exports, "createLlmModulePlugin", { enumerable: true, get: function () { return llm_module_plugin_1.createLlmModulePlugin; } });
-var text_search_plugin_1 = require("./plugins/text-search-plugin");
-Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
-var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
-Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
 var rag_plugin_1 = require("./plugins/rag-plugin");
 Object.defineProperty(exports, "createLlmRagPlugin", { enumerable: true, get: function () { return rag_plugin_1.createLlmRagPlugin; } });
+var text_mutation_plugin_1 = require("./plugins/text-mutation-plugin");
+Object.defineProperty(exports, "createLlmTextMutationPlugin", { enumerable: true, get: function () { return text_mutation_plugin_1.createLlmTextMutationPlugin; } });
+var text_search_plugin_1 = require("./plugins/text-search-plugin");
+Object.defineProperty(exports, "createLlmTextSearchPlugin", { enumerable: true, get: function () { return text_search_plugin_1.createLlmTextSearchPlugin; } });
+// Metering plugin (opt-in billing integration)
+var metering_plugin_1 = require("./plugins/metering-plugin");
+Object.defineProperty(exports, "createLlmMeteringPlugin", { enumerable: true, get: function () { return metering_plugin_1.createLlmMeteringPlugin; } });
+var agent_discovery_plugin_1 = require("./plugins/agent-discovery-plugin");
+Object.defineProperty(exports, "clearAgentDiscoveryCache", { enumerable: true, get: function () { return agent_discovery_plugin_1.clearAgentDiscoveryCache; } });
+Object.defineProperty(exports, "getAgentDiscovery", { enumerable: true, get: function () { return agent_discovery_plugin_1.getAgentDiscovery; } });
 // Embedder utilities
 var embedder_1 = require("./embedder");
 Object.defineProperty(exports, "buildEmbedder", { enumerable: true, get: function () { return embedder_1.buildEmbedder; } });
-Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
 Object.defineProperty(exports, "buildEmbedderFromEnv", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromEnv; } });
+Object.defineProperty(exports, "buildEmbedderFromModule", { enumerable: true, get: function () { return embedder_1.buildEmbedderFromModule; } });
 // Chat completion utilities
 var chat_1 = require("./chat");
 Object.defineProperty(exports, "buildChatCompleter", { enumerable: true, get: function () { return chat_1.buildChatCompleter; } });
-Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
 Object.defineProperty(exports, "buildChatCompleterFromEnv", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromEnv; } });
+Object.defineProperty(exports, "buildChatCompleterFromModule", { enumerable: true, get: function () { return chat_1.buildChatCompleterFromModule; } });
+var metering_1 = require("./metering");
+Object.defineProperty(exports, "logInferenceUsage", { enumerable: true, get: function () { return metering_1.logInferenceUsage; } });
+Object.defineProperty(exports, "meteredChat", { enumerable: true, get: function () { return metering_1.meteredChat; } });
+Object.defineProperty(exports, "meteredEmbed", { enumerable: true, get: function () { return metering_1.meteredEmbed; } });
+Object.defineProperty(exports, "QuotaExceededError", { enumerable: true, get: function () { return metering_1.QuotaExceededError; } });
+var config_cache_1 = require("./config-cache");
+Object.defineProperty(exports, "getLlmBillingCacheStats", { enumerable: true, get: function () { return config_cache_1.getLlmBillingCacheStats; } });
+Object.defineProperty(exports, "getLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.getLlmBillingConfig; } });
+Object.defineProperty(exports, "invalidateLlmBillingConfig", { enumerable: true, get: function () { return config_cache_1.invalidateLlmBillingConfig; } });

package/metering.d.ts ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * metering — Billing-aware wrappers for embedder and chat functions
+ *
+ * Wraps EmbedderFunction and ChatFunction with:
+ *   1. Pre-check: `check_billing_quota(meter_slug, entity_id, estimated_amount)`
+ *   2. Execute the underlying function
+ *   3. Post-record: `record_usage(meter_slug, entity_id, actual_amount)`
+ *
+ * When the quota check fails, the wrapper returns null (graceful degradation)
+ * instead of throwing, so the search pipeline can fall back to text-only.
+ *
+ * Token counts:
+ *   - Chat: real provider counts via ChatResult.usage (from OllamaAdapter.stream())
+ *   - Embedding: real provider counts via EmbeddingResult.promptTokens (from /api/embed)
+ *
+ * The billing functions live in the tenant database and are called via the
+ * Graphile `withPgClient` callback. Function locations (schema, names) are
+ * resolved from `billing_module` metaschema and cached by `config-cache.ts`.
+ */
+import type { BillingConfig, InferenceLogConfig, PgClient } from './config-cache';
+import type { ChatFunction, ChatMessage, ChatOptions, EmbedderFunction } from './types';
+/**
+ * Callback matching Graphile's withPgClient signature.
+ * Acquires a pg client, calls the callback, then releases the client.
+ */
+export type WithPgClient = (pgSettings: Record<string, string>, callback: (pgClient: PgClient) => Promise<void>) => Promise<void>;
+export interface MeteringContext {
+    /** Callback to acquire a tenant database client */
+    withPgClient: WithPgClient;
+    /** pgSettings from the GraphQL context (for role/claims) */
+    pgSettings: Record<string, string>;
+    /** Billing function references from the billing_module */
+    billing: BillingConfig;
+    /** Entity ID to meter against (from JWT claims) */
+    entityId: string;
+    /** Per-request correlation ID (from request.id pgSetting) */
+    requestId: string | null;
+    /** Database UUID from JWT claims */
+    databaseId: string;
+    /** Actor (user) ID from JWT claims */
+    actorId: string | null;
+    /** Inference log table config (null if inference_log_module not provisioned) */
+    inferenceLog: InferenceLogConfig | null;
+}
+export interface MeteringOptions {
+    /** Meter slug for embedding operations (default: model name from build config) */
+    embeddingMeterSlug?: string;
+    /** Meter slug for chat completion operations (default: model name from build config) */
+    chatMeterSlug?: string;
+    /** Whether to skip metering entirely (e.g. for local dev). Default: false */
+    skipMetering?: boolean;
+    /** Embedding model name (for inference log) */
+    embeddingModel?: string;
+    /** Chat model name (for inference log) */
+    chatModel?: string;
+    /** Provider name (for inference log) */
+    provider?: string;
+}
+export interface MeterResult<T> {
+    /** The result from the underlying function, or null if quota exceeded */
+    result: T | null;
+    /** Whether the call was metered */
+    metered: boolean;
+    /** Whether the call was skipped due to quota limits */
+    quotaExceeded: boolean;
+    /** Latency of the underlying function call in ms */
+    latencyMs: number;
+}
+export interface InferenceLogEntry {
+    databaseId: string;
+    entityId: string;
+    actorId: string | null;
+    model: string;
+    provider: string | null;
+    service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
+    operation: string;
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    cacheReadTokens: number | null;
+    cacheWriteTokens: number | null;
+    latencyMs: number;
+    ragEnabled: boolean;
+    chunksRetrieved: number | null;
+    embeddingModel: string | null;
+    embeddingLatencyMs: number | null;
+    status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
+    errorType: string | null;
+    rawUsage: Record<string, unknown> | null;
+}
+/**
+ * Write a row to the usage_log_inference table.
+ * Gracefully skips if the inference_log_module is not provisioned.
+ *
+ * TODO: Also write to child (generated) database when dual-write is needed.
+ */
+export declare function logInferenceUsage(ctx: MeteringContext, entry: InferenceLogEntry): Promise<void>;
+/**
+ * Wrap an embedder with billing quota check + usage recording.
+ *
+ * The returned MeterResult contains `quotaExceeded: true` when the pre-check
+ * fails, enabling the caller to fall back to text-only search.
+ */
+export declare function meteredEmbed(embedder: EmbedderFunction, text: string, ctx: MeteringContext | null, options?: MeteringOptions): Promise<MeterResult<number[]>>;
+/**
+ * Wrap a chat completion call with billing quota check + usage recording.
+ */
+export declare function meteredChat(chat: ChatFunction, messages: ChatMessage[], ctx: MeteringContext | null, chatOptions?: ChatOptions, meteringOptions?: MeteringOptions): Promise<MeterResult<string>>;
+export declare class QuotaExceededError extends Error {
+    readonly code = "QUOTA_EXCEEDED";
+    readonly meterSlug: string;
+    readonly entityId: string;
+    constructor(meterSlug: string, entityId: string);
+}