npm - graphile-llm - Versions diffs - 0.7.3 → 0.9.0 - Mend

graphile-llm 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/__tests__/graphile-llm.test.js +87 -71
package/chat.d.ts +5 -5
package/chat.js +45 -43
package/config-cache.d.ts +77 -0
package/config-cache.js +148 -0
package/embedder.d.ts +5 -5
package/embedder.js +11 -17
package/env.d.ts +31 -0
package/env.js +52 -0
package/esm/__tests__/graphile-llm.test.js +87 -71
package/esm/chat.d.ts +5 -5
package/esm/chat.js +45 -40
package/esm/config-cache.d.ts +77 -0
package/esm/config-cache.js +143 -0
package/esm/embedder.d.ts +5 -5
package/esm/embedder.js +11 -17
package/esm/env.d.ts +31 -0
package/esm/env.js +49 -0
package/esm/index.d.ts +14 -5
package/esm/index.js +11 -5
package/esm/metering.d.ts +114 -0
package/esm/metering.js +352 -0
package/esm/plugins/agent-discovery-plugin.d.ts +29 -0
package/esm/plugins/agent-discovery-plugin.js +65 -0
package/esm/plugins/llm-module-plugin.d.ts +11 -2
package/esm/plugins/llm-module-plugin.js +15 -7
package/esm/plugins/metering-plugin.d.ts +42 -0
package/esm/plugins/metering-plugin.js +175 -0
package/esm/plugins/rag-plugin.js +20 -20
package/esm/plugins/text-mutation-plugin.d.ts +4 -0
package/esm/plugins/text-mutation-plugin.js +23 -13
package/esm/plugins/text-search-plugin.d.ts +4 -0
package/esm/plugins/text-search-plugin.js +23 -11
package/esm/preset.d.ts +21 -1
package/esm/preset.js +33 -6
package/esm/types.d.ts +86 -10
package/index.d.ts +14 -5
package/index.js +25 -8
package/metering.d.ts +114 -0
package/metering.js +359 -0
package/package.json +15 -15
package/plugins/agent-discovery-plugin.d.ts +29 -0
package/plugins/agent-discovery-plugin.js +69 -0
package/plugins/llm-module-plugin.d.ts +11 -2
package/plugins/llm-module-plugin.js +15 -7
package/plugins/metering-plugin.d.ts +42 -0
package/plugins/metering-plugin.js +178 -0
package/plugins/rag-plugin.js +20 -20
package/plugins/text-mutation-plugin.d.ts +4 -0
package/plugins/text-mutation-plugin.js +23 -13
package/plugins/text-search-plugin.d.ts +4 -0
package/plugins/text-search-plugin.js +23 -11
package/preset.d.ts +21 -1
package/preset.js +33 -6
package/types.d.ts +86 -10

package/types.d.ts CHANGED

@@ -4,9 +4,18 @@
  * Shared type definitions for the LLM plugin.
  */
 /**
- * A function that converts text into a vector embedding.
+ * Result from an embedding call, including real token usage from the provider.
  */
-export type EmbedderFunction = (text: string) => Promise<number[]>;
+export interface EmbeddingResult {
+    /** The vector embedding */
+    embedding: number[];
+    /** Number of prompt tokens consumed (from provider; 0 if unavailable) */
+    promptTokens: number;
+}
+/**
+ * A function that converts text into a vector embedding with token usage.
+ */
+export type EmbedderFunction = (text: string) => Promise<EmbeddingResult>;
 /**
  * Configuration for an embedding provider.
  */
@@ -17,8 +26,24 @@ export interface EmbedderConfig {
     model?: string;
     /** Base URL for the provider (e.g. 'http://localhost:11434' for Ollama) */
     baseUrl?: string;
-    /** API key for providers that require authentication (e.g. OpenAI) */
-    apiKey?: string;
+}
+/**
+ * Token usage metadata returned by LLM providers.
+ * Maps to the billing schema's inference_log columns.
+ */
+export interface LlmUsage {
+    /** Prompt / input tokens consumed */
+    input: number;
+    /** Completion / output tokens generated (includes reasoning for providers that count it) */
+    output: number;
+    /** Reasoning tokens (subset of output — not additive) */
+    reasoning: number;
+    /** Tokens served from prompt cache (zero cost) */
+    cacheRead: number;
+    /** Tokens written to prompt cache */
+    cacheWrite: number;
+    /** input + output + cacheRead + cacheWrite */
+    totalTokens: number;
 }
 /**
  * A single message in a chat conversation.
@@ -37,9 +62,17 @@ export interface ChatOptions {
     temperature?: number;
 }
 /**
- * A function that sends messages to a chat completion provider and returns the response.
+ * Result from a chat completion call, including real token usage.
  */
-export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<string>;
+export interface ChatResult {
+    content: string;
+    usage: LlmUsage;
+}
+/**
+ * A function that sends messages to a chat completion provider
+ * and returns the response with token usage metadata.
+ */
+export type ChatFunction = (messages: ChatMessage[], options?: ChatOptions) => Promise<ChatResult>;
 /**
  * Configuration for a chat completion provider.
  */
@@ -50,8 +83,6 @@ export interface ChatConfig {
     model?: string;
     /** Base URL for the provider */
     baseUrl?: string;
-    /** API key for providers that require authentication */
-    apiKey?: string;
 }
 /**
  * The shape of the `llm_module` data stored in `services_public.api_modules`.
@@ -74,8 +105,6 @@ export interface LlmModuleData {
     chat_model?: string;
     /** Base URL for the chat provider */
     chat_base_url?: string;
-    /** API key reference (e.g. 'vault://openai-key' or env var name) */
-    api_key_ref?: string;
     /** Rate limit: requests per minute */
     rate_limit_rpm?: number;
     /** Maximum tokens per request */
@@ -131,6 +160,41 @@ export interface ChunkTableInfo {
     /** Text content column on chunks table (the actual chunk text) */
     contentField: string;
 }
+/**
+ * Configuration for billing/metering integration.
+ * When provided, embedding and chat calls are wrapped with quota checks
+ * and usage recording via the billing_module functions.
+ */
+export interface MeteringConfig {
+    /**
+     * Meter slug for embedding operations.
+     * Must match a slug in the billing_module meters table.
+     *
+     * @default the embedding model name (e.g. 'text-embedding-3-small')
+     * — meter slug = model name, so each model has its own meter
+     * in the three-level waterfall (per-model → inference pool → universal).
+     */
+    embeddingMeterSlug?: string;
+    /**
+     * Meter slug for chat completion operations.
+     *
+     * @default the chat model name (e.g. 'gpt-4o-mini')
+     */
+    chatMeterSlug?: string;
+    /**
+     * Disable metering entirely (e.g. for local dev).
+     * When true, billing functions are never called.
+     * @default false
+     */
+    skipMetering?: boolean;
+    /**
+     * Resolve the billing entity_id from pgSettings.
+     * The entity_id identifies who gets billed (user, org, etc.).
+     *
+     * @default reads jwt.claims.user_id
+     */
+    resolveEntityId?: (pgSettings: Record<string, string>) => string | null;
+}
 /**
  * Options for the GraphileLlmPreset.
  */
@@ -170,4 +234,16 @@ export interface GraphileLlmOptions {
      * Individual queries can override these values.
      */
     ragDefaults?: RagDefaults;
+    /**
+     * Billing/metering configuration (opt-in).
+     * When truthy, loads the LlmMeteringPlugin which wraps the embedder
+     * with billing quota checks + usage recording.
+     *
+     * Set to `true` to enable metering with defaults (entity_id from jwt.claims.user_id).
+     * Provide a MeteringConfig object for fine-grained control (custom entity_id, meter slugs).
+     * Set to `false` or omit to disable metering entirely.
+     *
+     * @default undefined (metering disabled)
+     */
+    metering?: boolean | MeteringConfig;
 }