npm - @ramarivera/coding-agent-langfuse - Versions diffs - 0.1.42 → 0.1.43 - Mend

@ramarivera/coding-agent-langfuse 0.1.42 → 0.1.43

This diff shows the differences between two publicly released versions of this package as they appear in a supported public registry. It is provided for informational purposes only.

Files changed (6)

package/README.md CHANGED

@@ -3,9 +3,10 @@
 Universal coding-agent Langfuse backfiller and OTLP exporter helpers.
 It imports local histories from Codex, Claude Code, Grok, OpenCode, and Pi into
-Langfuse as session traces with child observations. LLM usage records are kept
-as observation metadata so historical imports do not create Langfuse billing or
-cost rows. Tool calls remain child spans under the same session.
+Langfuse as session traces with child observations. LLM generations include
+Langfuse canonical `usage_details` and `cost_details` attributes so historical
+backfills participate in Langfuse model-usage and cost dashboards. Tool calls
+remain child spans under the same session.
 ```sh
 coding-agent-langfuse-backfill --agents codex,claude,grok,pi,opencode
@@ -35,6 +36,48 @@ npx @ramarivera/coding-agent-langfuse@latest \
 The importer is fail-fast: the first failed OTLP POST stops the run, prints the
 real network cause, and preserves local state so reruns resume cleanly.
+## Cost calculation
+Backfill cost calculation follows Langfuse's OpenTelemetry mapping: generation
+spans receive `langfuse.observation.usage_details` and
+`langfuse.observation.cost_details` JSON attributes. If a source history already
+records a total cost, that recorded value wins. Otherwise, the importer
+calculates per-usage-type USD costs from a model catalog using rates in USD per
+1M tokens.
+The built-in catalog covers OpenAI GPT-5.5 API list pricing plus the toolbox/Pi
+models already used in local configuration, including Fireworks Kimi K2.6,
+Fireworks DeepSeek V4 Pro, MiniMax-M3, Together DeepSeek/Kimi/GLM/MiniMax, and
+Zai GLM. `gpt-5.5` is charged at current standard API list price by default:
+`$5.00` input, `$0.50` cached input, and `$30.00` output per 1M tokens. GPT-5.5
+Pro defaults to `$30.00` input and `$180.00` output per 1M tokens.
+Use an override only when you intentionally want a different accounting policy:
+```sh
+npx @ramarivera/coding-agent-langfuse@latest \
+  --agents codex \
+  --cost-rates-json '{"gpt-5.5":{"input":1,"output":2,"cacheRead":0.1,"cacheWrite":0}}'
+```
+You can also keep the policy in a JSON file and pass `--cost-rates PATH`, or set
+`CODING_AGENT_LANGFUSE_COST_RATES_PATH` /
+`CODING_AGENT_LANGFUSE_COST_RATES_JSON` for both manual backfills and generated
+services. A file can be either a direct model map or `{ "rates": { ... } }`:
+```json
+{
+  "rates": {
+    "gpt-5.5": {
+      "input": 1,
+      "output": 2,
+      "cacheRead": 0.1,
+      "cacheWrite": 0
+    }
+  }
+}
+```
 ## Follow as a host service
 Install a live follower directly from npm. The generated service keeps inference
@@ -112,7 +155,7 @@ npm run test:e2e
 The e2e suite verifies:
 - Codex full session traces with messages, reasoning, tool calls, tool results,
-  and usage metadata
+  usage details, and cost details
 - Follow mode picking up newly written Codex events
 - One CLI run posting reconstructable traces for Claude Code, Codex, Grok,
   OpenCode, and Pi

package/dist/backfill.d.ts CHANGED

@@ -8,6 +8,7 @@ type Usage = {
     cacheWrite?: number;
     total?: number;
     cost?: number;
+    inputIncludesCache?: boolean;
 };
 type BackfillEvent = {
     agent: AgentName;
@@ -44,6 +45,19 @@ type BackfillOptions = {
     maxRequestBytes: number;
     maxFieldBytes: number;
     postDelayMs: number;
+    costRates: CostCatalog;
+};
+type CostRates = {
+    input?: number;
+    output?: number;
+    reasoning?: number;
+    cacheRead?: number;
+    cacheWrite?: number;
+};
+type CostCatalog = Record<string, CostRates>;
+type OtlpOptions = {
+    maxFieldBytes?: number;
+    costRates?: CostCatalog;
 };
 type RunSummary = {
     discovered: Record<string, number>;
@@ -77,9 +91,7 @@ declare function opencodeEvents(homeDir: string, options?: {
     untilMs?: number;
 }): BackfillEvent[];
 declare function fingerprint(event: BackfillEvent): string;
-declare function toOtlp(events: BackfillEvent[], options?: {
-    maxFieldBytes?: number;
-}): Record<string, unknown>;
+declare function toOtlp(events: BackfillEvent[], options?: OtlpOptions): Record<string, unknown>;
 declare function discoverEvents(options: BackfillOptions): BackfillEvent[];
 declare function run(options: BackfillOptions): Promise<RunSummary>;
 declare function follow(options: BackfillOptions): Promise<FollowSummary>;

package/dist/backfill.js CHANGED

@@ -5,13 +5,13 @@ import { existsSync, mkdirSync, renameSync, readdirSync, readFileSync, statSync,
 import { hostname, homedir } from "node:os";
 import { dirname, join } from "node:path";
 const allAgents = ["claude", "codex", "grok", "opencode", "pi"];
-const importIdentityVersion = "v8-cached-input-token-split";
+const importIdentityVersion = "v9-cost-details";
 const importIdentityVersions = {
-    claude: "v11-tool-results",
-    codex: "v9-codex-conversation-events",
-    grok: "v11-chat-history-only",
-    opencode: "v10-opencode-message-parts",
-    pi: "v11-tool-results",
+    claude: "v12-cost-details",
+    codex: "v10-cost-details",
+    grok: "v12-cost-details",
+    opencode: "v11-cost-details",
+    pi: "v12-cost-details",
 };
 const defaultEndpoint = "https://langfuse.ai.roxasroot.net/otel/v1/traces";
 const deadRemoteEndpoint = "http://langfuse.ai.roxasroot.net:14318/v1/traces";
@@ -19,6 +19,79 @@ const defaultMaxRequestBytes = 12 * 1024 * 1024;
 const defaultMaxFieldBytes = 512 * 1024;
 const defaultStatePath = join(homedir(), ".local/state/coding-agent-langfuse/backfill-v6.json");
 const currentHost = hostname();
+const kimiFirepassRates = {
+    input: 2,
+    output: 8,
+    cacheRead: 0.3,
+    cacheWrite: 0,
+};
+const deepseekFireworksRates = {
+    input: 1.74,
+    output: 3.48,
+    cacheRead: 0.15,
+    cacheWrite: 0,
+};
+const miniMaxM3Rates = {
+    input: 0.6,
+    output: 2.4,
+    cacheRead: 0.12,
+    cacheWrite: 0,
+};
+const gpt55Rates = {
+    input: 5,
+    output: 30,
+    cacheRead: 0.5,
+    cacheWrite: 5,
+};
+const gpt55ProRates = {
+    input: 30,
+    output: 180,
+    cacheRead: 30,
+    cacheWrite: 30,
+};
+const defaultCostRates = {
+    "gpt-5.5": gpt55Rates,
+    "openai/gpt-5.5": gpt55Rates,
+    "gpt-5.5-pro": gpt55ProRates,
+    "openai/gpt-5.5-pro": gpt55ProRates,
+    "accounts/fireworks/routers/kimi-k2p6-turbo": kimiFirepassRates,
+    "fireworks-firepass/accounts/fireworks/routers/kimi-k2p6-turbo": kimiFirepassRates,
+    "kimi-for-coding": kimiFirepassRates,
+    "accounts/fireworks/models/deepseek-v4-pro": deepseekFireworksRates,
+    "fireworks/accounts/fireworks/models/deepseek-v4-pro": deepseekFireworksRates,
+    "MiniMax-M3": miniMaxM3Rates,
+    "minimax/MiniMax-M3": miniMaxM3Rates,
+    "together/deepseek-ai/DeepSeek-V4-Pro": {
+        input: 2.1,
+        output: 4.4,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+    },
+    "together/zai-org/GLM-5.1": {
+        input: 1.4,
+        output: 4.4,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+    },
+    "together/moonshotai/Kimi-K2.6": {
+        input: 1.2,
+        output: 4.5,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+    },
+    "together/MiniMaxAI/MiniMax-M2.7": {
+        input: 0.3,
+        output: 1.2,
+        cacheRead: 0.06,
+        cacheWrite: 0,
+    },
+    "zai/glm-5.1": {
+        input: 1.4,
+        output: 4.4,
+        cacheRead: 0.2,
+        cacheWrite: 0,
+    },
+};
 function projectMetadata(cwd) {
     if (!cwd)
         return {};
@@ -46,6 +119,8 @@ Options:
   --max-request-bytes N   Split OTLP POSTs below this JSON byte size (default: ${defaultMaxRequestBytes})
   --max-field-bytes N     Truncate individual input/output fields above this byte size (default: ${defaultMaxFieldBytes})
   --post-delay-ms N       Delay after each successful OTLP POST (default: 0)
+  --cost-rates PATH       JSON model cost-rate overrides in USD per 1M tokens
+  --cost-rates-json JSON  Inline JSON model cost-rate overrides in USD per 1M tokens
   --follow                Keep scanning and sending newly written events
   --poll-interval-ms N    Delay between --follow scans (default: 5000)
   --idle-exit-after-ms N  Stop --follow after this much time without new sends
@@ -71,6 +146,7 @@ function parseArgs(argv) {
     let postDelayMs = Number.parseInt(process.env.LANGFUSE_BACKFILL_POST_DELAY_MS ?? "", 10);
     if (!Number.isFinite(postDelayMs))
         postDelayMs = 0;
+    let costRates = loadCostCatalogFromEnv();
     let follow = false;
     let pollIntervalMs = 5_000;
     let idleExitAfterMs;
@@ -125,6 +201,12 @@ function parseArgs(argv) {
         else if (arg === "--post-delay-ms") {
             postDelayMs = Number.parseInt(next(), 10);
         }
+        else if (arg === "--cost-rates") {
+            costRates = mergeCostCatalog(costRates, loadCostCatalogFile(next()));
+        }
+        else if (arg === "--cost-rates-json") {
+            costRates = mergeCostCatalog(costRates, parseCostCatalogJson(next()));
+        }
         else if (arg === "--follow") {
             follow = true;
         }
@@ -190,7 +272,82 @@ function parseArgs(argv) {
         maxRequestBytes,
         maxFieldBytes,
         postDelayMs,
+        costRates,
+    };
+}
+function loadCostCatalogFromEnv() {
+    let catalog = { ...defaultCostRates };
+    const path = process.env.CODING_AGENT_LANGFUSE_COST_RATES_PATH ??
+        process.env.LANGFUSE_BACKFILL_COST_RATES_PATH;
+    const inlineJson = process.env.CODING_AGENT_LANGFUSE_COST_RATES_JSON ??
+        process.env.LANGFUSE_BACKFILL_COST_RATES_JSON;
+    if (path)
+        catalog = mergeCostCatalog(catalog, loadCostCatalogFile(path));
+    if (inlineJson)
+        catalog = mergeCostCatalog(catalog, parseCostCatalogJson(inlineJson));
+    return catalog;
+}
+function loadCostCatalogFile(path) {
+    return parseCostCatalogJson(readFileSync(path, "utf8"), path);
+}
+function parseCostCatalogJson(json, source = "inline JSON") {
+    let parsed;
+    try {
+        parsed = JSON.parse(json);
+    }
+    catch (error) {
+        throw new Error(`Invalid cost rates ${source}: ${describeError(error)}`);
+    }
+    const root = asRecord(parsed);
+    const nestedRates = asRecord(root.rates);
+    const entries = Object.keys(nestedRates).length > 0 ? nestedRates : root;
+    const catalog = {};
+    for (const [modelKey, rawRates] of Object.entries(entries)) {
+        const rates = normalizeCostRates(rawRates, modelKey, source);
+        if (rates)
+            catalog[modelKey] = rates;
+    }
+    return catalog;
+}
+function normalizeCostRates(value, modelKey, source) {
+    const record = asRecord(value);
+    const rates = {
+        input: asNumber(record.input) ??
+            asNumber(record.input_cost) ??
+            asNumber(record.inputPerMillion),
+        output: asNumber(record.output) ??
+            asNumber(record.output_cost) ??
+            asNumber(record.outputPerMillion),
+        reasoning: asNumber(record.reasoning) ??
+            asNumber(record.reasoning_cost) ??
+            asNumber(record.reasoningPerMillion),
+        cacheRead: asNumber(record.cacheRead) ??
+            asNumber(record.cache_read) ??
+            asNumber(record.cachedInput) ??
+            asNumber(record.input_cached_tokens),
+        cacheWrite: asNumber(record.cacheWrite) ??
+            asNumber(record.cache_write) ??
+            asNumber(record.inputCacheCreation) ??
+            asNumber(record.input_cache_creation),
     };
+    const values = Object.entries(rates).filter(([, rate]) => rate !== undefined);
+    if (values.length === 0)
+        return undefined;
+    const cleanRates = {};
+    for (const [name, rate] of values) {
+        if (rate === undefined || rate < 0) {
+            throw new Error(`Invalid ${name} cost rate for '${modelKey}' in ${source}; rates must be non-negative USD per 1M tokens.`);
+        }
+        cleanRates[name] = rate;
+    }
+    return cleanRates;
+}
+function mergeCostCatalog(base, override) {
+    const merged = { ...base };
+    for (const [modelKey, rates] of Object.entries(override)) {
+        merged[modelKey] = { ...(merged[modelKey] ?? {}), ...rates };
+    }
+    return merged;
 }
 function normalizeEndpoint(endpoint) {
     if (endpoint !== deadRemoteEndpoint)
@@ -312,10 +469,11 @@ function normalizeUsage(value) {
     const cache = asRecord(record.cache);
     const inputDetails = asRecord(record.input_tokens_details);
     const outputDetails = asRecord(record.output_tokens_details);
+    const directInput = asNumber(record.input);
+    const aggregateInput = asNumber(record.input_tokens) ??
+        asNumber(record.prompt_tokens);
     const usage = {
-        input: asNumber(record.input) ??
-            asNumber(record.input_tokens) ??
-            asNumber(record.prompt_tokens),
+        input: directInput ?? aggregateInput,
         output: asNumber(record.output) ??
             asNumber(record.output_tokens) ??
             asNumber(record.completion_tokens),
@@ -340,11 +498,16 @@ function normalizeUsage(value) {
             asNumber(record.total_cost) ??
             asNumber(record.cost),
     };
+    if (usage.input !== undefined) {
+        usage.inputIncludesCache = directInput === undefined;
+    }
     if (usage.total === undefined) {
+        const cacheRead = usage.inputIncludesCache === false ? (usage.cacheRead ?? 0) : 0;
         const total = (usage.input ?? 0) +
             (usage.output ?? 0) +
             (usage.reasoning ?? 0) +
-            (usage.cacheWrite ?? 0);
+            (usage.cacheWrite ?? 0) +
+            cacheRead;
         if (total > 0)
             usage.total = total;
     }
@@ -365,8 +528,8 @@ function usageDetails(usage) {
     if (!usage)
         return undefined;
     const details = {};
-    const cachedInput = usage.cacheRead ?? 0;
-    const cacheWrite = usage.cacheWrite ?? 0;
+    const cachedInput = usage.inputIncludesCache === false ? 0 : (usage.cacheRead ?? 0);
+    const cacheWrite = usage.inputIncludesCache === false ? 0 : (usage.cacheWrite ?? 0);
     const regularInput = usage.input === undefined
         ? undefined
         : Math.max(usage.input - cachedInput - cacheWrite, 0);
@@ -384,6 +547,62 @@ function usageDetails(usage) {
         details.total = usage.total;
     return Object.keys(details).length > 0 ? details : undefined;
 }
+function calculateCost(event, usage, costRates) {
+    if (!usage)
+        return undefined;
+    if (event.usage?.cost !== undefined) {
+        return {
+            details: { total: roundCost(event.usage.cost) },
+            source: "recorded",
+        };
+    }
+    const match = findCostRates(event, costRates);
+    if (!match)
+        return undefined;
+    const { rates, modelKey } = match;
+    const details = {};
+    setCostPart(details, "input", usage.input, rates.input);
+    setCostPart(details, "output", usage.output, rates.output);
+    setCostPart(details, "output_reasoning", usage.output_reasoning, rates.reasoning ?? rates.output);
+    setCostPart(details, "input_cached_tokens", usage.input_cached_tokens, rates.cacheRead ?? rates.input);
+    setCostPart(details, "input_cache_creation", usage.input_cache_creation, rates.cacheWrite ?? rates.input);
+    if (Object.keys(details).length === 0)
+        return undefined;
+    details.total = roundCost(Object.values(details).reduce((sum, value) => sum + value, 0));
+    return {
+        details,
+        source: "calculated",
+        modelKey,
+        rates,
+    };
+}
+function findCostRates(event, costRates) {
+    const modelName = normalizeModelName(event.model);
+    const candidates = [
+        event.provider && event.model ? `${event.provider}/${event.model}` : undefined,
+        event.provider && modelName ? `${event.provider}/${modelName}` : undefined,
+        event.model,
+        modelName,
+    ];
+    const seen = new Set();
+    for (const candidate of candidates) {
+        if (!candidate || seen.has(candidate))
+            continue;
+        seen.add(candidate);
+        const rates = costRates[candidate];
+        if (rates)
+            return { modelKey: candidate, rates };
+    }
+    return undefined;
+}
+function setCostPart(details, key, tokens, usdPerMillionTokens) {
+    if (tokens === undefined || usdPerMillionTokens === undefined)
+        return;
+    details[key] = roundCost((tokens * usdPerMillionTokens) / 1_000_000);
+}
+function roundCost(value) {
+    return Number(value.toFixed(12));
+}
 function isGenerationEvent(event) {
     return event.usage !== undefined && event.role !== "user" &&
         event.role !== "developer" && event.role !== "system";
@@ -1189,6 +1408,7 @@ function limitEventPayload(event, maxFieldBytes) {
 }
 function toOtlp(events, options = {}) {
     const maxFieldBytes = options.maxFieldBytes ?? defaultMaxFieldBytes;
+    const costRates = options.costRates ?? loadCostCatalogFromEnv();
     const spansByTrace = new Map();
     for (const rawEvent of events) {
         const event = limitEventPayload(rawEvent, maxFieldBytes);
@@ -1259,6 +1479,7 @@ function toOtlp(events, options = {}) {
             const modelName = normalizeModelName(event.model);
             const generation = isGenerationEvent(event);
             const usage = usageDetails(event.usage);
+            const cost = generation ? calculateCost(event, usage, costRates) : undefined;
             const eventProject = projectMetadata(event.cwd);
             const attributes = [
                 attr("service.name", `agent.${event.agent}`),
@@ -1286,7 +1507,13 @@ function toOtlp(events, options = {}) {
                 attr("langfuse.observation.metadata.project_folder", eventProject.projectFolder),
                 attr("langfuse.observation.metadata.model", modelName ?? event.model),
                 attr("langfuse.observation.metadata.provider", event.provider),
+                attr("langfuse.observation.usage_details", generation ? usage : undefined),
+                attr("langfuse.observation.cost_details", cost?.details),
                 attr("langfuse.observation.metadata.usage_details", usage),
+                attr("langfuse.observation.metadata.cost_details", cost?.details),
+                attr("langfuse.observation.metadata.cost_source", cost?.source),
+                attr("langfuse.observation.metadata.cost_model_key", cost?.modelKey),
+                attr("langfuse.observation.metadata.cost_rates", cost?.rates),
                 attr("langfuse.observation.metadata.recorded_cost", event.usage?.cost),
                 attr("langfuse.observation.input", event.input),
                 attr("langfuse.observation.output", event.output),
@@ -1465,6 +1692,7 @@ async function run(options) {
                 batchSize: options.batchSize,
                 maxRequestBytes: options.maxRequestBytes,
                 maxFieldBytes: options.maxFieldBytes,
+                costRates: options.costRates,
             });
         }
         catch (error) {
@@ -1490,6 +1718,7 @@ async function run(options) {
             try {
                 await postOtlp(options.endpoint, batch, {
                     maxFieldBytes: options.maxFieldBytes,
+                    costRates: options.costRates,
                 });
                 for (const event of batch) {
                     state.sent[fingerprint(event)] = new Date().toISOString();

package/dist/service.d.ts CHANGED

@@ -13,6 +13,8 @@ type ServiceOptions = {
     batchSize: number;
     pollIntervalMs: number;
     postDelayMs: number;
+    costRatesPath?: string;
+    costRatesJson?: string;
     npxPath: string;
     since?: string;
     dryRun: boolean;

package/dist/service.js CHANGED

@@ -24,6 +24,8 @@ Service options:
   --batch-size N          OTLP spans per POST (default: 10)
   --poll-interval-ms N    Delay between --follow scans (default: 5000)
   --post-delay-ms N       Delay after each successful OTLP POST (default: 0)
+  --cost-rates PATH       JSON model cost-rate overrides in USD per 1M tokens
+  --cost-rates-json JSON  Inline JSON model cost-rate overrides in USD per 1M tokens
   --since ISO_OR_MS       Optional lower bound for events the follower may send
   --working-directory DIR Directory the service starts in (default: --home)
   --path VALUE            PATH value injected into the service environment
@@ -49,6 +51,10 @@ function parseServiceArgs(argv) {
     let batchSize = 10;
     let pollIntervalMs = 5_000;
     let postDelayMs = 0;
+    let costRatesPath = process.env.CODING_AGENT_LANGFUSE_COST_RATES_PATH ??
+        process.env.LANGFUSE_BACKFILL_COST_RATES_PATH;
+    let costRatesJson = process.env.CODING_AGENT_LANGFUSE_COST_RATES_JSON ??
+        process.env.LANGFUSE_BACKFILL_COST_RATES_JSON;
     let since;
     let dryRun = false;
     let start = true;
@@ -95,6 +101,12 @@ function parseServiceArgs(argv) {
         else if (arg === "--post-delay-ms") {
             postDelayMs = parseNonNegativeInt(arg, next());
         }
+        else if (arg === "--cost-rates") {
+            costRatesPath = next();
+        }
+        else if (arg === "--cost-rates-json") {
+            costRatesJson = next();
+        }
         else if (arg === "--since") {
             since = next();
         }
@@ -134,6 +146,8 @@ function parseServiceArgs(argv) {
         batchSize,
         pollIntervalMs,
         postDelayMs,
+        costRatesPath,
+        costRatesJson,
         since,
         dryRun,
         start,
@@ -282,6 +296,10 @@ function buildFollowCommand(options) {
     ];
     if (options.since)
         command.push("--since", options.since);
+    if (options.costRatesPath)
+        command.push("--cost-rates", options.costRatesPath);
+    if (options.costRatesJson)
+        command.push("--cost-rates-json", options.costRatesJson);
     return command;
 }
 function renderSystemdUnit(options, command) {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ramarivera/coding-agent-langfuse",
-  "version": "0.1.42",
+  "version": "0.1.43",
   "description": "Universal coding-agent Langfuse backfiller and live OTLP helpers",
   "type": "module",
   "license": "MIT",