npm - @stigmer/protos - Versions diffs - 0.3.3 → 0.4.0 - Mend

@stigmer/protos 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

package/ai/stigmer/agentic/agentexecution/v1/usage_pb.d.ts (changed)

@@ -1,421 +1,860 @@
-import type { GenFile, GenMessage } from "@bufbuild/protobuf/codegenv1";
+import type { GenEnum, GenFile, GenMessage } from "@bufbuild/protobuf/codegenv1";
+import type { Timestamp } from "@bufbuild/protobuf/wkt";
 import type { Message } from "@bufbuild/protobuf";
 /**
  * Describes the file ai/stigmer/agentic/agentexecution/v1/usage.proto.
  */
 export declare const file_ai_stigmer_agentic_agentexecution_v1_usage: GenFile;
 /**
- * UsageMetrics tracks token consumption, cost, and LLM resource usage.
- * Provides execution-level aggregation for cost tracking, billing, and analytics.
+ * Normalized token usage from a single LLM call.
+ * Disjoint buckets: every token falls into exactly one category for cost calculation.
  *
- * ## Scope
- *
- * Each UsageMetrics instance represents direct LLM usage within its context:
- * - AgentExecutionStatus.usage: Main agent's LLM calls (excludes sub-agents)
- * - SubAgentExecution.usage: That sub-agent's LLM calls
- *
- * To calculate total execution cost, sum: status.usage + sum(sub_agent.usage)
- *
- * ## Token Counting
- *
- * - prompt_tokens: Total input tokens sent to the LLM (includes cached tokens)
- * - completion_tokens: Output tokens generated by the LLM (response)
- * - cache_creation_tokens / cache_read_tokens: Cache-aware breakdown
- * - Extracted from on_chat_model_end events via LangChain usage_metadata
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.TokenUsage
+ */
+export type TokenUsage = Message<"ai.stigmer.agentic.agentexecution.v1.TokenUsage"> & {
+    /**
+     * Regular input tokens (non-cached, non-tool, billed at standard input rate).
+     *
+     * @generated from field: int64 input_tokens = 1;
+     */
+    inputTokens: bigint;
+    /**
+     * Output/completion tokens generated by the model.
+     *
+     * @generated from field: int64 output_tokens = 2;
+     */
+    outputTokens: bigint;
+    /**
+     * Provider-reported total tokens. May differ from sum of other fields
+     * because providers include reasoning/overhead differently.
+     *
+     * @generated from field: int64 total_tokens = 3;
+     */
+    totalTokens: bigint;
+    /**
+     * Tokens written to prompt cache (Anthropic cache_creation_input_tokens).
+     *
+     * @generated from field: int64 cache_creation_input_tokens = 4;
+     */
+    cacheCreationInputTokens: bigint;
+    /**
+     * Tokens read from prompt cache (cache hits).
+     *
+     * @generated from field: int64 cache_read_input_tokens = 5;
+     */
+    cacheReadInputTokens: bigint;
+    /**
+     * Reasoning/thinking tokens (OpenAI reasoning_tokens, Vertex thoughtsTokenCount).
+     *
+     * @generated from field: int64 reasoning_tokens = 6;
+     */
+    reasoningTokens: bigint;
+    /**
+     * Tool-use prompt tokens (Vertex toolUsePromptTokenCount).
+     *
+     * @generated from field: int64 tool_use_prompt_tokens = 7;
+     */
+    toolUsePromptTokens: bigint;
+    /**
+     * Audio tokens (future multimodal billing).
+     *
+     * @generated from field: int64 audio_input_tokens = 8;
+     */
+    audioInputTokens: bigint;
+    /**
+     * @generated from field: int64 audio_output_tokens = 9;
+     */
+    audioOutputTokens: bigint;
+    /**
+     * Provider-specific token details not yet normalized.
+     *
+     * @generated from field: map<string, int64> provider_token_details = 20;
+     */
+    providerTokenDetails: {
+        [key: string]: bigint;
+    };
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.TokenUsage.
+ * Use `create(TokenUsageSchema)` to create a new message.
+ */
+export declare const TokenUsageSchema: GenMessage<TokenUsage>;
+/**
+ * Pricing rates snapshot at the time of the LLM call.
  *
- * ## Relationship Between Token Fields
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.PricingSnapshot
+ */
+export type PricingSnapshot = Message<"ai.stigmer.agentic.agentexecution.v1.PricingSnapshot"> & {
+    /**
+     * Version of the model pricing registry used.
+     *
+     * @generated from field: string pricing_registry_version = 1;
+     */
+    pricingRegistryVersion: string;
+    /**
+     * When this pricing became effective.
+     *
+     * @generated from field: google.protobuf.Timestamp pricing_effective_at = 2;
+     */
+    pricingEffectiveAt?: Timestamp;
+    /**
+     * Currency (always "USD" for now).
+     *
+     * @generated from field: string currency = 3;
+     */
+    currency: string;
+    /**
+     * Provider cost rates (micro-USD per million tokens).
+     *
+     * @generated from field: int64 input_price_micros_per_million = 10;
+     */
+    inputPriceMicrosPerMillion: bigint;
+    /**
+     * @generated from field: int64 output_price_micros_per_million = 11;
+     */
+    outputPriceMicrosPerMillion: bigint;
+    /**
+     * @generated from field: int64 cache_creation_price_micros_per_million = 12;
+     */
+    cacheCreationPriceMicrosPerMillion: bigint;
+    /**
+     * @generated from field: int64 cache_read_price_micros_per_million = 13;
+     */
+    cacheReadPriceMicrosPerMillion: bigint;
+    /**
+     * @generated from field: int64 reasoning_price_micros_per_million = 14;
+     */
+    reasoningPriceMicrosPerMillion: bigint;
+    /**
+     * Customer billing policy applied.
+     *
+     * @generated from field: string markup_policy_version = 30;
+     */
+    markupPolicyVersion: string;
+    /**
+     * @generated from field: string cost_tier = 31;
+     */
+    costTier: string;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.PricingSnapshot.
+ * Use `create(PricingSnapshotSchema)` to create a new message.
+ */
+export declare const PricingSnapshotSchema: GenMessage<PricingSnapshot>;
+/**
+ * Cost computation result stamped on each usage record.
  *
- * prompt_tokens is the total across all input token types:
- *   prompt_tokens = sum(model_breakdown[].input_tokens
- *                       + model_breakdown[].cache_creation_tokens
- *                       + model_breakdown[].cache_read_tokens)
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.CostStamp
+ */
+export type CostStamp = Message<"ai.stigmer.agentic.agentexecution.v1.CostStamp"> & {
+    /**
+     * Currency (always "USD").
+     *
+     * @generated from field: string currency = 1;
+     */
+    currency: string;
+    /**
+     * Raw provider cost computed server-side from pricing registry.
+     *
+     * @generated from field: int64 provider_cost_micros = 2;
+     */
+    providerCostMicros: bigint;
+    /**
+     * Amount debited from customer credits (after markup policy).
+     *
+     * @generated from field: int64 customer_billable_amount_micros = 3;
+     */
+    customerBillableAmountMicros: bigint;
+    /**
+     * How cost was computed.
+     *
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.CostCalculationStatus calculation_status = 4;
+     */
+    calculationStatus: CostCalculationStatus;
+    /**
+     * Full pricing snapshot for historical reproducibility.
+     *
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.PricingSnapshot pricing = 5;
+     */
+    pricing?: PricingSnapshot;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.CostStamp.
+ * Use `create(CostStampSchema)` to create a new message.
+ */
+export declare const CostStampSchema: GenMessage<CostStamp>;
+/**
+ * Proxy-observed timing for a single LLM call.
+ * These are server-side measurements, not client-perceived latency.
  *
- * At the aggregate level, cache_creation_tokens and cache_read_tokens are the
- * sums of their respective fields across all model breakdowns. The "fresh"
- * (non-cached) input portion equals:
- *   prompt_tokens - cache_creation_tokens - cache_read_tokens
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.ProxyTiming
+ */
+export type ProxyTiming = Message<"ai.stigmer.agentic.agentexecution.v1.ProxyTiming"> & {
+    /**
+     * When the proxy received the runner's request.
+     *
+     * @generated from field: google.protobuf.Timestamp proxy_received_at = 1;
+     */
+    proxyReceivedAt?: Timestamp;
+    /**
+     * When the proxy sent the request to the upstream provider.
+     *
+     * @generated from field: google.protobuf.Timestamp upstream_request_started_at = 2;
+     */
+    upstreamRequestStartedAt?: Timestamp;
+    /**
+     * When the first response byte arrived from the provider.
+     *
+     * @generated from field: google.protobuf.Timestamp first_response_byte_at = 3;
+     */
+    firstResponseByteAt?: Timestamp;
+    /**
+     * When the last response byte arrived from the provider.
+     *
+     * @generated from field: google.protobuf.Timestamp last_response_byte_at = 4;
+     */
+    lastResponseByteAt?: Timestamp;
+    /**
+     * When the proxy completed its handling.
+     *
+     * @generated from field: google.protobuf.Timestamp proxy_completed_at = 5;
+     */
+    proxyCompletedAt?: Timestamp;
+    /**
+     * Derived durations in milliseconds.
+     *
+     * @generated from field: int64 upstream_ttfb_ms = 10;
+     */
+    upstreamTtfbMs: bigint;
+    /**
+     * @generated from field: int64 upstream_ttlb_ms = 11;
+     */
+    upstreamTtlbMs: bigint;
+    /**
+     * @generated from field: int64 stream_duration_ms = 12;
+     */
+    streamDurationMs: bigint;
+    /**
+     * @generated from field: int64 proxy_total_duration_ms = 13;
+     */
+    proxyTotalDurationMs: bigint;
+    /**
+     * Transfer sizes.
+     *
+     * @generated from field: int64 request_bytes = 20;
+     */
+    requestBytes: bigint;
+    /**
+     * @generated from field: int64 response_bytes = 21;
+     */
+    responseBytes: bigint;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.ProxyTiming.
+ * Use `create(ProxyTimingSchema)` to create a new message.
+ */
+export declare const ProxyTimingSchema: GenMessage<ProxyTiming>;
+/**
+ * Links a usage record to its billing debit in the credit ledger.
  *
- * ## Cost Computation
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.BillingLink
+ */
+export type BillingLink = Message<"ai.stigmer.agentic.agentexecution.v1.BillingLink"> & {
+    /**
+     * Current debit status.
+     *
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.BillingDebitStatus debit_status = 1;
+     */
+    debitStatus: BillingDebitStatus;
+    /**
+     * Execution reservation ID (from authorizeExecution).
+     *
+     * @generated from field: string reservation_id = 2;
+     */
+    reservationId: string;
+    /**
+     * Billing ledger debit ID (from reportLlmCallUsage).
+     *
+     * @generated from field: string billing_debit_id = 3;
+     */
+    billingDebitId: string;
+    /**
+     * When the debit was applied.
+     *
+     * @generated from field: google.protobuf.Timestamp debited_at = 4;
+     */
+    debitedAt?: Timestamp;
+    /**
+     * Operational visibility.
+     *
+     * @generated from field: int32 billing_attempt_count = 5;
+     */
+    billingAttemptCount: number;
+    /**
+     * @generated from field: string last_billing_error = 6;
+     */
+    lastBillingError: string;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.BillingLink.
+ * Use `create(BillingLinkSchema)` to create a new message.
+ */
+export declare const BillingLinkSchema: GenMessage<BillingLink>;
+/**
+ * Immutable, billing-authoritative record for a single LLM API call.
  *
- * estimated_cost_usd is computed at execution time using pricing rates stamped
- * on each ModelUsage entry. This makes historical data self-contained — an
- * execution from January 2026 carries January 2026 rates even if prices change.
+ * Written by the proxy after each SSE stream completes (cloud mode) or by
+ * the runner directly (OSS mode). Stored in the `llm_call_usage_record`
+ * MongoDB collection, never embedded in the execution document.
  *
- * ## Model Tracking
+ * ## Lifecycle
+ * 1. Proxy observes SSE stream completion and extracts final usage
+ * 2. Server inserts LlmCallUsageRecord (idempotent by idempotency_key)
+ * 3. Server computes cost and triggers billing debit
+ * 4. billing_link.debit_status transitions to DEBITED
  *
- * primary_model reflects the configured model from ExecutionConfig.model_name.
- * When model is auto-selected or varies per call, this captures the first/main model used.
- * model_breakdown provides per-model detail when multiple models are used.
+ * ## Idempotency
+ * Unique on (execution_id, sequence, metering_source). Insert-only; never mutated
+ * except for billing_link status transitions.
  *
- * @generated from message ai.stigmer.agentic.agentexecution.v1.UsageMetrics
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.LlmCallUsageRecord
  */
-export type UsageMetrics = Message<"ai.stigmer.agentic.agentexecution.v1.UsageMetrics"> & {
+export type LlmCallUsageRecord = Message<"ai.stigmer.agentic.agentexecution.v1.LlmCallUsageRecord"> & {
     /**
-     * Total input tokens (prompts) consumed across all LLM calls in this context.
-     * Includes all input token types: regular, cache writes, and cache reads.
-     * Accumulated from each on_chat_model_end event's usage_metadata.input_tokens.
+     * ─── Identity ───────────────────────────────────────────────────────────────
+     * Stable unique ID for this record (ULID).
      *
-     * @generated from field: int32 prompt_tokens = 1;
+     * @generated from field: string usage_record_id = 1;
      */
-    promptTokens: number;
+    usageRecordId: string;
     /**
-     * Total output tokens (completions) generated across all LLM calls in this context.
-     * Accumulated from each on_chat_model_end event's usage_metadata.output_tokens.
+     * Execution this call belongs to.
      *
-     * @generated from field: int32 completion_tokens = 2;
+     * @generated from field: string execution_id = 2;
      */
-    completionTokens: number;
+    executionId: string;
     /**
-     * Total tokens (prompt + completion). Convenience field for quick reference.
-     * Equals: prompt_tokens + completion_tokens
+     * For sub-agent rollups: root execution of the tree.
      *
-     * @generated from field: int32 total_tokens = 3;
+     * @generated from field: string root_execution_id = 3;
      */
-    totalTokens: number;
+    rootExecutionId: string;
     /**
-     * Number of LLM API calls made in this context.
-     * Incremented for each on_chat_model_end event.
-     * Useful for: average tokens per call, call frequency analysis.
+     * 1-based call ordering within the execution.
      *
-     * @generated from field: int32 llm_call_count = 4;
+     * @generated from field: int32 sequence = 4;
      */
-    llmCallCount: number;
+    sequence: number;
     /**
-     * Primary model used for this execution.
-     * Typically matches ExecutionConfig.model_name or the first model detected.
-     * Examples: "claude-sonnet-4-20250514", "gpt-4o", "gemini-1.5-pro"
+     * Deduplication key: execution_id + sequence + metering_source.
      *
-     * @generated from field: string primary_model = 5;
+     * @generated from field: string idempotency_key = 5;
      */
-    primaryModel: string;
+    idempotencyKey: string;
+    /**
+     * Hash of normalized payload for conflict detection on retries.
+     *
+     * @generated from field: string canonical_payload_hash = 6;
+     */
+    canonicalPayloadHash: string;
     /**
-     * Tokens written to provider prompt cache across all LLM calls.
-     * Anthropic: cache_creation_input_tokens (billed at 1.25x input rate)
-     * OpenAI: N/A (caching is automatic, no separate write metric)
-     * Zero if prompt caching is not active or not supported by provider.
+     * ─── Timestamps ─────────────────────────────────────────────────────────────
+     * When the proxy observed stream completion.
      *
-     * @generated from field: int32 cache_creation_tokens = 6;
+     * @generated from field: google.protobuf.Timestamp observed_at = 10;
      */
-    cacheCreationTokens: number;
+    observedAt?: Timestamp;
     /**
-     * Tokens read from provider prompt cache (cache hits) across all LLM calls.
-     * Anthropic: cache_read_input_tokens (billed at 0.1x input rate)
-     * OpenAI: prompt_tokens_details.cached_tokens (billed at 0.5x input rate)
-     * Zero if no cache hits occurred.
+     * When this record was created in the database.
      *
-     * @generated from field: int32 cache_read_tokens = 7;
+     * @generated from field: google.protobuf.Timestamp created_at = 11;
      */
-    cacheReadTokens: number;
+    createdAt?: Timestamp;
     /**
-     * Breakdown of token usage and cost per model.
-     * Each entry aggregates all LLM calls for a specific model within this context.
-     * Used for cost reports: "claude-sonnet-4 used 50K input tokens = $0.15."
+     * ─── Trust / Source ─────────────────────────────────────────────────────────
+     * Who wrote this record and at what trust level.
      *
-     * @generated from field: repeated ai.stigmer.agentic.agentexecution.v1.ModelUsage model_breakdown = 8;
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.UsageMeteringSource metering_source = 20;
+     */
+    meteringSource: UsageMeteringSource;
+    /**
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.UsageTrustLevel trust_level = 21;
+     */
+    trustLevel: UsageTrustLevel;
+    /**
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.UsageCompletionStatus usage_status = 22;
      */
-    modelBreakdown: ModelUsage[];
+    usageStatus: UsageCompletionStatus;
     /**
-     * Total estimated cost in USD for this execution context.
-     * Computed at execution time using the pricing rates stamped on each ModelUsage.
-     * Equals: sum(model_breakdown[].estimated_cost_usd)
+     * Whether this record should produce a billing debit.
      *
-     * Captured at write time because pricing changes over time — storing the
-     * computed cost makes historical data self-contained and accurate without
-     * needing a pricing history table.
+     * @generated from field: bool is_billable = 23;
+     */
+    isBillable: boolean;
+    /**
+     * ─── Provider / Request Metadata ────────────────────────────────────────────
      *
-     * @generated from field: double estimated_cost_usd = 9;
+     * @generated from field: string provider = 30;
+     */
+    provider: string;
+    /**
+     * @generated from field: string requested_model = 31;
+     */
+    requestedModel: string;
+    /**
+     * @generated from field: string resolved_model = 32;
+     */
+    resolvedModel: string;
+    /**
+     * @generated from field: string endpoint = 33;
+     */
+    endpoint: string;
+    /**
+     * @generated from field: bool streaming = 34;
+     */
+    streaming: boolean;
+    /**
+     * @generated from field: string service_tier = 35;
+     */
+    serviceTier: string;
+    /**
+     * @generated from field: string provider_request_id = 36;
+     */
+    providerRequestId: string;
+    /**
+     * @generated from field: string harness = 37;
+     */
+    harness: string;
+    /**
+     * @generated from field: int32 http_status_code = 40;
      */
-    estimatedCostUsd: number;
+    httpStatusCode: number;
     /**
-     * Total characters truncated from tool results during this execution.
-     * Non-zero indicates the agent received tool outputs that exceeded the
-     * configured max_tool_result_chars limit.
-     * Useful for: tuning truncation limits, identifying verbose tools.
+     * @generated from field: string finish_reason = 41;
+     */
+    finishReason: string;
+    /**
+     * @generated from field: string error_code = 42;
+     */
+    errorCode: string;
+    /**
+     * ─── Token Usage ────────────────────────────────────────────────────────────
      *
-     * @generated from field: int64 tool_result_chars_truncated = 10;
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.TokenUsage tokens = 50;
      */
-    toolResultCharsTruncated: bigint;
+    tokens?: TokenUsage;
     /**
-     * Per-call breakdown of every LLM API call in this context.
-     * Ordered chronologically (call 1, call 2, ...).
-     * Enables debugging expensive calls, tracking token growth across calls,
-     * and verifying cache hit patterns.
+     * ─── Cost ───────────────────────────────────────────────────────────────────
      *
-     * This is the detailed view — model_breakdown is the per-model aggregate.
-     * Both are populated: model_breakdown for reports, llm_calls for debugging.
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.CostStamp cost = 51;
+     */
+    cost?: CostStamp;
+    /**
+     * ─── Proxy Timing ──────────────────────────────────────────────────────────
+     * Present when metering_source is PROXY_PROVIDER_REPORTED.
      *
-     * @generated from field: repeated ai.stigmer.agentic.agentexecution.v1.LlmCallMetrics llm_calls = 11;
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.ProxyTiming proxy_timing = 60;
      */
-    llmCalls: LlmCallMetrics[];
+    proxyTiming?: ProxyTiming;
     /**
-     * Total wall-clock duration of the execution in milliseconds.
-     * Convenience field equivalent to (completed_at - started_at).
-     * Includes all time: LLM calls, tool execution, approval waits, overhead.
+     * ─── Raw Provider Usage ─────────────────────────────────────────────────────
+     * Compact JSON of provider's raw usage object for audit/debug.
      *
-     * @generated from field: int32 total_duration_ms = 12;
+     * @generated from field: string provider_usage_json = 70;
      */
-    totalDurationMs: number;
+    providerUsageJson: string;
     /**
-     * Time spent waiting for LLM responses in milliseconds.
-     * Sum of all LLM call durations (llm_calls[].duration_ms).
-     * Directly correlates with token generation cost.
+     * ─── Billing Handoff ────────────────────────────────────────────────────────
      *
-     * @generated from field: int32 llm_duration_ms = 13;
+     * @generated from field: ai.stigmer.agentic.agentexecution.v1.BillingLink billing = 80;
      */
-    llmDurationMs: number;
+    billing?: BillingLink;
     /**
-     * Time spent executing tools in milliseconds.
-     * Sum of all tool execution durations (tool start to tool complete).
-     * Represents compute resource usage in the sandbox.
+     * Organization that owns this execution.
      *
-     * @generated from field: int32 tool_duration_ms = 14;
+     * @generated from field: string org_id = 7;
      */
-    toolDurationMs: number;
+    orgId: string;
     /**
-     * Time spent waiting for user approval in milliseconds.
-     * Sum of all approval wait times (approval_requested_at to approval_decided_at).
-     * This is idle time — no LLM or compute resources consumed.
-     * Useful for excluding from "active execution cost" calculations.
+     * Session this execution belongs to.
      *
-     * @generated from field: int32 approval_wait_duration_ms = 15;
+     * @generated from field: string session_id = 8;
      */
-    approvalWaitDurationMs: number;
+    sessionId: string;
     /**
-     * Provider that served the primary model.
-     * Examples: "anthropic", "openai", "google", "ollama", "aws-bedrock"
-     * Matches the provider field in model_breakdown for the primary model.
-     * Useful for quick display without diving into model_breakdown.
+     * ─── Labels ─────────────────────────────────────────────────────────────────
+     * Custom metadata for filtering (e.g., agent_path, node_path).
      *
-     * @generated from field: string primary_provider = 16;
+     * @generated from field: map<string, string> labels = 90;
      */
-    primaryProvider: string;
+    labels: {
+        [key: string]: string;
+    };
 };
 /**
- * Describes the message ai.stigmer.agentic.agentexecution.v1.UsageMetrics.
- * Use `create(UsageMetricsSchema)` to create a new message.
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.LlmCallUsageRecord.
+ * Use `create(LlmCallUsageRecordSchema)` to create a new message.
  */
-export declare const UsageMetricsSchema: GenMessage<UsageMetrics>;
+export declare const LlmCallUsageRecordSchema: GenMessage<LlmCallUsageRecord>;
 /**
- * ModelUsage captures token usage and cost for a single model within an execution.
- * Provides the per-model breakdown needed for accurate cost calculation when
- * multiple models are used (e.g., main model + summarization model).
- *
- * ## Token Bucket Design
- *
- * The four token fields (input_tokens, output_tokens, cache_creation_tokens,
- * cache_read_tokens) are disjoint, non-overlapping buckets. Every token falls
- * into exactly one bucket, making cost calculation straightforward:
- *   cost = (input_tokens * input_price / 1M)
- *        + (output_tokens * output_price / 1M)
- *        + (cache_creation_tokens * cache_creation_price / 1M)
- *        + (cache_read_tokens * cache_read_price / 1M)
- *
- * ## input_tokens Semantics
+ * Aggregated usage across a scope (execution, session, agent, or org).
  *
- * input_tokens represents regular (non-cached) input tokens only — tokens
- * billed at the standard input rate. This follows the Anthropic provider
- * convention where input_tokens excludes cache tokens.
+ * Used as the `total_usage` field in session and agent usage report responses.
+ * All token fields are int64 to support large aggregates (org-wide, multi-day).
+ * Cost fields are int64 micro-USD for precision.
  *
- * Total input for this model = input_tokens + cache_creation_tokens + cache_read_tokens
- *
- * ## Pricing Rates
+ * @generated from message ai.stigmer.agentic.agentexecution.v1.UsageReportAggregate
+ */
+export type UsageReportAggregate = Message<"ai.stigmer.agentic.agentexecution.v1.UsageReportAggregate"> & {
+    /**
+     * Token counts.
+     *
+     * @generated from field: int64 input_tokens = 1;
+     */
+    inputTokens: bigint;
+    /**
+     * @generated from field: int64 output_tokens = 2;
+     */
+    outputTokens: bigint;
+    /**
+     * @generated from field: int64 total_tokens = 3;
+     */
+    totalTokens: bigint;
+    /**
+     * @generated from field: int64 cache_creation_input_tokens = 4;
+     */
+    cacheCreationInputTokens: bigint;
+    /**
+     * @generated from field: int64 cache_read_input_tokens = 5;
+     */
+    cacheReadInputTokens: bigint;
+    /**
+     * @generated from field: int64 reasoning_tokens = 6;
+     */
+    reasoningTokens: bigint;
+    /**
+     * Number of LLM API calls in this aggregate.
+     *
+     * @generated from field: int32 llm_call_count = 10;
+     */
+    llmCallCount: number;
+    /**
+     * Cost in micro-USD (1 USD = 1,000,000 micros).
+     * billable = what the customer pays (with markup).
+     * provider = what Stigmer pays the LLM provider (raw).
+     *
+     * @generated from field: int64 billable_cost_micros = 20;
+     */
+    billableCostMicros: bigint;
+    /**
+     * @generated from field: int64 provider_cost_micros = 21;
+     */
+    providerCostMicros: bigint;
+    /**
+     * Primary model and provider (most-used by call count).
+     *
+     * @generated from field: string primary_model = 30;
+     */
+    primaryModel: string;
+    /**
+     * @generated from field: string primary_provider = 31;
+     */
+    primaryProvider: string;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.UsageReportAggregate.
+ * Use `create(UsageReportAggregateSchema)` to create a new message.
+ */
+export declare const UsageReportAggregateSchema: GenMessage<UsageReportAggregate>;
+/**
+ * Per-model usage breakdown within a report scope.
  *
- * Pricing fields are stamped at execution time from the Model Pricing Registry.
- * This makes historical data self-contained: an execution from January 2026
- * carries January 2026 rates even if provider pricing changes later.
+ * Groups token counts and cost by (model, provider) pair. Used in
+ * model_breakdown repeated fields on report responses.
  *
  * @generated from message ai.stigmer.agentic.agentexecution.v1.ModelUsage
  */
 export type ModelUsage = Message<"ai.stigmer.agentic.agentexecution.v1.ModelUsage"> & {
     /**
-     * Model identifier as returned by the provider.
-     * Example: "claude-sonnet-4-20250514", "gpt-4o-2024-08-06"
+     * Model identifier as resolved by the provider.
      *
      * @generated from field: string model = 1;
      */
     model: string;
     /**
-     * Provider identifier. Determines which pricing table to use.
-     * Examples: "anthropic", "openai", "google", "ollama", "aws-bedrock"
+     * LLM provider (e.g., "openai", "anthropic", "cursor").
      *
      * @generated from field: string provider = 2;
      */
     provider: string;
     /**
-     * Regular input tokens (non-cached) billed at the standard input rate.
-     * Anthropic: input_tokens (excludes cache tokens)
-     * OpenAI: prompt_tokens - cached_tokens
+     * Token counts for this model.
      *
-     * Total input = input_tokens + cache_creation_tokens + cache_read_tokens
+     * @generated from field: int64 input_tokens = 3;
+     */
+    inputTokens: bigint;
+    /**
+     * @generated from field: int64 output_tokens = 4;
+     */
+    outputTokens: bigint;
+    /**
+     * @generated from field: int64 cache_creation_input_tokens = 5;
+     */
+    cacheCreationInputTokens: bigint;
+    /**
+     * @generated from field: int64 cache_read_input_tokens = 6;
+     */
+    cacheReadInputTokens: bigint;
+    /**
+     * Number of LLM API calls to this model.
      *
-     * @generated from field: int32 input_tokens = 3;
+     * @generated from field: int32 call_count = 7;
      */
-    inputTokens: number;
+    callCount: number;
     /**
-     * Output tokens generated by the model.
+     * Cost in micro-USD for this model.
      *
-     * @generated from field: int32 output_tokens = 4;
+     * @generated from field: int64 billable_cost_micros = 8;
      */
-    outputTokens: number;
+    billableCostMicros: bigint;
     /**
-     * Tokens written to prompt cache during this execution.
-     * Anthropic: cache_creation_input_tokens (billed at 1.25x input rate)
-     * OpenAI: N/A (caching is automatic, no separate write metric)
+     * @generated from field: int64 provider_cost_micros = 9;
+     */
+    providerCostMicros: bigint;
+};
+/**
+ * Describes the message ai.stigmer.agentic.agentexecution.v1.ModelUsage.
+ * Use `create(ModelUsageSchema)` to create a new message.
+ */
+export declare const ModelUsageSchema: GenMessage<ModelUsage>;
+/**
+ * Identifies who wrote the usage record and under what conditions.
+ *
+ * @generated from enum ai.stigmer.agentic.agentexecution.v1.UsageMeteringSource
+ */
+export declare enum UsageMeteringSource {
+    /**
+     * @generated from enum value: USAGE_METERING_SOURCE_UNSPECIFIED = 0;
+     */
+    UNSPECIFIED = 0,
+    /**
+     * Cloud mode: Stigmer proxy observed provider-reported final usage from SSE stream.
      *
-     * @generated from field: int32 cache_creation_tokens = 5;
+     * @generated from enum value: USAGE_METERING_SOURCE_PROXY_PROVIDER_REPORTED = 1;
      */
-    cacheCreationTokens: number;
+    PROXY_PROVIDER_REPORTED = 1,
     /**
-     * Tokens read from prompt cache (cache hits).
-     * Anthropic: cache_read_input_tokens (billed at 0.1x input rate)
-     * OpenAI: prompt_tokens_details.cached_tokens (billed at 0.5x input rate)
+     * OSS/BYOK mode: runner reports provider response usage directly.
      *
-     * @generated from field: int32 cache_read_tokens = 6;
+     * @generated from enum value: USAGE_METERING_SOURCE_RUNNER_PROVIDER_REPORTED_OSS = 2;
      */
-    cacheReadTokens: number;
+    RUNNER_PROVIDER_REPORTED_OSS = 2,
     /**
-     * Number of LLM API calls made with this model.
+     * Estimate from tokenizer/preflight/local accounting. Never billing-authoritative in cloud mode.
      *
-     * @generated from field: int32 call_count = 7;
+     * @generated from enum value: USAGE_METERING_SOURCE_ESTIMATED = 3;
      */
-    callCount: number;
+    ESTIMATED = 3,
     /**
-     * Input token price per million tokens (USD) at execution time.
+     * Reconciliation from provider admin usage/cost reports.
      *
-     * @generated from field: double input_price_per_million = 8;
+     * @generated from enum value: USAGE_METERING_SOURCE_PROVIDER_ADMIN_RECONCILED = 4;
      */
-    inputPricePerMillion: number;
+    PROVIDER_ADMIN_RECONCILED = 4,
     /**
-     * Output token price per million tokens (USD) at execution time.
+     * Human/system manual adjustment.
      *
-     * @generated from field: double output_price_per_million = 9;
+     * @generated from enum value: USAGE_METERING_SOURCE_MANUAL_ADJUSTMENT = 5;
+     */
+    MANUAL_ADJUSTMENT = 5
+}
+/**
+ * Describes the enum ai.stigmer.agentic.agentexecution.v1.UsageMeteringSource.
+ */
+export declare const UsageMeteringSourceSchema: GenEnum<UsageMeteringSource>;
+/**
+ * Declares what the record can be used for.
+ *
+ * @generated from enum ai.stigmer.agentic.agentexecution.v1.UsageTrustLevel
+ */
+export declare enum UsageTrustLevel {
+    /**
+     * @generated from enum value: USAGE_TRUST_LEVEL_UNSPECIFIED = 0;
      */
-    outputPricePerMillion: number;
+    UNSPECIFIED = 0,
     /**
-     * Cache write token price per million tokens (USD) at execution time.
-     * Anthropic: 1.25x of input_price. OpenAI: same as input.
+     * Billing-authoritative: safe to debit credits from.
      *
-     * @generated from field: double cache_creation_price_per_million = 10;
+     * @generated from enum value: USAGE_TRUST_LEVEL_BILLING_AUTHORITY = 1;
      */
-    cacheCreationPricePerMillion: number;
+    BILLING_AUTHORITY = 1,
     /**
-     * Cache read token price per million tokens (USD) at execution time.
-     * Anthropic: 0.1x of input_price. OpenAI: 0.5x of input_price.
+     * Server-observed but not billing (e.g., proxy timing).
      *
-     * @generated from field: double cache_read_price_per_million = 11;
+     * @generated from enum value: USAGE_TRUST_LEVEL_SERVER_OBSERVED = 2;
      */
-    cacheReadPricePerMillion: number;
+    SERVER_OBSERVED = 2,
     /**
-     * Computed cost in USD for this model's usage in this execution.
-     * = (input_tokens * input_price / 1M)
-     *   + (output_tokens * output_price / 1M)
-     *   + (cache_creation_tokens * cache_creation_price / 1M)
-     *   + (cache_read_tokens * cache_read_price / 1M)
+     * Display-only: runner-reported, never used for billing.
      *
-     * @generated from field: double estimated_cost_usd = 12;
+     * @generated from enum value: USAGE_TRUST_LEVEL_DISPLAY_ONLY = 3;
      */
-    estimatedCostUsd: number;
-};
+    DISPLAY_ONLY = 3
+}
 /**
- * Describes the message ai.stigmer.agentic.agentexecution.v1.ModelUsage.
- * Use `create(ModelUsageSchema)` to create a new message.
+ * Describes the enum ai.stigmer.agentic.agentexecution.v1.UsageTrustLevel.
  */
-export declare const ModelUsageSchema: GenMessage<ModelUsage>;
+export declare const UsageTrustLevelSchema: GenEnum<UsageTrustLevel>;
 /**
- * Metrics for a single LLM API call during execution.
- * Provides per-call granularity for debugging, optimization, and cost attribution.
- *
- * One entry is created for each on_chat_model_end event.
- * Ordered chronologically — the Nth entry corresponds to the Nth LLM call.
- *
- * ## Cache Verification
- *
- * Especially valuable for verifying prompt caching: you expect call 1 to be a
- * cache miss (or cache write), and calls 2+ to have high cache_read_tokens.
- * If they don't, something is wrong with cache breakpoint placement.
+ * Status of provider-reported token usage extraction.
  *
- * ## Size Consideration
- *
- * A typical execution has 3-15 LLM calls. Each LlmCallMetrics is ~10 fields /
- * ~80 bytes. Even a 50-call execution adds only ~4KB — negligible compared to
- * the messages[] and tool_calls[] arrays which carry full content strings.
+ * @generated from enum ai.stigmer.agentic.agentexecution.v1.UsageCompletionStatus
+ */
+export declare enum UsageCompletionStatus {
+    /**
+     * @generated from enum value: USAGE_COMPLETION_STATUS_UNSPECIFIED = 0;
+     */
+    UNSPECIFIED = 0,
+    /**
+     * Provider reported complete final usage (normal case).
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_COMPLETE = 1;
+     */
+    COMPLETE = 1,
+    /**
+     * Stream ended without final usage event (e.g., connection interrupted).
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_STREAM_INTERRUPTED = 2;
+     */
+    STREAM_INTERRUPTED = 2,
+    /**
+     * Provider returned error but included partial usage data.
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_PROVIDER_ERROR_WITH_USAGE = 3;
+     */
+    PROVIDER_ERROR_WITH_USAGE = 3,
+    /**
+     * Provider returned error with no usage data.
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_PROVIDER_ERROR_NO_USAGE = 4;
+     */
+    PROVIDER_ERROR_NO_USAGE = 4,
+    /**
+     * Usage is estimated (tokenizer/preflight), not provider-reported.
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_ESTIMATED_ONLY = 5;
+     */
+    ESTIMATED_ONLY = 5,
+    /**
+     * Reconciled against provider admin reports.
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_RECONCILED = 6;
+     */
+    RECONCILED = 6,
+    /**
+     * Conflict: retry with different payload detected.
+     *
+     * @generated from enum value: USAGE_COMPLETION_STATUS_CONFLICT = 7;
+     */
+    CONFLICT = 7
+}
+/**
+ * Describes the enum ai.stigmer.agentic.agentexecution.v1.UsageCompletionStatus.
+ */
+export declare const UsageCompletionStatusSchema: GenEnum<UsageCompletionStatus>;
+/**
+ * Status of the billing debit for a usage record.
  *
- * @generated from message ai.stigmer.agentic.agentexecution.v1.LlmCallMetrics
+ * @generated from enum ai.stigmer.agentic.agentexecution.v1.BillingDebitStatus
  */
-export type LlmCallMetrics = Message<"ai.stigmer.agentic.agentexecution.v1.LlmCallMetrics"> & {
+export declare enum BillingDebitStatus {
     /**
-     * Sequence number of this LLM call within the execution (1-based).
+     * @generated from enum value: BILLING_DEBIT_STATUS_UNSPECIFIED = 0;
+     */
+    UNSPECIFIED = 0,
+    /**
+     * No billing applicable (e.g., OSS mode, $0 cost, incomplete usage).
      *
-     * @generated from field: int32 sequence = 1;
+     * @generated from enum value: BILLING_DEBIT_STATUS_NOT_APPLICABLE = 1;
      */
-    sequence: number;
+    NOT_APPLICABLE = 1,
     /**
-     * Model used for this specific call.
-     * May differ from primary_model if model fallback or routing occurred.
+     * Debit pending (usage written, billing not yet attempted).
      *
-     * @generated from field: string model = 2;
+     * @generated from enum value: BILLING_DEBIT_STATUS_PENDING = 2;
      */
-    model: string;
+    PENDING = 2,
     /**
-     * Provider for this call.
+     * Credits successfully debited.
      *
-     * @generated from field: string provider = 3;
+     * @generated from enum value: BILLING_DEBIT_STATUS_DEBITED = 3;
      */
-    provider: string;
+    DEBITED = 3,
     /**
-     * Input tokens for this call (total prompt size sent to the model).
+     * Debit failed but retryable (transient error).
      *
-     * @generated from field: int32 input_tokens = 4;
+     * @generated from enum value: BILLING_DEBIT_STATUS_FAILED_RETRYABLE = 4;
      */
-    inputTokens: number;
+    FAILED_RETRYABLE = 4,
     /**
-     * Output tokens generated by the model in this call.
+     * Debit failed terminally (e.g., no reservation found).
      *
-     * @generated from field: int32 output_tokens = 5;
+     * @generated from enum value: BILLING_DEBIT_STATUS_FAILED_TERMINAL = 5;
      */
-    outputTokens: number;
+    FAILED_TERMINAL = 5,
     /**
-     * Cache tokens written during this call.
+     * Conflict detected (duplicate with different payload).
      *
-     * @generated from field: int32 cache_creation_tokens = 6;
+     * @generated from enum value: BILLING_DEBIT_STATUS_CONFLICT = 6;
      */
-    cacheCreationTokens: number;
+    CONFLICT = 6
+}
+/**
+ * Describes the enum ai.stigmer.agentic.agentexecution.v1.BillingDebitStatus.
+ */
+export declare const BillingDebitStatusSchema: GenEnum<BillingDebitStatus>;
+/**
+ * Status of cost calculation for a usage record.
+ *
+ * @generated from enum ai.stigmer.agentic.agentexecution.v1.CostCalculationStatus
+ */
+export declare enum CostCalculationStatus {
+    /**
+     * @generated from enum value: COST_CALCULATION_STATUS_UNSPECIFIED = 0;
+     */
+    UNSPECIFIED = 0,
     /**
-     * Cache tokens read (cache hits) during this call.
+     * Cost computed from pricing registry at write time.
      *
-     * @generated from field: int32 cache_read_tokens = 7;
+     * @generated from enum value: COST_CALCULATION_STATUS_COMPUTED = 1;
      */
-    cacheReadTokens: number;
+    COMPUTED = 1,
     /**
-     * Computed cost in USD for this single call.
+     * Cost is an estimate (model not found in registry).
      *
-     * @generated from field: double estimated_cost_usd = 8;
+     * @generated from enum value: COST_CALCULATION_STATUS_ESTIMATED = 2;
      */
-    estimatedCostUsd: number;
+    ESTIMATED = 2,
     /**
-     * Wall-clock duration of this LLM call in milliseconds.
-     * From request sent to response complete.
+     * Model pricing not found; cost is zero.
      *
-     * @generated from field: int32 duration_ms = 9;
+     * @generated from enum value: COST_CALCULATION_STATUS_PRICE_NOT_FOUND = 3;
      */
-    durationMs: number;
+    PRICE_NOT_FOUND = 3,
     /**
-     * ISO 8601 timestamp when this LLM call started.
+     * Cost was reconciled against provider reports.
      *
-     * @generated from field: string timestamp = 10;
+     * @generated from enum value: COST_CALCULATION_STATUS_RECONCILED = 4;
      */
-    timestamp: string;
+    RECONCILED = 4,
     /**
-     * Total tokens involved in this call. Convenience field for quick reference.
-     * Equals: input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens
-     * Consistent with UsageMetrics.total_tokens (prompt_tokens + completion_tokens).
+     * Cost was manually adjusted.
      *
-     * @generated from field: int32 total_tokens = 11;
+     * @generated from enum value: COST_CALCULATION_STATUS_MANUAL_ADJUSTED = 5;
      */
-    totalTokens: number;
-};
+    MANUAL_ADJUSTED = 5
+}
 /**
- * Describes the message ai.stigmer.agentic.agentexecution.v1.LlmCallMetrics.
- * Use `create(LlmCallMetricsSchema)` to create a new message.
+ * Describes the enum ai.stigmer.agentic.agentexecution.v1.CostCalculationStatus.
  */
-export declare const LlmCallMetricsSchema: GenMessage<LlmCallMetrics>;
+export declare const CostCalculationStatusSchema: GenEnum<CostCalculationStatus>;