npm - @littlebearapps/platform-admin-sdk - Versions diffs - 1.0.0 - Mend

@littlebearapps/platform-admin-sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/templates/shared/workers/lib/usage/queue/cost-budget-enforcement.ts ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Cost Budget Enforcement
+ *
+ * Cost-based circuit breaker enforcement for platform-usage queue processing.
+ * Implements 24-hour windowed cost accumulation (the window restarts once its stored start is 24 hours old) and budget checking.
+ *
+ * Part of the real-time cost tracking feature for Platform SDK.
+ *
+ * Key Concepts:
+ * - Cost budgets stored in KV: CONFIG:FEATURE:{key}:COST_BUDGET
+ * - Accumulated costs stored in KV: STATE:COST:{key}:ACCUMULATED
+ * - Status stored in KV: CONFIG:FEATURE:{key}:STATUS (same key as resource CB)
+ */
+import type { Env } from '../shared';
+import { createLoggerFromEnv } from '@littlebearapps/platform-consumer-sdk';
+/**
+ * Cost budget configuration for a feature.
+ * Stored in KV at CONFIG:FEATURE:{key}:COST_BUDGET as a JSON string.
+ */
+export interface CostBudgetConfig {
+  /** Daily cost limit in USD; the breaker trips when accumulated cost is strictly greater than this */
+  daily_limit_usd: number;
+  /** Optional alert threshold percentage (e.g., 0.8 = 80%); not read by checkAndUpdateCostBudgetStatus */
+  alert_threshold_pct?: number;
+}
+/**
+ * Accumulated cost state stored in KV at STATE:COST:{key}:ACCUMULATED as a JSON string.
+ */
+interface AccumulatedCostState {
+  /** Total cost in USD accumulated in the current window (rounded to 6 decimal places when written) */
+  cost: number;
+  /** Window start timestamp in milliseconds (Date.now() taken when the window's first cost was recorded) */
+  windowStart: number;
+}
+/**
+ * Check and update cost budget status for a feature.
+ *
+ * Costs accumulate in a 24-hour window anchored at the first cost recorded in
+ * that window. Once the stored window start is 24 hours old or more, the
+ * accumulator restarts from the current increment.
+ *
+ * If total cost exceeds the configured daily limit, trips the circuit breaker
+ * using the same STATUS key as resource-based circuit breakers and records a
+ * 'trip' event in D1 on a best-effort basis.
+ *
+ * Errors raised while checking the budget are logged rather than thrown, so a
+ * failed check does not fail the caller. The KV read-modify-write below is not
+ * atomic; concurrent calls for the same feature can overwrite each other's
+ * increments.
+ *
+ * @param featureKey - Feature identifier (e.g., 'my-app:scanner:harvest')
+ * @param costIncrement - Cost in USD to add to accumulator; non-finite or
+ *   negative values are ignored with a warning
+ * @param env - Worker environment
+ */
+export async function checkAndUpdateCostBudgetStatus(
+  featureKey: string,
+  costIncrement: number,
+  env: Env
+): Promise<void> {
+  const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:cost-budget');
+  const budgetKey = `CONFIG:FEATURE:${featureKey}:COST_BUDGET`;
+  const statusKey = `CONFIG:FEATURE:${featureKey}:STATUS`;
+  const accumulatorKey = `STATE:COST:${featureKey}:ACCUMULATED`;
+  try {
+    // Check if cost budget is configured for this feature
+    const budgetJson = await env.PLATFORM_CACHE.get(budgetKey);
+    if (!budgetJson) {
+      // No cost budget configured - skip checking
+      return;
+    }
+    const budget = JSON.parse(budgetJson) as CostBudgetConfig;
+    // Number() mirrors the coercion the `>` comparison would apply; a limit that
+    // is not a finite number could never be exceeded, so surface it instead of
+    // silently disabling enforcement.
+    const limit = Number(budget?.daily_limit_usd);
+    if (!Number.isFinite(limit)) {
+      log.warn(`Invalid cost budget for ${featureKey}`, { budget: budgetJson });
+      return;
+    }
+    // NaN would be serialised as null (wiping the accumulated total) and a
+    // negative value would shrink it, so reject both before touching KV.
+    if (!Number.isFinite(costIncrement) || costIncrement < 0) {
+      log.warn(`Ignoring invalid cost increment for ${featureKey}`, {
+        costIncrement: String(costIncrement),
+      });
+      return;
+    }
+    // 24-hour window; a stored window that started before the cutoff is expired
+    const now = Date.now();
+    const windowMs = 24 * 60 * 60 * 1000;
+    const cutoff = now - windowMs;
+    // Get current accumulated cost
+    let totalCost = costIncrement;
+    let existingWindowStart = now;
+    const stored = await env.PLATFORM_CACHE.get(accumulatorKey);
+    if (stored) {
+      const data = JSON.parse(stored) as Partial<AccumulatedCostState> | null;
+      const storedStart = data?.windowStart;
+      const storedCost = data?.cost;
+      // Only add to existing cost if within the same window
+      if (typeof storedStart === 'number' && storedStart > cutoff) {
+        // A missing or non-finite stored cost counts as zero rather than producing NaN
+        const priorCost =
+          typeof storedCost === 'number' && Number.isFinite(storedCost) ? storedCost : 0;
+        // Use fixed precision to prevent floating point accumulation errors
+        totalCost = Number((priorCost + costIncrement).toFixed(6));
+        existingWindowStart = storedStart;
+      }
+    }
+    // Store updated cost with 25-hour TTL (allows for window overlap)
+    // Round to 6 decimal places to prevent floating point run-on
+    await env.PLATFORM_CACHE.put(
+      accumulatorKey,
+      JSON.stringify({ cost: Number(totalCost.toFixed(6)), windowStart: existingWindowStart }),
+      { expirationTtl: 90000 } // 25 hours
+    );
+    // Check budget violation
+    if (totalCost > limit) {
+      const reason = `cost_usd=${totalCost.toFixed(4)}>${limit}`;
+      // Trip the circuit breaker in KV
+      await env.PLATFORM_CACHE.put(statusKey, 'STOP');
+      // Log to D1 for historical tracking
+      try {
+        await env.PLATFORM_DB.prepare(
+          `INSERT INTO feature_circuit_breaker_events
+           (id, feature_key, event_type, reason, violated_resource, current_value, budget_limit, created_at)
+           VALUES (?1, ?2, 'trip', ?3, 'cost_usd', ?4, ?5, unixepoch())`
+        )
+          .bind(crypto.randomUUID(), featureKey, reason, totalCost, limit)
+          .run();
+      } catch (d1Error) {
+        // D1 logging failure should not prevent KV trip
+        log.error(`Failed to log cost CB event to D1 for ${featureKey}`, d1Error);
+      }
+      log.warn(`Cost CB tripped: ${featureKey}`, {
+        totalCost: totalCost.toFixed(4),
+        limit,
+      });
+    }
+  } catch (error) {
+    // Cost budget check failures should not fail the telemetry write
+    log.error(`Cost budget check failed for ${featureKey}`, error);
+  }
+}

package/templates/shared/workers/lib/usage/queue/cost-calculator.ts ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Cost Calculator
+ *
+ * Calculates CF resource cost from telemetry metrics.
+ * Uses the shared PRICING_TIERS imported from @littlebearapps/platform-consumer-sdk for consistency.
+ *
+ * Part of the real-time cost tracking feature for Platform SDK.
+ */
+import type { FeatureMetrics } from '@littlebearapps/platform-consumer-sdk';
+import { PRICING_TIERS } from '@littlebearapps/platform-consumer-sdk';
+/**
+ * Convert the Cloudflare resource usage in a telemetry message into USD.
+ *
+ * Every metric that is present and non-zero is divided by its pricing unit
+ * and multiplied by the matching PRICING_TIERS rate; the results are summed.
+ * The dollar figures in the comments below are the original annotations; the
+ * rates actually applied come from PRICING_TIERS.
+ *
+ * @param metrics - Feature metrics from telemetry message
+ * @returns Total cost in USD for CF resources
+ */
+export function calculateCFCostFromMetrics(metrics: FeatureMetrics): number {
+  // Each entry: [usage, units per pricing step, price per step].
+  // Prices are looked up lazily so a tier is only read when its metric is set.
+  const lineItems: Array<[number | undefined, number, () => number]> = [
+    // D1: $0.001/billion reads, $1.00/million writes
+    [metrics.d1RowsRead, 1e9, () => PRICING_TIERS.d1.rowsReadPerBillion],
+    [metrics.d1RowsWritten, 1e6, () => PRICING_TIERS.d1.rowsWrittenPerMillion],
+    // KV: $0.50/million reads, $5.00/million writes, $5.00/million deletes, $5.00/million lists
+    [metrics.kvReads, 1e6, () => PRICING_TIERS.kv.readsPerMillion],
+    [metrics.kvWrites, 1e6, () => PRICING_TIERS.kv.writesPerMillion],
+    [metrics.kvDeletes, 1e6, () => PRICING_TIERS.kv.deletesPerMillion],
+    [metrics.kvLists, 1e6, () => PRICING_TIERS.kv.listsPerMillion],
+    // R2: $4.50/million Class A, $0.36/million Class B
+    [metrics.r2ClassA, 1e6, () => PRICING_TIERS.r2.classAPerMillion],
+    [metrics.r2ClassB, 1e6, () => PRICING_TIERS.r2.classBPerMillion],
+    // Workers AI: $0.011/1000 neurons
+    [metrics.aiNeurons, 1000, () => PRICING_TIERS.workersAI.neuronsPerThousand],
+    // Durable Objects: $0.15/million requests, $12.50/million GB-seconds
+    [metrics.doRequests, 1e6, () => PRICING_TIERS.durableObjects.requestsPerMillion],
+    [metrics.doGbSeconds, 1e6, () => PRICING_TIERS.durableObjects.gbSecondsPerMillion],
+    // Vectorize: $0.01/million queried dimensions
+    [metrics.vectorizeQueries, 1e6, () => PRICING_TIERS.vectorize.queriedDimensionsPerMillion],
+    // Queues: $0.40/million messages
+    [metrics.queueMessages, 1e6, () => PRICING_TIERS.queues.messagesPerMillion],
+  ];
+  let total = 0;
+  for (const [usage, unitSize, pricePerUnit] of lineItems) {
+    // Absent or zero metrics contribute nothing
+    if (!usage) {
+      continue;
+    }
+    total += (usage / unitSize) * pricePerUnit();
+  }
+  return total;
+}

package/templates/shared/workers/lib/usage/queue/dlq-handler.ts ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * Dead Letter Queue Handler
+ *
+ * Consumes messages from the platform-telemetry-dlq queue and persists them
+ * to D1 for admin visibility, debugging, and replay.
+ *
+ * Messages land here after max_retries (5) failures in the main queue consumer.
+ */
+import type { MessageBatch } from '@cloudflare/workers-types';
+import type { Env, TelemetryMessage } from '../shared';
+import { createLoggerFromEnv } from '@littlebearapps/platform-consumer-sdk';
+import { categoriseError } from '@littlebearapps/platform-consumer-sdk';
+// =============================================================================
+// DLQ CONSTANTS
+// =============================================================================
+/** Maximum length (in string characters) of the error message stored per DLQ row. */
+const MAX_ERROR_MESSAGE_LENGTH = 1000;
+/** Maximum length of the serialised payload stored per DLQ row; longer payloads are cut and suffixed with '...'. */
+const MAX_PAYLOAD_LENGTH = 10000;
+// =============================================================================
+// DLQ MESSAGE PERSISTENCE
+// =============================================================================
+/**
+ * Write one dead-lettered telemetry message to the dead_letter_queue table.
+ *
+ * The message is serialised to JSON and cut to MAX_PAYLOAD_LENGTH characters
+ * (with a trailing '...') when longer; the error message is cut to
+ * MAX_ERROR_MESSAGE_LENGTH characters and stored as NULL when null or empty.
+ * The row is inserted with status 'pending'.
+ *
+ * @param telemetry - The telemetry message that was dead-lettered
+ * @param errorMessage - Human-readable failure description, or null
+ * @param errorCategory - Error category recorded on the row
+ * @param errorFingerprint - Fingerprint recorded on the row
+ * @param retryCount - Retry count recorded on the row
+ * @param env - Worker environment providing PLATFORM_DB
+ */
+async function persistDLQMessage(
+  telemetry: TelemetryMessage,
+  errorMessage: string | null,
+  errorCategory: string,
+  errorFingerprint: string,
+  retryCount: number,
+  env: Env
+): Promise<void> {
+  const serialized = JSON.stringify(telemetry);
+  let storedPayload = serialized;
+  if (serialized.length > MAX_PAYLOAD_LENGTH) {
+    storedPayload = serialized.slice(0, MAX_PAYLOAD_LENGTH) + '...';
+  }
+  // Null and empty error messages are both stored as NULL
+  const storedError = errorMessage ? errorMessage.slice(0, MAX_ERROR_MESSAGE_LENGTH) : null;
+  const insertSql = `INSERT INTO dead_letter_queue (
+      id, message_payload, feature_key, project, category, feature,
+      error_message, error_category, error_fingerprint, retry_count,
+      correlation_id, original_timestamp, status, created_at, updated_at
+    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', unixepoch(), unixepoch())`;
+  // Values in the same order as the positional placeholders above
+  const rowValues = [
+    crypto.randomUUID(),
+    storedPayload,
+    telemetry.feature_key,
+    telemetry.project,
+    telemetry.category,
+    telemetry.feature,
+    storedError,
+    errorCategory,
+    errorFingerprint,
+    retryCount,
+    telemetry.correlation_id ? telemetry.correlation_id : null,
+    telemetry.timestamp,
+  ];
+  const statement = env.PLATFORM_DB.prepare(insertSql).bind(...rowValues);
+  await statement.run();
+}
+// =============================================================================
+// DLQ QUEUE HANDLER
+// =============================================================================
+/**
+ * Handle messages from the Dead Letter Queue.
+ *
+ * Messages arrive here after exhausting retries in the main queue.
+ * Each message is persisted to D1 for visibility and is always acknowledged,
+ * whether or not persistence succeeded, to prevent re-delivery. After the
+ * batch is processed, a digest alert is posted to ALERT_ROUTER when that
+ * binding is present and the batch was non-empty.
+ *
+ * @param batch - Batch of dead-lettered telemetry messages
+ * @param env - Worker environment (PLATFORM_DB, optional ALERT_ROUTER)
+ */
+async function handleDLQ(batch: MessageBatch<TelemetryMessage>, env: Env): Promise<void> {
+  const log = createLoggerFromEnv(env, 'platform-usage', 'platform:usage:dlq');
+  log.warn('Processing DLQ batch', { messages: batch.messages.length });
+  // The number of attempts made by the main consumer is not read from the
+  // message; record the configured limit instead.
+  const retryCount = 5; // Matches max_retries in wrangler config
+  let successCount = 0;
+  let errorCount = 0;
+  for (const message of batch.messages) {
+    try {
+      const telemetry = message.body;
+      // Since we don't have the original error, categorise based on telemetry content
+      const errorCategory = telemetry.error_category || 'INTERNAL';
+      const errorFingerprint = `dlq:${telemetry.feature_key}:${errorCategory}`;
+      // Persist to D1
+      await persistDLQMessage(
+        telemetry,
+        'Message exhausted retries in telemetry queue',
+        errorCategory,
+        errorFingerprint,
+        retryCount,
+        env
+      );
+      log.info('DLQ message persisted', {
+        feature_key: telemetry.feature_key,
+        project: telemetry.project,
+        error_category: errorCategory,
+        correlation_id: telemetry.correlation_id,
+      });
+      successCount++;
+    } catch (error) {
+      // Log the error for investigation. The body may be missing or malformed
+      // (that can be the very reason we are here), so read it defensively;
+      // throwing from this handler would fail the whole batch.
+      const errorCategory = categoriseError(error);
+      log.error('Failed to persist DLQ message, acknowledging anyway', error, {
+        feature_key: message.body?.feature_key,
+        error_category: errorCategory,
+      });
+      errorCount++;
+    } finally {
+      // Even if the D1 write fails, ack the message to prevent an infinite loop
+      message.ack();
+    }
+  }
+  log.info('DLQ batch complete', {
+    persisted: successCount,
+    failed: errorCount,
+    total: batch.messages.length,
+  });
+  // Send alert if DLQ is receiving messages (indicates systemic issue)
+  if (batch.messages.length > 0 && env.ALERT_ROUTER) {
+    try {
+      await env.ALERT_ROUTER.fetch('https://alert-router/errors', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          type: 'p1_digest',
+          feature_key: 'platform:usage:dlq',
+          project: 'platform',
+          category: 'usage',
+          feature: 'dlq',
+          total_errors: batch.messages.length,
+          distinct_types: new Set(batch.messages.map((m) => m.body?.error_category || 'INTERNAL'))
+            .size,
+        }),
+      });
+    } catch (alertError) {
+      // Alerting is best effort; messages have already been acknowledged
+      log.error('Failed to send DLQ alert', alertError);
+    }
+  }
+}
+// =============================================================================
+// EXPORTS
+// =============================================================================
+export { handleDLQ, persistDLQMessage };

package/templates/shared/workers/lib/usage/queue/index.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Queue Module Exports
+ *
+ * Barrel export for all queue processing modules.
+ * These handle telemetry queue consumption, budget enforcement, and circuit breakers.
+ */
+// Telemetry processing (queue consumer, heartbeat handling)
+export * from './telemetry-processor';
+// Dead Letter Queue handler
+export * from './dlq-handler';
+// Budget enforcement (circuit breakers, status tracking)
+export * from './budget-enforcement';
+// Cost calculation and enforcement
+export * from './cost-calculator';
+export * from './cost-budget-enforcement';