@cat-factory/app 0.39.0 → 0.40.0

This diff shows the changes between publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -43,6 +43,13 @@ const headroomTone = computed(() => headroomColor(headroom.value, m.value.trunca
43
43
  <span class="tabular-nums text-slate-400" title="Prompt / completion tokens">
44
44
  {{ formatTokens(m.promptTokens) }}↑ {{ formatTokens(m.completionTokens) }}↓
45
45
  </span>
46
+ <span
47
+ v-if="(m.cachedPromptTokens ?? 0) > 0"
48
+ class="tabular-nums text-emerald-400/80"
49
+ title="Prompt tokens served from the provider's cache"
50
+ >
51
+ ({{ formatTokens(m.cachedPromptTokens ?? 0) }} cached)
52
+ </span>
46
53
  <div class="ml-auto flex items-center gap-1">
47
54
  <UBadge v-if="m.errors > 0" color="error" variant="subtle" size="sm">
48
55
  {{ m.errors }} error{{ m.errors === 1 ? '' : 's' }}
@@ -34,6 +34,13 @@ interface ProviderMeta {
34
34
  label: string
35
35
  url: string
36
36
  steps: string[]
37
+ /**
38
+ * Whether this provider caches the re-sent prompt prefix. Connecting a key here
39
+ * upgrades its models to the caching `direct` flavour, so a long agentic run stops
40
+ * re-billing its whole growing prompt every turn. Mirrors the backend
41
+ * `providerCachePolicy`; the gateways are pass-through (no caching we rely on yet).
42
+ */
43
+ caches?: boolean
37
44
  }
38
45
 
39
46
  /** Direct vendors: the key reaches that one vendor's own endpoint. */
@@ -46,6 +53,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
46
53
  'Open platform.openai.com → API keys and create a new secret key.',
47
54
  'Copy the key (starts with sk-…); it is shown only once.',
48
55
  ],
56
+ caches: true,
49
57
  },
50
58
  {
51
59
  value: 'anthropic',
@@ -55,6 +63,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
55
63
  'Open console.anthropic.com → Settings → API Keys and create a key.',
56
64
  'Copy the key (starts with sk-ant-…).',
57
65
  ],
66
+ caches: true,
58
67
  },
59
68
  {
60
69
  value: 'qwen',
@@ -64,6 +73,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
64
73
  'Open the DashScope console (international) → API-KEY and create a key.',
65
74
  'Copy the key; it authenticates the OpenAI-compatible Qwen endpoint.',
66
75
  ],
76
+ caches: true,
67
77
  },
68
78
  {
69
79
  value: 'deepseek',
@@ -73,6 +83,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
73
83
  'Open platform.deepseek.com → API keys and create a key.',
74
84
  'Copy the key (starts with sk-…).',
75
85
  ],
86
+ caches: true,
76
87
  },
77
88
  {
78
89
  value: 'moonshot',
@@ -268,6 +279,14 @@ async function remove(k: ApiKey) {
268
279
  </li>
269
280
  </ol>
270
281
 
282
+ <!-- caching capability: connecting a direct key that caches upgrades its models to
283
+ the caching flavour, so long agentic runs stop re-billing the whole prompt. -->
284
+ <p v-if="selected.caches" class="flex items-center gap-1.5 text-[12px] text-emerald-400/90">
285
+ <UIcon name="i-lucide-zap" class="h-3.5 w-3.5 shrink-0" />
286
+ Enables prompt caching for {{ selected.label }} models — a long multi-turn run reuses its
287
+ cached prompt prefix instead of re-sending it every turn.
288
+ </p>
289
+
271
290
  <!-- add form -->
272
291
  <div class="space-y-2">
273
292
  <UFormField label="Label (optional)">
@@ -15,7 +15,7 @@ import { onKeyStroke } from '@vueuse/core'
15
15
  import type { AgentKind } from '~/types/domain'
16
16
  import type { ModelPreset } from '~/types/model-presets'
17
17
  import { MODEL_CONFIGURABLE_SYSTEM_KINDS } from '~/utils/catalog'
18
- import { contextLabel, costLabel, displayFlavor, isSelectable } from '~/stores/models'
18
+ import { cachingLabel, contextLabel, costLabel, displayFlavor, isSelectable } from '~/stores/models'
19
19
 
20
20
  const ui = useUiStore()
21
21
  const models = useModelsStore()
@@ -84,7 +84,10 @@ const selectableModels = computed(() => {
84
84
  const flavor = displayFlavor(m, configured)
85
85
  const ctx = contextLabel(flavor.contextTokens)
86
86
  const price = costLabel(flavor) ?? (flavor.quotaBased ? 'quota' : undefined)
87
- const suffix = [flavor.providerLabel, ctx, price].filter(Boolean).join(' · ')
87
+ // Surface caching in the suffix: a cache-less flavour (the Workers-AI hot path)
88
+ // re-bills its whole growing prompt every turn, which the user can act on.
89
+ const caching = cachingLabel(flavor)
90
+ const suffix = [flavor.providerLabel, ctx, price, caching].filter(Boolean).join(' · ')
88
91
  return {
89
92
  id: m.id,
90
93
  label: m.label,
@@ -12,6 +12,13 @@ export interface DisplayFlavor {
12
12
  /** True ⇒ flat-rate quota; its cost is a quota burn rate, not budget spend. */
13
13
  quotaBased: boolean
14
14
  vendor?: SubscriptionVendor
15
+ /**
16
+ * Whether this flavour's provider caches the re-sent prompt prefix. False on a
17
+ * Cloudflare/Workers-AI flavour (the hot path re-bills the whole prompt every turn);
18
+ * true once a direct key upgrades the model to its caching `direct` flavour. Undefined
19
+ * ⇒ unknown (older catalog). Surfaced as a badge in the picker.
20
+ */
21
+ cachesPrompts?: boolean
15
22
  }
16
23
 
17
24
  /**
@@ -30,6 +37,7 @@ export function displayFlavor(m: ModelOption, configured: Set<SubscriptionVendor
30
37
  cost: m.subscription.cost,
31
38
  quotaBased: true,
32
39
  vendor: m.subscription.vendor,
40
+ cachesPrompts: m.subscription.cachesPrompts,
33
41
  }
34
42
  }
35
43
  return {
@@ -40,6 +48,7 @@ export function displayFlavor(m: ModelOption, configured: Set<SubscriptionVendor
40
48
  cost: m.cost,
41
49
  quotaBased: m.quotaBased ?? false,
42
50
  vendor: m.vendor,
51
+ cachesPrompts: m.cachesPrompts,
43
52
  }
44
53
  }
45
54
 
@@ -69,6 +78,20 @@ export function costLabel(flavor: DisplayFlavor): string | undefined {
69
78
  return flavor.quotaBased ? `quota burn ~${body}` : body
70
79
  }
71
80
 
81
+ /**
82
+ * A short caching label for the picker: whether the flavour's provider caches the
83
+ * re-sent prompt prefix. `null` when unknown (older catalog) so the caller can omit it
84
+ * entirely. A long agentic run on a non-caching flavour re-bills its whole growing
85
+ * prompt every turn (slower, more rate-limited), so we surface it as an informational
86
+ * hint the user can act on (connect a direct key / pick a caching model). The model
87
+ * picker is a text-only dropdown-menu item list, so this is a label token in the option
88
+ * suffix rather than a styled badge.
89
+ */
90
+ export function cachingLabel(flavor: DisplayFlavor): string | null {
91
+ if (flavor.cachesPrompts === undefined) return null
92
+ return flavor.cachesPrompts ? 'Prompt caching' : 'No prompt caching'
93
+ }
94
+
72
95
  /**
73
96
  * The model picker catalog. Served by `GET /models`, where each model is already
74
97
  * resolved to the flavour in use for this deployment (direct when the provider's
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/app",
3
- "version": "0.39.0",
3
+ "version": "0.40.0",
4
4
  "description": "Reusable Nuxt layer for the Agent Architecture Board SPA (components, stores, composables, pages). Consume it from a thin deployment app via `extends: ['@cat-factory/app']` and point it at your backend with NUXT_PUBLIC_API_BASE. See deploy/frontend for an example.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,7 +32,7 @@
32
32
  "pinia-plugin-persistedstate": "^4.7.1",
33
33
  "vue": "^3.5.38",
34
34
  "wretch": "^3.0.9",
35
- "@cat-factory/contracts": "0.38.0"
35
+ "@cat-factory/contracts": "0.39.0"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@toad-contracts/testing": "0.3.1",