@mmnto/totem 1.12.0 → 1.14.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/compile-lesson.d.ts +25 -1
  2. package/dist/compile-lesson.d.ts.map +1 -1
  3. package/dist/compile-lesson.js +33 -9
  4. package/dist/compile-lesson.js.map +1 -1
  5. package/dist/compile-lesson.test.js +135 -5
  6. package/dist/compile-lesson.test.js.map +1 -1
  7. package/dist/compiler-schema.d.ts +86 -4
  8. package/dist/compiler-schema.d.ts.map +1 -1
  9. package/dist/compiler-schema.js +34 -2
  10. package/dist/compiler-schema.js.map +1 -1
  11. package/dist/compiler.d.ts +0 -4
  12. package/dist/compiler.d.ts.map +1 -1
  13. package/dist/compiler.js +30 -5
  14. package/dist/compiler.js.map +1 -1
  15. package/dist/compiler.test.js +128 -0
  16. package/dist/compiler.test.js.map +1 -1
  17. package/dist/config-schema.d.ts +440 -0
  18. package/dist/config-schema.d.ts.map +1 -1
  19. package/dist/config-schema.js +24 -0
  20. package/dist/config-schema.js.map +1 -1
  21. package/dist/config-schema.test.js +127 -0
  22. package/dist/config-schema.test.js.map +1 -1
  23. package/dist/drift-detector.d.ts +6 -2
  24. package/dist/drift-detector.d.ts.map +1 -1
  25. package/dist/drift-detector.js +34 -5
  26. package/dist/drift-detector.js.map +1 -1
  27. package/dist/drift-detector.test.js +77 -0
  28. package/dist/drift-detector.test.js.map +1 -1
  29. package/dist/errors.d.ts +1 -1
  30. package/dist/errors.d.ts.map +1 -1
  31. package/dist/errors.js.map +1 -1
  32. package/dist/index.d.ts +4 -4
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +3 -3
  35. package/dist/index.js.map +1 -1
  36. package/dist/ingest/pipeline.js +1 -1
  37. package/dist/ingest/pipeline.js.map +1 -1
  38. package/dist/lesson-io.d.ts.map +1 -1
  39. package/dist/lesson-io.js +21 -3
  40. package/dist/lesson-io.js.map +1 -1
  41. package/dist/lesson-io.test.js +48 -0
  42. package/dist/lesson-io.test.js.map +1 -1
  43. package/dist/lesson-pattern.d.ts +22 -0
  44. package/dist/lesson-pattern.d.ts.map +1 -1
  45. package/dist/lesson-pattern.js +95 -7
  46. package/dist/lesson-pattern.js.map +1 -1
  47. package/dist/lesson-pattern.test.js +255 -1
  48. package/dist/lesson-pattern.test.js.map +1 -1
  49. package/dist/rule-engine.d.ts.map +1 -1
  50. package/dist/rule-engine.js +11 -9
  51. package/dist/rule-engine.js.map +1 -1
  52. package/dist/rule-metrics.d.ts +114 -2
  53. package/dist/rule-metrics.d.ts.map +1 -1
  54. package/dist/rule-metrics.js +24 -1
  55. package/dist/rule-metrics.js.map +1 -1
  56. package/dist/rule-metrics.test.js +60 -1
  57. package/dist/rule-metrics.test.js.map +1 -1
  58. package/dist/rule-tester.d.ts +8 -0
  59. package/dist/rule-tester.d.ts.map +1 -1
  60. package/dist/rule-tester.js +19 -1
  61. package/dist/rule-tester.js.map +1 -1
  62. package/dist/rule-tester.test.js +36 -1
  63. package/dist/rule-tester.test.js.map +1 -1
  64. package/dist/semantic-dedup.d.ts +12 -1
  65. package/dist/semantic-dedup.d.ts.map +1 -1
  66. package/dist/semantic-dedup.js +34 -4
  67. package/dist/semantic-dedup.js.map +1 -1
  68. package/dist/semantic-dedup.test.d.ts +2 -0
  69. package/dist/semantic-dedup.test.d.ts.map +1 -0
  70. package/dist/semantic-dedup.test.js +83 -0
  71. package/dist/semantic-dedup.test.js.map +1 -0
  72. package/dist/store/lance-search.d.ts +4 -4
  73. package/dist/store/lance-search.d.ts.map +1 -1
  74. package/dist/store/lance-search.js +27 -12
  75. package/dist/store/lance-search.js.map +1 -1
  76. package/dist/store/lance-search.test.js +109 -27
  77. package/dist/store/lance-search.test.js.map +1 -1
  78. package/dist/store/lance-store.d.ts +17 -2
  79. package/dist/store/lance-store.d.ts.map +1 -1
  80. package/dist/store/lance-store.js +20 -4
  81. package/dist/store/lance-store.js.map +1 -1
  82. package/dist/store/lance-store.test.js +1 -1
  83. package/dist/store/lance-store.test.js.map +1 -1
  84. package/dist/types.d.ts +49 -0
  85. package/dist/types.d.ts.map +1 -1
  86. package/dist/types.js.map +1 -1
  87. package/package.json +1 -1
@@ -109,6 +109,30 @@ export declare const ShellOrchestratorSchema: z.ZodObject<{
109
109
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
110
110
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
111
111
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
112
+ /**
113
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
114
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
115
+ * `systemPrompt` segments will be marked with cache_control directives to
116
+ * reduce input-token cost on repeat invocations within the TTL window.
117
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
118
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
119
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
120
+ * `.totem/cache/<command>-<hash>.json`.
121
+ */
122
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
123
+ /**
124
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
125
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
126
+ * — 2x write cost, ~10% read cost). Only consulted when
127
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
128
+ *
129
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
130
+ * fail fast at config load instead of silently falling through to 5m at
131
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
132
+ * Anthropic docs verified for both values:
133
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
134
+ */
135
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
112
136
  provider: z.ZodLiteral<"shell">;
113
137
  /** Shell command with {file} and {model} placeholders */
114
138
  command: z.ZodString;
@@ -119,6 +143,8 @@ export declare const ShellOrchestratorSchema: z.ZodObject<{
119
143
  fallbackModel?: string | undefined;
120
144
  overrides?: Record<string, string> | undefined;
121
145
  cacheTtls?: Record<string, number> | undefined;
146
+ enableContextCaching?: boolean | undefined;
147
+ cacheTTL?: 300 | 3600 | undefined;
122
148
  }, {
123
149
  provider: "shell";
124
150
  command: string;
@@ -126,6 +152,8 @@ export declare const ShellOrchestratorSchema: z.ZodObject<{
126
152
  fallbackModel?: string | undefined;
127
153
  overrides?: Record<string, string> | undefined;
128
154
  cacheTtls?: Record<string, number> | undefined;
155
+ enableContextCaching?: boolean | undefined;
156
+ cacheTTL?: 300 | 3600 | undefined;
129
157
  }>;
130
158
  export declare const GeminiOrchestratorSchema: z.ZodObject<{
131
159
  /** Default model name if --model is not passed */
@@ -136,6 +164,30 @@ export declare const GeminiOrchestratorSchema: z.ZodObject<{
136
164
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
137
165
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
138
166
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
167
+ /**
168
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
169
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
170
+ * `systemPrompt` segments will be marked with cache_control directives to
171
+ * reduce input-token cost on repeat invocations within the TTL window.
172
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
173
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
174
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
175
+ * `.totem/cache/<command>-<hash>.json`.
176
+ */
177
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
178
+ /**
179
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
180
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
181
+ * — 2x write cost, ~10% read cost). Only consulted when
182
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
183
+ *
184
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
185
+ * fail fast at config load instead of silently falling through to 5m at
186
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
187
+ * Anthropic docs verified for both values:
188
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
189
+ */
190
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
139
191
  provider: z.ZodLiteral<"gemini">;
140
192
  }, "strip", z.ZodTypeAny, {
141
193
  provider: "gemini";
@@ -143,12 +195,16 @@ export declare const GeminiOrchestratorSchema: z.ZodObject<{
143
195
  fallbackModel?: string | undefined;
144
196
  overrides?: Record<string, string> | undefined;
145
197
  cacheTtls?: Record<string, number> | undefined;
198
+ enableContextCaching?: boolean | undefined;
199
+ cacheTTL?: 300 | 3600 | undefined;
146
200
  }, {
147
201
  provider: "gemini";
148
202
  defaultModel?: string | undefined;
149
203
  fallbackModel?: string | undefined;
150
204
  overrides?: Record<string, string> | undefined;
151
205
  cacheTtls?: Record<string, number> | undefined;
206
+ enableContextCaching?: boolean | undefined;
207
+ cacheTTL?: 300 | 3600 | undefined;
152
208
  }>;
153
209
  export declare const AnthropicOrchestratorSchema: z.ZodObject<{
154
210
  /** Default model name if --model is not passed */
@@ -159,6 +215,30 @@ export declare const AnthropicOrchestratorSchema: z.ZodObject<{
159
215
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
160
216
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
161
217
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
218
+ /**
219
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
220
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
221
+ * `systemPrompt` segments will be marked with cache_control directives to
222
+ * reduce input-token cost on repeat invocations within the TTL window.
223
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
224
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
225
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
226
+ * `.totem/cache/<command>-<hash>.json`.
227
+ */
228
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
229
+ /**
230
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
231
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
232
+ * — 2x write cost, ~10% read cost). Only consulted when
233
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
234
+ *
235
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
236
+ * fail fast at config load instead of silently falling through to 5m at
237
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
238
+ * Anthropic docs verified for both values:
239
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
240
+ */
241
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
162
242
  provider: z.ZodLiteral<"anthropic">;
163
243
  }, "strip", z.ZodTypeAny, {
164
244
  provider: "anthropic";
@@ -166,12 +246,16 @@ export declare const AnthropicOrchestratorSchema: z.ZodObject<{
166
246
  fallbackModel?: string | undefined;
167
247
  overrides?: Record<string, string> | undefined;
168
248
  cacheTtls?: Record<string, number> | undefined;
249
+ enableContextCaching?: boolean | undefined;
250
+ cacheTTL?: 300 | 3600 | undefined;
169
251
  }, {
170
252
  provider: "anthropic";
171
253
  defaultModel?: string | undefined;
172
254
  fallbackModel?: string | undefined;
173
255
  overrides?: Record<string, string> | undefined;
174
256
  cacheTtls?: Record<string, number> | undefined;
257
+ enableContextCaching?: boolean | undefined;
258
+ cacheTTL?: 300 | 3600 | undefined;
175
259
  }>;
176
260
  export declare const OpenAIOrchestratorSchema: z.ZodObject<{
177
261
  /** Default model name if --model is not passed */
@@ -182,6 +266,30 @@ export declare const OpenAIOrchestratorSchema: z.ZodObject<{
182
266
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
183
267
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
184
268
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
269
+ /**
270
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
271
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
272
+ * `systemPrompt` segments will be marked with cache_control directives to
273
+ * reduce input-token cost on repeat invocations within the TTL window.
274
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
275
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
276
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
277
+ * `.totem/cache/<command>-<hash>.json`.
278
+ */
279
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
280
+ /**
281
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
282
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
283
+ * — 2x write cost, ~10% read cost). Only consulted when
284
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
285
+ *
286
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
287
+ * fail fast at config load instead of silently falling through to 5m at
288
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
289
+ * Anthropic docs verified for both values:
290
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
291
+ */
292
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
185
293
  provider: z.ZodLiteral<"openai">;
186
294
  /** Optional base URL for OpenAI-compatible servers (Ollama, LM Studio, etc.) */
187
295
  baseUrl: z.ZodOptional<z.ZodString>;
@@ -192,6 +300,8 @@ export declare const OpenAIOrchestratorSchema: z.ZodObject<{
192
300
  fallbackModel?: string | undefined;
193
301
  overrides?: Record<string, string> | undefined;
194
302
  cacheTtls?: Record<string, number> | undefined;
303
+ enableContextCaching?: boolean | undefined;
304
+ cacheTTL?: 300 | 3600 | undefined;
195
305
  }, {
196
306
  provider: "openai";
197
307
  baseUrl?: string | undefined;
@@ -199,6 +309,8 @@ export declare const OpenAIOrchestratorSchema: z.ZodObject<{
199
309
  fallbackModel?: string | undefined;
200
310
  overrides?: Record<string, string> | undefined;
201
311
  cacheTtls?: Record<string, number> | undefined;
312
+ enableContextCaching?: boolean | undefined;
313
+ cacheTTL?: 300 | 3600 | undefined;
202
314
  }>;
203
315
  export declare const OllamaOrchestratorSchema: z.ZodObject<{
204
316
  /** Default model name if --model is not passed */
@@ -209,6 +321,30 @@ export declare const OllamaOrchestratorSchema: z.ZodObject<{
209
321
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
210
322
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
211
323
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
324
+ /**
325
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
326
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
327
+ * `systemPrompt` segments will be marked with cache_control directives to
328
+ * reduce input-token cost on repeat invocations within the TTL window.
329
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
330
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
331
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
332
+ * `.totem/cache/<command>-<hash>.json`.
333
+ */
334
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
335
+ /**
336
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
337
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
338
+ * — 2x write cost, ~10% read cost). Only consulted when
339
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
340
+ *
341
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
342
+ * fail fast at config load instead of silently falling through to 5m at
343
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
344
+ * Anthropic docs verified for both values:
345
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
346
+ */
347
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
212
348
  provider: z.ZodLiteral<"ollama">;
213
349
  /** Base URL for the Ollama server */
214
350
  baseUrl: z.ZodDefault<z.ZodString>;
@@ -221,6 +357,8 @@ export declare const OllamaOrchestratorSchema: z.ZodObject<{
221
357
  fallbackModel?: string | undefined;
222
358
  overrides?: Record<string, string> | undefined;
223
359
  cacheTtls?: Record<string, number> | undefined;
360
+ enableContextCaching?: boolean | undefined;
361
+ cacheTTL?: 300 | 3600 | undefined;
224
362
  numCtx?: number | undefined;
225
363
  }, {
226
364
  provider: "ollama";
@@ -229,6 +367,8 @@ export declare const OllamaOrchestratorSchema: z.ZodObject<{
229
367
  fallbackModel?: string | undefined;
230
368
  overrides?: Record<string, string> | undefined;
231
369
  cacheTtls?: Record<string, number> | undefined;
370
+ enableContextCaching?: boolean | undefined;
371
+ cacheTTL?: 300 | 3600 | undefined;
232
372
  numCtx?: number | undefined;
233
373
  }>;
234
374
  export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.ZodObject<{
@@ -240,6 +380,30 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
240
380
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
241
381
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
242
382
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
383
+ /**
384
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
385
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
386
+ * `systemPrompt` segments will be marked with cache_control directives to
387
+ * reduce input-token cost on repeat invocations within the TTL window.
388
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
389
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
390
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
391
+ * `.totem/cache/<command>-<hash>.json`.
392
+ */
393
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
394
+ /**
395
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
396
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
397
+ * — 2x write cost, ~10% read cost). Only consulted when
398
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
399
+ *
400
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
401
+ * fail fast at config load instead of silently falling through to 5m at
402
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
403
+ * Anthropic docs verified for both values:
404
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
405
+ */
406
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
243
407
  provider: z.ZodLiteral<"shell">;
244
408
  /** Shell command with {file} and {model} placeholders */
245
409
  command: z.ZodString;
@@ -250,6 +414,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
250
414
  fallbackModel?: string | undefined;
251
415
  overrides?: Record<string, string> | undefined;
252
416
  cacheTtls?: Record<string, number> | undefined;
417
+ enableContextCaching?: boolean | undefined;
418
+ cacheTTL?: 300 | 3600 | undefined;
253
419
  }, {
254
420
  provider: "shell";
255
421
  command: string;
@@ -257,6 +423,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
257
423
  fallbackModel?: string | undefined;
258
424
  overrides?: Record<string, string> | undefined;
259
425
  cacheTtls?: Record<string, number> | undefined;
426
+ enableContextCaching?: boolean | undefined;
427
+ cacheTTL?: 300 | 3600 | undefined;
260
428
  }>, z.ZodObject<{
261
429
  /** Default model name if --model is not passed */
262
430
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -266,6 +434,30 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
266
434
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
267
435
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
268
436
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
437
+ /**
438
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
439
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
440
+ * `systemPrompt` segments will be marked with cache_control directives to
441
+ * reduce input-token cost on repeat invocations within the TTL window.
442
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
443
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
444
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
445
+ * `.totem/cache/<command>-<hash>.json`.
446
+ */
447
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
448
+ /**
449
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
450
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
451
+ * — 2x write cost, ~10% read cost). Only consulted when
452
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
453
+ *
454
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
455
+ * fail fast at config load instead of silently falling through to 5m at
456
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
457
+ * Anthropic docs verified for both values:
458
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
459
+ */
460
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
269
461
  provider: z.ZodLiteral<"gemini">;
270
462
  }, "strip", z.ZodTypeAny, {
271
463
  provider: "gemini";
@@ -273,12 +465,16 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
273
465
  fallbackModel?: string | undefined;
274
466
  overrides?: Record<string, string> | undefined;
275
467
  cacheTtls?: Record<string, number> | undefined;
468
+ enableContextCaching?: boolean | undefined;
469
+ cacheTTL?: 300 | 3600 | undefined;
276
470
  }, {
277
471
  provider: "gemini";
278
472
  defaultModel?: string | undefined;
279
473
  fallbackModel?: string | undefined;
280
474
  overrides?: Record<string, string> | undefined;
281
475
  cacheTtls?: Record<string, number> | undefined;
476
+ enableContextCaching?: boolean | undefined;
477
+ cacheTTL?: 300 | 3600 | undefined;
282
478
  }>, z.ZodObject<{
283
479
  /** Default model name if --model is not passed */
284
480
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -288,6 +484,30 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
288
484
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
289
485
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
290
486
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
487
+ /**
488
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
489
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
490
+ * `systemPrompt` segments will be marked with cache_control directives to
491
+ * reduce input-token cost on repeat invocations within the TTL window.
492
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
493
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
494
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
495
+ * `.totem/cache/<command>-<hash>.json`.
496
+ */
497
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
498
+ /**
499
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
500
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
501
+ * — 2x write cost, ~10% read cost). Only consulted when
502
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
503
+ *
504
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
505
+ * fail fast at config load instead of silently falling through to 5m at
506
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
507
+ * Anthropic docs verified for both values:
508
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
509
+ */
510
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
291
511
  provider: z.ZodLiteral<"anthropic">;
292
512
  }, "strip", z.ZodTypeAny, {
293
513
  provider: "anthropic";
@@ -295,12 +515,16 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
295
515
  fallbackModel?: string | undefined;
296
516
  overrides?: Record<string, string> | undefined;
297
517
  cacheTtls?: Record<string, number> | undefined;
518
+ enableContextCaching?: boolean | undefined;
519
+ cacheTTL?: 300 | 3600 | undefined;
298
520
  }, {
299
521
  provider: "anthropic";
300
522
  defaultModel?: string | undefined;
301
523
  fallbackModel?: string | undefined;
302
524
  overrides?: Record<string, string> | undefined;
303
525
  cacheTtls?: Record<string, number> | undefined;
526
+ enableContextCaching?: boolean | undefined;
527
+ cacheTTL?: 300 | 3600 | undefined;
304
528
  }>, z.ZodObject<{
305
529
  /** Default model name if --model is not passed */
306
530
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -310,6 +534,30 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
310
534
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
311
535
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
312
536
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
537
+ /**
538
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
539
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
540
+ * `systemPrompt` segments will be marked with cache_control directives to
541
+ * reduce input-token cost on repeat invocations within the TTL window.
542
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
543
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
544
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
545
+ * `.totem/cache/<command>-<hash>.json`.
546
+ */
547
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
548
+ /**
549
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
550
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
551
+ * — 2x write cost, ~10% read cost). Only consulted when
552
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
553
+ *
554
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
555
+ * fail fast at config load instead of silently falling through to 5m at
556
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
557
+ * Anthropic docs verified for both values:
558
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
559
+ */
560
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
313
561
  provider: z.ZodLiteral<"openai">;
314
562
  /** Optional base URL for OpenAI-compatible servers (Ollama, LM Studio, etc.) */
315
563
  baseUrl: z.ZodOptional<z.ZodString>;
@@ -320,6 +568,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
320
568
  fallbackModel?: string | undefined;
321
569
  overrides?: Record<string, string> | undefined;
322
570
  cacheTtls?: Record<string, number> | undefined;
571
+ enableContextCaching?: boolean | undefined;
572
+ cacheTTL?: 300 | 3600 | undefined;
323
573
  }, {
324
574
  provider: "openai";
325
575
  baseUrl?: string | undefined;
@@ -327,6 +577,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
327
577
  fallbackModel?: string | undefined;
328
578
  overrides?: Record<string, string> | undefined;
329
579
  cacheTtls?: Record<string, number> | undefined;
580
+ enableContextCaching?: boolean | undefined;
581
+ cacheTTL?: 300 | 3600 | undefined;
330
582
  }>, z.ZodObject<{
331
583
  /** Default model name if --model is not passed */
332
584
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -336,6 +588,30 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
336
588
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
337
589
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
338
590
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
591
+ /**
592
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
593
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
594
+ * `systemPrompt` segments will be marked with cache_control directives to
595
+ * reduce input-token cost on repeat invocations within the TTL window.
596
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
597
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
598
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
599
+ * `.totem/cache/<command>-<hash>.json`.
600
+ */
601
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
602
+ /**
603
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
604
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
605
+ * — 2x write cost, ~10% read cost). Only consulted when
606
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
607
+ *
608
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
609
+ * fail fast at config load instead of silently falling through to 5m at
610
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
611
+ * Anthropic docs verified for both values:
612
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
613
+ */
614
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
339
615
  provider: z.ZodLiteral<"ollama">;
340
616
  /** Base URL for the Ollama server */
341
617
  baseUrl: z.ZodDefault<z.ZodString>;
@@ -348,6 +624,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
348
624
  fallbackModel?: string | undefined;
349
625
  overrides?: Record<string, string> | undefined;
350
626
  cacheTtls?: Record<string, number> | undefined;
627
+ enableContextCaching?: boolean | undefined;
628
+ cacheTTL?: 300 | 3600 | undefined;
351
629
  numCtx?: number | undefined;
352
630
  }, {
353
631
  provider: "ollama";
@@ -356,6 +634,8 @@ export declare const OrchestratorSchema: z.ZodDiscriminatedUnion<"provider", [z.
356
634
  fallbackModel?: string | undefined;
357
635
  overrides?: Record<string, string> | undefined;
358
636
  cacheTtls?: Record<string, number> | undefined;
637
+ enableContextCaching?: boolean | undefined;
638
+ cacheTTL?: 300 | 3600 | undefined;
359
639
  numCtx?: number | undefined;
360
640
  }>]>;
361
641
  export declare const GarbageCollectionSchema: z.ZodObject<{
@@ -462,6 +742,30 @@ export declare const TotemConfigSchema: z.ZodObject<{
462
742
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
463
743
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
464
744
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
745
+ /**
746
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
747
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
748
+ * `systemPrompt` segments will be marked with cache_control directives to
749
+ * reduce input-token cost on repeat invocations within the TTL window.
750
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
751
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
752
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
753
+ * `.totem/cache/<command>-<hash>.json`.
754
+ */
755
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
756
+ /**
757
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
758
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
759
+ * — 2x write cost, ~10% read cost). Only consulted when
760
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
761
+ *
762
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
763
+ * fail fast at config load instead of silently falling through to 5m at
764
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
765
+ * Anthropic docs verified for both values:
766
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
767
+ */
768
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
465
769
  provider: z.ZodLiteral<"shell">;
466
770
  /** Shell command with {file} and {model} placeholders */
467
771
  command: z.ZodString;
@@ -472,6 +776,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
472
776
  fallbackModel?: string | undefined;
473
777
  overrides?: Record<string, string> | undefined;
474
778
  cacheTtls?: Record<string, number> | undefined;
779
+ enableContextCaching?: boolean | undefined;
780
+ cacheTTL?: 300 | 3600 | undefined;
475
781
  }, {
476
782
  provider: "shell";
477
783
  command: string;
@@ -479,6 +785,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
479
785
  fallbackModel?: string | undefined;
480
786
  overrides?: Record<string, string> | undefined;
481
787
  cacheTtls?: Record<string, number> | undefined;
788
+ enableContextCaching?: boolean | undefined;
789
+ cacheTTL?: 300 | 3600 | undefined;
482
790
  }>, z.ZodObject<{
483
791
  /** Default model name if --model is not passed */
484
792
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -488,6 +796,30 @@ export declare const TotemConfigSchema: z.ZodObject<{
488
796
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
489
797
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
490
798
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
799
+ /**
800
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
801
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
802
+ * `systemPrompt` segments will be marked with cache_control directives to
803
+ * reduce input-token cost on repeat invocations within the TTL window.
804
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
805
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
806
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
807
+ * `.totem/cache/<command>-<hash>.json`.
808
+ */
809
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
810
+ /**
811
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
812
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
813
+ * — 2x write cost, ~10% read cost). Only consulted when
814
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
815
+ *
816
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
817
+ * fail fast at config load instead of silently falling through to 5m at
818
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
819
+ * Anthropic docs verified for both values:
820
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
821
+ */
822
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
491
823
  provider: z.ZodLiteral<"gemini">;
492
824
  }, "strip", z.ZodTypeAny, {
493
825
  provider: "gemini";
@@ -495,12 +827,16 @@ export declare const TotemConfigSchema: z.ZodObject<{
495
827
  fallbackModel?: string | undefined;
496
828
  overrides?: Record<string, string> | undefined;
497
829
  cacheTtls?: Record<string, number> | undefined;
830
+ enableContextCaching?: boolean | undefined;
831
+ cacheTTL?: 300 | 3600 | undefined;
498
832
  }, {
499
833
  provider: "gemini";
500
834
  defaultModel?: string | undefined;
501
835
  fallbackModel?: string | undefined;
502
836
  overrides?: Record<string, string> | undefined;
503
837
  cacheTtls?: Record<string, number> | undefined;
838
+ enableContextCaching?: boolean | undefined;
839
+ cacheTTL?: 300 | 3600 | undefined;
504
840
  }>, z.ZodObject<{
505
841
  /** Default model name if --model is not passed */
506
842
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -510,6 +846,30 @@ export declare const TotemConfigSchema: z.ZodObject<{
510
846
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
511
847
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
512
848
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
849
+ /**
850
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
851
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
852
+ * `systemPrompt` segments will be marked with cache_control directives to
853
+ * reduce input-token cost on repeat invocations within the TTL window.
854
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
855
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
856
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
857
+ * `.totem/cache/<command>-<hash>.json`.
858
+ */
859
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
860
+ /**
861
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
862
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
863
+ * — 2x write cost, ~10% read cost). Only consulted when
864
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
865
+ *
866
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
867
+ * fail fast at config load instead of silently falling through to 5m at
868
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
869
+ * Anthropic docs verified for both values:
870
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
871
+ */
872
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
513
873
  provider: z.ZodLiteral<"anthropic">;
514
874
  }, "strip", z.ZodTypeAny, {
515
875
  provider: "anthropic";
@@ -517,12 +877,16 @@ export declare const TotemConfigSchema: z.ZodObject<{
517
877
  fallbackModel?: string | undefined;
518
878
  overrides?: Record<string, string> | undefined;
519
879
  cacheTtls?: Record<string, number> | undefined;
880
+ enableContextCaching?: boolean | undefined;
881
+ cacheTTL?: 300 | 3600 | undefined;
520
882
  }, {
521
883
  provider: "anthropic";
522
884
  defaultModel?: string | undefined;
523
885
  fallbackModel?: string | undefined;
524
886
  overrides?: Record<string, string> | undefined;
525
887
  cacheTtls?: Record<string, number> | undefined;
888
+ enableContextCaching?: boolean | undefined;
889
+ cacheTTL?: 300 | 3600 | undefined;
526
890
  }>, z.ZodObject<{
527
891
  /** Default model name if --model is not passed */
528
892
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -532,6 +896,30 @@ export declare const TotemConfigSchema: z.ZodObject<{
532
896
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
533
897
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
534
898
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
899
+ /**
900
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
901
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
902
+ * `systemPrompt` segments will be marked with cache_control directives to
903
+ * reduce input-token cost on repeat invocations within the TTL window.
904
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
905
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
906
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
907
+ * `.totem/cache/<command>-<hash>.json`.
908
+ */
909
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
910
+ /**
911
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
912
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
913
+ * — 2x write cost, ~10% read cost). Only consulted when
914
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
915
+ *
916
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
917
+ * fail fast at config load instead of silently falling through to 5m at
918
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
919
+ * Anthropic docs verified for both values:
920
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
921
+ */
922
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
535
923
  provider: z.ZodLiteral<"openai">;
536
924
  /** Optional base URL for OpenAI-compatible servers (Ollama, LM Studio, etc.) */
537
925
  baseUrl: z.ZodOptional<z.ZodString>;
@@ -542,6 +930,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
542
930
  fallbackModel?: string | undefined;
543
931
  overrides?: Record<string, string> | undefined;
544
932
  cacheTtls?: Record<string, number> | undefined;
933
+ enableContextCaching?: boolean | undefined;
934
+ cacheTTL?: 300 | 3600 | undefined;
545
935
  }, {
546
936
  provider: "openai";
547
937
  baseUrl?: string | undefined;
@@ -549,6 +939,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
549
939
  fallbackModel?: string | undefined;
550
940
  overrides?: Record<string, string> | undefined;
551
941
  cacheTtls?: Record<string, number> | undefined;
942
+ enableContextCaching?: boolean | undefined;
943
+ cacheTTL?: 300 | 3600 | undefined;
552
944
  }>, z.ZodObject<{
553
945
  /** Default model name if --model is not passed */
554
946
  defaultModel: z.ZodOptional<z.ZodString>;
@@ -558,6 +950,30 @@ export declare const TotemConfigSchema: z.ZodObject<{
558
950
  overrides: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
559
951
  /** Per-command cache TTLs in seconds (e.g., { 'triage': 3600, 'shield': 0 }) */
560
952
  cacheTtls: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
953
+ /**
954
+ * Enable provider-native prompt caching (mmnto/totem#1291 Proposal 217). When true and
955
+ * the provider supports it (Anthropic in 1.15.0, Gemini in 1.16.0+), persistent
956
+ * `systemPrompt` segments will be marked with cache_control directives to
957
+ * reduce input-token cost on repeat invocations within the TTL window.
958
+ * Defaults to undefined (off) — opt-in for 1.15.0 to avoid surprising existing
959
+ * users mid-cycle. Distinct from `cacheTtls` above, which controls the
960
+ * orthogonal response-level cache (mmnto/totem#52, closed) at
961
+ * `.totem/cache/<command>-<hash>.json`.
962
+ */
963
+ enableContextCaching: z.ZodOptional<z.ZodBoolean>;
964
+ /**
965
+ * Prompt cache TTL in seconds (mmnto/totem#1291). Anthropic supports exactly
966
+ * two values today: 300 (5m, default ephemeral) and 3600 (1h, extended cache
967
+ * — 2x write cost, ~10% read cost). Only consulted when
968
+ * `enableContextCaching` is true. Defaults to 300 when omitted.
969
+ *
970
+ * Constrained to literals at parse time so invalid TTLs (e.g. 600, 1800)
971
+ * fail fast at config load instead of silently falling through to 5m at
972
+ * provider-invocation time. Caught by CodeRabbit on PR #1292 review.
973
+ * Anthropic docs verified for both values:
974
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
975
+ */
976
+ cacheTTL: z.ZodOptional<z.ZodUnion<[z.ZodLiteral<300>, z.ZodLiteral<3600>]>>;
561
977
  provider: z.ZodLiteral<"ollama">;
562
978
  /** Base URL for the Ollama server */
563
979
  baseUrl: z.ZodDefault<z.ZodString>;
@@ -570,6 +986,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
570
986
  fallbackModel?: string | undefined;
571
987
  overrides?: Record<string, string> | undefined;
572
988
  cacheTtls?: Record<string, number> | undefined;
989
+ enableContextCaching?: boolean | undefined;
990
+ cacheTTL?: 300 | 3600 | undefined;
573
991
  numCtx?: number | undefined;
574
992
  }, {
575
993
  provider: "ollama";
@@ -578,6 +996,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
578
996
  fallbackModel?: string | undefined;
579
997
  overrides?: Record<string, string> | undefined;
580
998
  cacheTtls?: Record<string, number> | undefined;
999
+ enableContextCaching?: boolean | undefined;
1000
+ cacheTTL?: 300 | 3600 | undefined;
581
1001
  numCtx?: number | undefined;
582
1002
  }>]>, {
583
1003
  provider: "shell";
@@ -586,18 +1006,24 @@ export declare const TotemConfigSchema: z.ZodObject<{
586
1006
  fallbackModel?: string | undefined;
587
1007
  overrides?: Record<string, string> | undefined;
588
1008
  cacheTtls?: Record<string, number> | undefined;
1009
+ enableContextCaching?: boolean | undefined;
1010
+ cacheTTL?: 300 | 3600 | undefined;
589
1011
  } | {
590
1012
  provider: "gemini";
591
1013
  defaultModel?: string | undefined;
592
1014
  fallbackModel?: string | undefined;
593
1015
  overrides?: Record<string, string> | undefined;
594
1016
  cacheTtls?: Record<string, number> | undefined;
1017
+ enableContextCaching?: boolean | undefined;
1018
+ cacheTTL?: 300 | 3600 | undefined;
595
1019
  } | {
596
1020
  provider: "anthropic";
597
1021
  defaultModel?: string | undefined;
598
1022
  fallbackModel?: string | undefined;
599
1023
  overrides?: Record<string, string> | undefined;
600
1024
  cacheTtls?: Record<string, number> | undefined;
1025
+ enableContextCaching?: boolean | undefined;
1026
+ cacheTTL?: 300 | 3600 | undefined;
601
1027
  } | {
602
1028
  provider: "openai";
603
1029
  baseUrl?: string | undefined;
@@ -605,6 +1031,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
605
1031
  fallbackModel?: string | undefined;
606
1032
  overrides?: Record<string, string> | undefined;
607
1033
  cacheTtls?: Record<string, number> | undefined;
1034
+ enableContextCaching?: boolean | undefined;
1035
+ cacheTTL?: 300 | 3600 | undefined;
608
1036
  } | {
609
1037
  provider: "ollama";
610
1038
  baseUrl: string;
@@ -612,6 +1040,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
612
1040
  fallbackModel?: string | undefined;
613
1041
  overrides?: Record<string, string> | undefined;
614
1042
  cacheTtls?: Record<string, number> | undefined;
1043
+ enableContextCaching?: boolean | undefined;
1044
+ cacheTTL?: 300 | 3600 | undefined;
615
1045
  numCtx?: number | undefined;
616
1046
  }, unknown>>;
617
1047
  /** Optional: override the .totem/ directory path */
@@ -744,18 +1174,24 @@ export declare const TotemConfigSchema: z.ZodObject<{
744
1174
  fallbackModel?: string | undefined;
745
1175
  overrides?: Record<string, string> | undefined;
746
1176
  cacheTtls?: Record<string, number> | undefined;
1177
+ enableContextCaching?: boolean | undefined;
1178
+ cacheTTL?: 300 | 3600 | undefined;
747
1179
  } | {
748
1180
  provider: "gemini";
749
1181
  defaultModel?: string | undefined;
750
1182
  fallbackModel?: string | undefined;
751
1183
  overrides?: Record<string, string> | undefined;
752
1184
  cacheTtls?: Record<string, number> | undefined;
1185
+ enableContextCaching?: boolean | undefined;
1186
+ cacheTTL?: 300 | 3600 | undefined;
753
1187
  } | {
754
1188
  provider: "anthropic";
755
1189
  defaultModel?: string | undefined;
756
1190
  fallbackModel?: string | undefined;
757
1191
  overrides?: Record<string, string> | undefined;
758
1192
  cacheTtls?: Record<string, number> | undefined;
1193
+ enableContextCaching?: boolean | undefined;
1194
+ cacheTTL?: 300 | 3600 | undefined;
759
1195
  } | {
760
1196
  provider: "openai";
761
1197
  baseUrl?: string | undefined;
@@ -763,6 +1199,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
763
1199
  fallbackModel?: string | undefined;
764
1200
  overrides?: Record<string, string> | undefined;
765
1201
  cacheTtls?: Record<string, number> | undefined;
1202
+ enableContextCaching?: boolean | undefined;
1203
+ cacheTTL?: 300 | 3600 | undefined;
766
1204
  } | {
767
1205
  provider: "ollama";
768
1206
  baseUrl: string;
@@ -770,6 +1208,8 @@ export declare const TotemConfigSchema: z.ZodObject<{
770
1208
  fallbackModel?: string | undefined;
771
1209
  overrides?: Record<string, string> | undefined;
772
1210
  cacheTtls?: Record<string, number> | undefined;
1211
+ enableContextCaching?: boolean | undefined;
1212
+ cacheTTL?: 300 | 3600 | undefined;
773
1213
  numCtx?: number | undefined;
774
1214
  } | undefined;
775
1215
  docs?: {