runtime-narrative 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. runtime_narrative-1.1.0/PKG-INFO +1169 -0
  2. runtime_narrative-1.1.0/README.md +1113 -0
  3. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/pyproject.toml +2 -2
  4. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/__init__.py +24 -2
  5. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/context.py +5 -0
  6. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/events.py +33 -1
  7. runtime_narrative-1.1.0/runtime_narrative/logging_bridge.py +54 -0
  8. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/middleware.py +6 -1
  9. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/console.py +96 -3
  10. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/json_renderer.py +22 -0
  11. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/otel_renderer.py +4 -0
  12. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/stage.py +6 -1
  13. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/story.py +105 -7
  14. runtime_narrative-1.1.0/runtime_narrative.egg-info/PKG-INFO +1169 -0
  15. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative.egg-info/SOURCES.txt +4 -1
  16. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_console_renderer.py +81 -1
  17. runtime_narrative-1.1.0/tests/test_issues.py +213 -0
  18. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_json_renderer.py +43 -0
  19. runtime_narrative-1.1.0/tests/test_logging_bridge.py +93 -0
  20. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_otel_renderer.py +19 -0
  21. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_story.py +68 -0
  22. runtime_narrative-1.0.0/PKG-INFO +0 -1124
  23. runtime_narrative-1.0.0/README.MD +0 -1068
  24. runtime_narrative-1.0.0/runtime_narrative.egg-info/PKG-INFO +0 -1124
  25. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/LICENSE +0 -0
  26. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/analyzers/__init__.py +0 -0
  27. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/analyzers/anthropic.py +0 -0
  28. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/analyzers/base.py +0 -0
  29. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/analyzers/deduplication.py +0 -0
  30. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/analyzers/ollama.py +0 -0
  31. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/celery.py +0 -0
  32. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/cli.py +0 -0
  33. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/decorators.py +0 -0
  34. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/diagnostics.py +0 -0
  35. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/failure.py +0 -0
  36. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/grpc_interceptor.py +0 -0
  37. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/instrumentation.py +0 -0
  38. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/middleware_django.py +0 -0
  39. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/__init__.py +0 -0
  40. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/alert_renderer.py +0 -0
  41. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/html_renderer.py +0 -0
  42. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/otel_log_renderer.py +0 -0
  43. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/otel_metrics_renderer.py +0 -0
  44. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/persistence_renderer.py +0 -0
  45. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/renderer/prometheus_renderer.py +0 -0
  46. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/task_group.py +0 -0
  47. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative/testing.py +0 -0
  48. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative.egg-info/dependency_links.txt +0 -0
  49. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative.egg-info/entry_points.txt +0 -0
  50. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative.egg-info/requires.txt +0 -0
  51. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/runtime_narrative.egg-info/top_level.txt +0 -0
  52. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/setup.cfg +0 -0
  53. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_alert_renderer.py +0 -0
  54. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_analyzers.py +0 -0
  55. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_anthropic_analyzer.py +0 -0
  56. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_async_renderer.py +0 -0
  57. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_celery.py +0 -0
  58. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_decorators.py +0 -0
  59. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_deduplication.py +0 -0
  60. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_diagnostics.py +0 -0
  61. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_dry_run.py +0 -0
  62. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_failure.py +0 -0
  63. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_grpc_interceptor.py +0 -0
  64. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_html_renderer.py +0 -0
  65. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_instrumentation.py +0 -0
  66. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_instrumentation_phase2.py +0 -0
  67. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_middleware.py +0 -0
  68. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_middleware_django.py +0 -0
  69. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_middleware_propagation.py +0 -0
  70. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_otel_log_renderer.py +0 -0
  71. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_otel_metrics_renderer.py +0 -0
  72. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_persistence_renderer.py +0 -0
  73. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_prometheus_renderer.py +0 -0
  74. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_redaction_extended.py +0 -0
  75. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_stage.py +0 -0
  76. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_stage_metadata.py +0 -0
  77. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_structured_analysis.py +0 -0
  78. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_task_group.py +0 -0
  79. {runtime_narrative-1.0.0 → runtime_narrative-1.1.0}/tests/test_testing_utils.py +0 -0
@@ -0,0 +1,1169 @@
1
+ Metadata-Version: 2.4
2
+ Name: runtime-narrative
3
+ Version: 1.1.0
4
+ Summary: Model execution as human-readable stories with lean/rich failure diagnostics and optional LLM analysis
5
+ Author-email: Shashank Raj <shashank.raj28@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/sraj0501/runtime_narrative
8
+ Project-URL: Repository, https://github.com/sraj0501/runtime_narrative
9
+ Project-URL: Bug Tracker, https://github.com/sraj0501/runtime_narrative/issues
10
+ Keywords: logging,observability,tracing,fastapi,debugging,diagnostics,runtime_narrative
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: System :: Logging
22
+ Classifier: Topic :: System :: Monitoring
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: python-dotenv>=1.2.1
28
+ Provides-Extra: console
29
+ Requires-Dist: typer>=0.9.0; extra == "console"
30
+ Provides-Extra: fastapi
31
+ Requires-Dist: starlette>=0.27.0; extra == "fastapi"
32
+ Provides-Extra: otel
33
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "otel"
34
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "otel"
35
+ Provides-Extra: prometheus
36
+ Requires-Dist: prometheus-client>=0.19.0; extra == "prometheus"
37
+ Provides-Extra: anthropic
38
+ Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
39
+ Provides-Extra: django
40
+ Requires-Dist: django>=3.2; extra == "django"
41
+ Provides-Extra: celery
42
+ Requires-Dist: celery>=5.0; extra == "celery"
43
+ Provides-Extra: grpc
44
+ Requires-Dist: grpcio>=1.50.0; extra == "grpc"
45
+ Provides-Extra: all
46
+ Requires-Dist: typer>=0.9.0; extra == "all"
47
+ Requires-Dist: starlette>=0.27.0; extra == "all"
48
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "all"
49
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "all"
50
+ Requires-Dist: prometheus-client>=0.19.0; extra == "all"
51
+ Requires-Dist: anthropic>=0.25.0; extra == "all"
52
+ Requires-Dist: django>=3.2; extra == "all"
53
+ Requires-Dist: celery>=5.0; extra == "all"
54
+ Requires-Dist: grpcio>=1.50.0; extra == "all"
55
+ Dynamic: license-file
56
+
57
+ # runtime-narrative
58
+
59
+ Turn any Python execution into a traceable **story** composed of named **stages**. Get minimal logs when everything works — and surgical, LLM-powered diagnostics the moment something breaks.
60
+
61
+ ```
62
+ ▶ Story started: Import Customers
63
+ ✔ Load CSV 0.012s
64
+ ✔ Validate Data 0.004s
65
+
66
+ ❌ Failure at: Insert Records
67
+
68
+ ValueError: duplicate customer id
69
+ Location: app/db.py:47 insert_row
70
+ Code: raise ValueError("duplicate customer id")
71
+ Chain: ValueError ← sqlite3.IntegrityError
72
+
73
+ ## Exact Why
74
+ A record with the same customer_id already exists (UNIQUE constraint).
75
+
76
+ ## Targeted Fix
77
+ Use INSERT OR IGNORE, or check for an existing row before inserting.
78
+ ```
79
+
80
+ ---
81
+
82
+ ## Contents
83
+
84
+ - [Installation](#installation)
85
+ - [Quick start](#quick-start)
86
+ - [Decorators](#decorators)
87
+ - [Auto-instrumentation](#auto-instrumentation)
88
+ - [Failure diagnostics](#failure-diagnostics)
89
+ - [Failure analyzers](#failure-analyzers)
90
+ - [Renderers](#renderers)
91
+ - [Framework integrations](#framework-integrations)
92
+ - [Async task groups](#async-task-groups)
93
+ - [Persistence and CLI](#persistence-and-cli)
94
+ - [Alert routing](#alert-routing)
95
+ - [Testing utilities](#testing-utilities)
96
+ - [Custom renderers and analyzers](#custom-renderers-and-analyzers)
97
+ - [Utilities](#utilities)
98
+ - [Sub-stories and log capture](#sub-stories-and-log-capture)
99
+ - [Environment variables](#environment-variables)
100
+
101
+ ---
102
+
103
+ ## Installation
104
+
105
+ ```bash
106
+ pip install runtime-narrative
107
+ ```
108
+
109
+ Optional extras unlock additional renderers and integrations:
110
+
111
+ | Extra | What it installs |
112
+ |---|---|
113
+ | `console` | `typer` — colored terminal output in `ConsoleRenderer` |
114
+ | `fastapi` | `starlette` — `RuntimeNarrativeMiddleware` |
115
+ | `otel` | `opentelemetry-api`, `opentelemetry-sdk` — `OtelRenderer`, `OtelLogRenderer`, `OtelMetricsRenderer` |
116
+ | `prometheus` | `prometheus-client` — `PrometheusRenderer` |
117
+ | `anthropic` | `anthropic` — `AnthropicFailureAnalyzer` |
118
+ | `django` | `django` — `RuntimeNarrativeDjangoMiddleware` / `SyncMiddleware` |
119
+ | `celery` | `celery` — `NarrativeTask`, `connect_narrative` |
120
+ | `grpc` | `grpcio` — `RuntimeNarrativeInterceptor` / `AsyncInterceptor` |
121
+ | `all` | Everything above |
122
+
123
+ ```bash
124
+ pip install "runtime-narrative[console,fastapi,anthropic]"
125
+ pip install "runtime-narrative[all]"
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Quick start
131
+
132
+ ### Sync
133
+
134
+ ```python
135
+ from runtime_narrative import story, stage
136
+
137
+ with story("Import Customers"):
138
+ with stage("Load CSV"):
139
+ rows = load_csv("customers.csv")
140
+
141
+ with stage("Validate Data"):
142
+ validate(rows)
143
+
144
+ with stage("Insert Records"):
145
+ db.insert(rows)
146
+ ```
147
+
148
+ `ConsoleRenderer` is the default. On failure it prints the exact frame, a source snippet, the exception chain, and a compressed stack summary — no configuration needed.
149
+
150
+ ### Async
151
+
152
+ ```python
153
+ import asyncio
154
+ from runtime_narrative import story, stage
155
+
156
+ async def run():
157
+ async with story("Sync Pipeline"):
158
+ async with stage("Fetch Orders"):
159
+ orders = await fetch_orders()
160
+
161
+ async with stage("Process Orders"):
162
+ await process(orders)
163
+
164
+ asyncio.run(run())
165
+ ```
166
+
167
+ `story` and `stage` are dual sync/async context managers — use `with` or `async with` interchangeably.
168
+
169
+ ### Track progress
170
+
171
+ Declare the total stage count upfront so `progress_percent` is accurate at every stage boundary:
172
+
173
+ ```python
174
+ with story("Import Customers", total_stages=3) as runtime:
175
+ with stage("Load CSV"): ... # 33%
176
+ with stage("Validate"): ... # 66%
177
+ with stage("Insert"): ... # 100%
178
+ ```
179
+
180
+ Or set it dynamically after the story starts:
181
+
182
+ ```python
183
+ with story("Batch Job") as runtime:
184
+ items = fetch_batch()
185
+ runtime.set_total_stages(len(items))
186
+ for item in items:
187
+ with stage(f"Process {item.id}"): ...
188
+ ```
189
+
190
+ ---
191
+
192
+ ## Decorators
193
+
194
+ Wrap functions without restructuring call sites. The library detects `async def` automatically:
195
+
196
+ ```python
197
+ from runtime_narrative import runtime_narrative_story, runtime_narrative_stage
198
+
199
+ @runtime_narrative_stage("Load CSV")
200
+ def load_csv() -> list[str]:
201
+ return open("customers.csv").read().splitlines()
202
+
203
+ @runtime_narrative_stage("Insert Records")
204
+ def insert(rows: list[str]) -> None:
205
+ db.insert_many(rows)
206
+
207
+ @runtime_narrative_story("Import Customers")
208
+ def run() -> None:
209
+ rows = load_csv()
210
+ insert(rows)
211
+ ```
212
+
213
+ `@runtime_narrative_story` accepts the same keyword arguments as `story()`: `renderers`, `failure_analyzer`, `background_analysis`, `diagnostics_config`, `failure_diagnostics`, `allow_rich_in_production`, `app_roots`, `redact_extra`, `total_stages`, `dry_run`.
214
+
215
+ ---
216
+
217
+ ## Auto-instrumentation
218
+
219
+ Instrument an entire class or module without wrapping every function individually.
220
+
221
+ ### `@narrative_class`
222
+
223
+ Every public instance method becomes a stage. Stage names are `ClassName.method_name`:
224
+
225
+ ```python
226
+ from runtime_narrative import narrative_class, no_stage, story
227
+
228
+ @narrative_class
229
+ class OrderService:
230
+ def validate(self, order: dict) -> None: ... # → "OrderService.validate"
231
+ def charge(self, order: dict) -> str: ... # → "OrderService.charge"
232
+ def fulfill(self, order: dict) -> str: ... # → "OrderService.fulfill"
233
+
234
+ @no_stage
235
+ def _log(self, msg: str) -> None: ... # excluded
236
+
237
+ svc = OrderService()
238
+ with story("Process Order", total_stages=3):
239
+ svc.validate(order)
240
+ svc.charge(order)
241
+ svc.fulfill(order)
242
+ ```
243
+
244
+ Skipped by default: names starting with `_`, `@no_stage`-marked methods, `@property`, `@staticmethod`, `@classmethod`, and inherited methods. Opt in to class and static methods explicitly:
245
+
246
+ ```python
247
+ @narrative_class(instrument_classmethods=True, instrument_staticmethods=True)
248
+ class Factory:
249
+ @classmethod
250
+ def create(cls): ... # → "Factory.create"
251
+
252
+ @staticmethod
253
+ def validate(x): ... # → "Factory.validate"
254
+
255
+ @classmethod
256
+ @narrative_stage("Build Widget")
257
+ def build(cls): ... # → "Build Widget" (custom name)
258
+ ```
259
+
260
+ ### `@narrative_stage`
261
+
262
+ Override the stage name on a single method or standalone function. The decorator also prevents double-wrapping when used inside a class decorated with `@narrative_class`:
263
+
264
+ ```python
265
+ from runtime_narrative import narrative_class, narrative_stage
266
+
267
+ @narrative_class
268
+ class ReportService:
269
+ @narrative_stage("Generate PDF Report")
270
+ def generate(self, data: dict) -> bytes: ... # custom name
271
+
272
+ def archive(self, report: bytes) -> None: ... # "ReportService.archive" (default)
273
+ ```
274
+
275
+ When `name` is omitted, the function name is title-cased: `validate_order` → `"Validate Order"`.
276
+
277
+ ### `@no_stage`
278
+
279
+ Exclude any method or function from all auto-instrumentation:
280
+
281
+ ```python
282
+ from runtime_narrative import no_stage
283
+
284
+ @no_stage
285
+ def _cache_lookup(key: str): ...
286
+ ```
287
+
288
+ ### `instrument_module`
289
+
290
+ Instrument all public callables in an existing module in-place. Classes get `@narrative_class`; top-level functions are wrapped directly. Imported symbols are not touched:
291
+
292
+ ```python
293
+ import runtime_narrative
294
+ import myapp.services as svc
295
+
296
+ runtime_narrative.instrument_module(svc)
297
+ ```
298
+
299
+ ### `auto_instrument`
300
+
301
+ Register a `sys.meta_path` import hook that instruments every app module on import. Only modules whose source path is under `app_roots` (default: `cwd`) are affected — stdlib and installed packages are unaffected:
302
+
303
+ ```python
304
+ import runtime_narrative
305
+
306
+ finder = runtime_narrative.auto_instrument()
307
+
308
+ # All subsequent imports of app modules are instrumented automatically.
309
+ from myapp.services import OrderService
310
+
311
+ # Stop at any point:
312
+ import sys
313
+ sys.meta_path.remove(finder)
314
+ ```
315
+
316
+ Pin to specific directories:
317
+
318
+ ```python
319
+ runtime_narrative.auto_instrument(app_roots=["/app/src", "/app/workers"])
320
+ ```
321
+
322
+ ---
323
+
324
+ ## Failure diagnostics
325
+
326
+ ### Lean vs rich mode
327
+
328
+ | Mode | What is captured |
329
+ |---|---|
330
+ | `lean` (default) | Primary frame, source snippet (±2 lines), exception chain, compressed stack summary |
331
+ | `rich` | Everything in lean + local variable values for up to 2 frames, with automatic secret redaction |
332
+
333
+ ```bash
334
+ RUNTIME_NARRATIVE_FAILURE_DIAGNOSTICS=rich python app.py
335
+ ```
336
+
337
+ Or per-story:
338
+
339
+ ```python
340
+ with story("Import", failure_diagnostics="rich"):
341
+ ...
342
+ ```
343
+
344
+ ### Production safeguards
345
+
346
+ When `RUNTIME_NARRATIVE_ENV=production`:
347
+ - Tracebacks are capped at 8 000 characters.
348
+ - `rich` mode is silently downgraded to `lean` to prevent local variable leakage in logs.
349
+
350
+ Override the downgrade when needed:
351
+
352
+ ```python
353
+ with story("Debug", failure_diagnostics="rich", allow_rich_in_production=True):
354
+ ...
355
+ ```
356
+
357
+ ### Secret redaction
358
+
359
+ Rich mode automatically redacts local variables whose key names contain any of: `password`, `secret`, `token`, `api_key`, `apikey`, `authorization`, `cookie`, `session`, `credential`.
360
+
361
+ Extend the list with project-specific names:
362
+
363
+ ```python
364
+ with story("Import", failure_diagnostics="rich", redact_extra=["internal_id", "org_key"]):
365
+ ...
366
+ ```
367
+
368
+ For more expressive rules, use `FailureDiagnosticsConfig`:
369
+
370
+ ```python
371
+ from runtime_narrative import FailureDiagnosticsConfig
372
+
373
+ config = FailureDiagnosticsConfig(
374
+ failure_diagnostics="rich",
375
+ redact_patterns=("^internal_.*", r"\bpii\b"), # regex, case-insensitive re.search
376
+ redact_callback=lambda key: key.startswith("priv_"),
377
+ )
378
+
379
+ with story("Import", diagnostics_config=config):
380
+ ...
381
+ ```
382
+
383
+ Exceptions raised by `redact_callback` are caught, and the key is then treated as not redacted. Both `redact_patterns` and `redact_callback` are available on `FailureDiagnosticsConfig` and flow through `merge()`.
384
+
385
+ ### Full `FailureDiagnosticsConfig` reference
386
+
387
+ ```python
388
+ from runtime_narrative import FailureDiagnosticsConfig
389
+
390
+ config = FailureDiagnosticsConfig(
391
+ runtime_environment="production", # "development" | "production"
392
+ failure_diagnostics="lean", # "lean" | "rich"
393
+ allow_rich_in_production=False,
394
+ app_roots=("/app/src",), # paths used for primary frame selection
395
+ redact_extra=("internal_id",), # additional substring matches
396
+ redact_patterns=("^priv_",), # regex patterns (case-insensitive)
397
+ redact_callback=lambda k: k.endswith("_key"),
398
+ max_traceback_chars=12_000, # development cap (None = unlimited)
399
+ production_traceback_cap=8_000,
400
+ max_locals_per_frame=12,
401
+ max_local_value_len=200,
402
+ max_local_depth=2,
403
+ max_frames_with_locals=2,
404
+ snippet_context_lines=2,
405
+ )
406
+
407
+ with story("Import", diagnostics_config=config):
408
+ ...
409
+ ```
410
+
411
+ `FailureDiagnosticsConfig.from_env()` reads the standard environment variables. `FailureDiagnosticsConfig.merge(base, **overrides)` layers per-story overrides on a base config.
412
+
413
+ ---
414
+
415
+ ## Failure analyzers
416
+
417
+ Analyzers receive structured failure data and return a diagnosis string that is attached to `FailureOccurred` and displayed by renderers. All analyzers are optional — if the endpoint is unreachable, your exception propagates normally.
418
+
419
+ ### `OllamaFailureAnalyzer`
420
+
421
+ Calls a local Ollama instance or any `/api/generate`-compatible endpoint:
422
+
423
+ ```python
424
+ from runtime_narrative import OllamaFailureAnalyzer, story
425
+
426
+ analyzer = OllamaFailureAnalyzer(
427
+ model="llama3",
428
+ endpoint="http://127.0.0.1:11434/api/generate", # default
429
+ timeout_seconds=60.0,
430
+ max_context_chars=8000,
431
+ )
432
+
433
+ with story("Import Customers", failure_analyzer=analyzer):
434
+ ...
435
+ ```
436
+
437
+ ### `LLMFailureAnalyzer`
438
+
439
+ Calls any OpenAI-compatible `/v1/chat/completions` endpoint (OpenAI, vLLM, llama.cpp, LM Studio, Ollama OpenAI mode):
440
+
441
+ ```python
442
+ from runtime_narrative import LLMFailureAnalyzer
443
+
444
+ analyzer = LLMFailureAnalyzer(
445
+ model="gpt-4o-mini",
446
+ endpoint="https://api.openai.com/v1/chat/completions",
447
+ api_key="sk-...",
448
+ max_context_chars=8000,
449
+ )
450
+ ```
451
+
452
+ ### `AnthropicFailureAnalyzer` (`[anthropic]` extra)
453
+
454
+ Uses the Anthropic Claude API. Reads `ANTHROPIC_API_KEY` from the environment by default:
455
+
456
+ ```python
457
+ from runtime_narrative import AnthropicFailureAnalyzer
458
+
459
+ analyzer = AnthropicFailureAnalyzer(
460
+ model="claude-haiku-4-5-20251001", # default
461
+ api_key="sk-ant-...", # or set ANTHROPIC_API_KEY
462
+ max_tokens=1024,
463
+ timeout_seconds=30.0,
464
+ )
465
+ ```
466
+
467
+ Both `analyze_failure()` (sync) and `analyze_failure_async()` (async) are implemented. The async path uses `anthropic.AsyncAnthropic` natively — no thread offloading.
468
+
469
+ ### Structured output
470
+
471
+ All analyzers request a JSON response (`exact_why`, `evidence`, `targeted_fix`, `code_changes`) and format it into guaranteed `## Header\ncontent` sections. They fall back to raw text when the model returns non-JSON output.
472
+
473
+ ### Context budget
474
+
475
+ Tracebacks are trimmed from the top (keeping the most recent frames) when the prompt would exceed `max_context_chars`. If the budget is exhausted before any traceback fits, a `<traceback omitted>` marker is used:
476
+
477
+ ```python
478
+ analyzer = LLMFailureAnalyzer(model="llama3", max_context_chars=4000)
479
+ ```
480
+
481
+ ### `DeduplicatingAnalyzer`
482
+
483
+ Wraps any analyzer with an LRU cache keyed by `SHA-256(error_type, filename, lineno, exception_chain)`. Prevents redundant LLM calls for the same recurring error. `None` results (timeouts, network errors) are never cached:
484
+
485
+ ```python
486
+ from runtime_narrative import DeduplicatingAnalyzer, AnthropicFailureAnalyzer
487
+
488
+ analyzer = DeduplicatingAnalyzer(
489
+ AnthropicFailureAnalyzer(),
490
+ max_cache_size=256,
491
+ )
492
+ ```
493
+
494
+ Thread-safe; async-aware (delegates to `analyze_failure_async` when available).
495
+
496
+ ### Background analysis
497
+
498
+ `background_analysis=True` emits `FailureOccurred` immediately (with `llm_analysis=None`), then runs the LLM analysis as a background `asyncio.Task`. When the task completes, `LLMAnalysisReady` is emitted:
499
+
500
+ ```python
501
+ async with story(
502
+ "Process Order",
503
+ failure_analyzer=analyzer,
504
+ background_analysis=True,
505
+ ):
506
+ ...
507
+ # Response is not delayed by LLM latency.
508
+ # LLMAnalysisReady fires when analysis is ready.
509
+ ```
510
+
511
+ ### `FailureAnalyzer` protocol
512
+
513
+ All built-in analyzers satisfy the `@runtime_checkable` `FailureAnalyzer` protocol. Use it to type-check custom analyzers:
514
+
515
+ ```python
516
+ from runtime_narrative import FailureAnalyzer
517
+
518
+ assert isinstance(my_analyzer, FailureAnalyzer)
519
+ ```
520
+
521
+ ---
522
+
523
+ ## Renderers
524
+
525
+ A renderer is any object with `handle(self, event: object) -> None` (sync) or `async def handle(self, event: object) -> None` (async). Pass a list to `story()`, middleware, or a decorator. Inside `async with story(...)`, async renderers are awaited for every event, including stage events.
526
+
527
+ Renderers never crash a story — if a renderer raises, a warning is printed to stderr and the next renderer continues.
528
+
529
+ ### `ConsoleRenderer` (default)
530
+
531
+ Colored terminal output for local development. Falls back to `print` and ASCII glyphs (`>`, `[ok]`, `[FAIL]`) when `typer` is absent or the terminal is not UTF-8 (e.g. Windows cp1252). Every line is tagged with a `[short_id]` (first 6 characters of `story_id`), color-coded per story family, and indented by nesting depth — see [Sub-stories and log capture](#sub-stories-and-log-capture) for how this looks with nested stages and sub-stories:
532
+
533
+ ```python
534
+ from runtime_narrative import ConsoleRenderer
535
+
536
+ with story("My Pipeline", renderers=[ConsoleRenderer()]):
537
+ ...
538
+ ```
539
+
540
+ ### `JsonRenderer`
541
+
542
+ One structured JSON object per lifecycle event. Suitable for log aggregators (Datadog, CloudWatch, Loki):
543
+
544
+ ```python
545
+ from runtime_narrative import JsonRenderer
546
+
547
+ with story("My Pipeline", renderers=[JsonRenderer()]):
548
+ ...
549
+
550
+ # Write to a file
551
+ with story("My Pipeline", renderers=[JsonRenderer(output=open("stories.jsonl", "a"))]):
552
+ ...
553
+ ```
554
+
555
+ On failure, `FailureOccurred` carries the full diagnostics payload: exact location, stack frame classifications, source snippet, local variables (rich mode), compressed stack summary, and traceback text.
556
+
557
+ ### `RotatingJsonRenderer`
558
+
559
+ Same as `JsonRenderer` but writes to a rotating log file using `path.1` / `path.2` semantics. No external dependencies:
560
+
561
+ ```python
562
+ from runtime_narrative import RotatingJsonRenderer
563
+
564
+ renderer = RotatingJsonRenderer(
565
+ "stories.jsonl",
566
+ max_bytes=10_485_760, # rotate at 10 MB (default)
567
+ backup_count=5, # keep .1 through .5 (default)
568
+ )
569
+ ```
570
+
571
+ ### `HtmlReportRenderer`
572
+
573
+ Writes a self-contained HTML report on `StoryCompleted`. Includes a story header, per-stage duration bar chart, and a failure detail section with traceback and optional LLM analysis:
574
+
575
+ ```python
576
+ from runtime_narrative import HtmlReportRenderer
577
+
578
+ with story("ETL Run", renderers=[HtmlReportRenderer("report.html", open_browser=True)]):
579
+ ...
580
+ ```
581
+
582
+ `open_browser=True` calls `webbrowser.open()` on the generated file after writing.
583
+
584
+ ### `SqliteStoryRenderer`
585
+
586
+ Persists all six lifecycle events to a SQLite database. No extra dependencies. See [Persistence and CLI](#persistence-and-cli):
587
+
588
+ ```python
589
+ from runtime_narrative import SqliteStoryRenderer
590
+
591
+ with story("Nightly ETL", renderers=[SqliteStoryRenderer("stories.db")]):
592
+ ...
593
+ ```
594
+
595
+ ### `OtelRenderer` (`[otel]` extra)
596
+
597
+ Maps narrative events to OpenTelemetry spans. Each story is a root span; each stage is a child span:
598
+
599
+ ```python
600
+ from runtime_narrative import OtelRenderer
601
+
602
+ renderer = OtelRenderer(
603
+ exclude_stages={"Health Check"}, # never create child spans for these stages
604
+ min_duration_ms=5.0, # suppress spans shorter than 5 ms
605
+ max_attribute_length=8192, # truncate long string attributes (default)
606
+ )
607
+ ```
608
+
609
+ Attributes on failure spans: `error.type`, `error.message`, `code.filepath`, `code.lineno`, `code.function`, `error.stack_trace`, `narrative.exception_chain`. `LLMAnalysisReady` adds an `llm_analysis_ready` span event with `narrative.llm_analysis`.
610
+
611
+ Stages listed in `exclude_stages` that fail still mark the root span `ERROR` — only the child span is suppressed.
612
+
613
+ ### `OtelLogRenderer` (`[otel]` extra)
614
+
615
+ Emits all six lifecycle events as OpenTelemetry log records via `opentelemetry._logs`:
616
+
617
+ | Event | Severity |
618
+ |---|---|
619
+ | `StoryStarted`, `StoryCompleted`, `LLMAnalysisReady` | `INFO` |
620
+ | `StageStarted`, `StageCompleted` | `DEBUG` |
621
+ | `FailureOccurred` | `ERROR` with `error.type`, `code.filepath`, `error.stack_trace`, etc. |
622
+
623
+ Automatically correlates `trace_id`/`span_id` from the ambient OTel context so logs link to their enclosing spans:
624
+
625
+ ```python
626
+ from runtime_narrative import OtelLogRenderer
627
+
628
+ renderer = OtelLogRenderer(logger_name="my_app")
629
+ ```
630
+
631
+ ### `OtelMetricsRenderer` (`[otel]` extra)
632
+
633
+ Emits four OTel instruments via the OpenTelemetry Metrics API:
634
+
635
+ | Instrument | Type | Labels |
636
+ |---|---|---|
637
+ | `narrative.stage.duration` | Histogram (s) | `story_name`, `stage_name` |
638
+ | `narrative.story.duration` | Histogram (s) | `story_name`, `success` |
639
+ | `narrative.story.failures` | Counter | `story_name`, `error_type` |
640
+ | `narrative.llm.analysis_latency` | Histogram (s) | `story_name` |
641
+
642
+ `narrative.llm.analysis_latency` measures the time between `FailureOccurred` and `LLMAnalysisReady` (only recorded when `background_analysis=True`):
643
+
644
+ ```python
645
+ from runtime_narrative import OtelMetricsRenderer
646
+
647
+ renderer = OtelMetricsRenderer(meter_name="my_app")
648
+ ```
649
+
650
+ ### `PrometheusRenderer` (`[prometheus]` extra)
651
+
652
+ Exposes four Prometheus metrics:
653
+
654
+ | Metric | Type | Labels |
655
+ |---|---|---|
656
+ | `narrative_story_duration_seconds` | Histogram | `story_name`, `success` |
657
+ | `narrative_stage_duration_seconds` | Histogram | `story_name`, `stage_name` |
658
+ | `narrative_story_failures_total` | Counter | `story_name`, `error_type` |
659
+ | `narrative_story_total` | Counter | `story_name`, `success` |
660
+
661
+ ```python
662
+ from runtime_narrative import PrometheusRenderer
663
+ from prometheus_client import CollectorRegistry, start_http_server
664
+
665
+ registry = CollectorRegistry()
666
+ renderer = PrometheusRenderer(registry=registry)
667
+ start_http_server(8000, registry=registry)
668
+ ```
669
+
670
+ ### Combining renderers
671
+
672
+ ```python
673
+ from runtime_narrative import story, JsonRenderer, SqliteStoryRenderer, OtelRenderer
674
+
675
+ with story("Nightly ETL", renderers=[
676
+ JsonRenderer(),
677
+ SqliteStoryRenderer("stories.db"),
678
+ OtelRenderer(),
679
+ ]):
680
+ ...
681
+ ```
682
+
683
+ ---
684
+
685
+ ## Framework integrations
686
+
687
+ ### FastAPI / Starlette
688
+
689
+ `RuntimeNarrativeMiddleware` wraps every HTTP request in `async with story(...)`. Route handlers only need to declare stages — no `story()` context required in handlers:
690
+
691
+ ```python
692
+ from fastapi import FastAPI
693
+ from runtime_narrative import RuntimeNarrativeMiddleware, JsonRenderer, AnthropicFailureAnalyzer, stage
694
+
695
+ app = FastAPI()
696
+ app.add_middleware(
697
+ RuntimeNarrativeMiddleware,
698
+ renderers=[JsonRenderer()],
699
+ failure_analyzer=AnthropicFailureAnalyzer(),
700
+ runtime_environment="production",
701
+ )
702
+
703
+ @app.post("/orders")
704
+ async def create_order(payload: OrderIn):
705
+ with stage("Validate Input"):
706
+ validate(payload)
707
+ with stage("Persist Order"):
708
+ order = await db.insert(payload)
709
+ return {"id": order.id}
710
+ ```
711
+
712
+ Each request becomes a story named `"METHOD /path"` (e.g. `"POST /orders"`). When `renderers` is not passed, the middleware auto-selects `ConsoleRenderer` on a TTY and `JsonRenderer` otherwise.
713
+
714
+ When `opentelemetry-api` is installed, the middleware automatically extracts incoming W3C `traceparent`/`tracestate` headers so story spans become children of the upstream trace:
715
+
716
+ ```python
717
+ app.add_middleware(
718
+ RuntimeNarrativeMiddleware,
719
+ propagate_trace_context=True, # default; set False to disable
720
+ )
721
+ ```
722
+
723
+ Use `skip_if` to bypass story wrapping for specific routes (health checks, readiness probes, etc.):
724
+
725
+ ```python
726
+ app.add_middleware(
727
+ RuntimeNarrativeMiddleware,
728
+ renderers=[JsonRenderer()],
729
+ skip_if=lambda req: req.url.path in {"/health", "/ready"},
730
+ )
731
+ ```
732
+
733
+ Run: `uv run python examples/middleware_skip_if.py`
734
+
735
+ ### Django
736
+
737
+ **ASGI (async):**
738
+
739
+ ```python
740
+ # settings.py
741
+ MIDDLEWARE = [
742
+ "runtime_narrative.middleware_django.RuntimeNarrativeDjangoMiddleware",
743
+ ...
744
+ ]
745
+ ```
746
+
747
+ **WSGI (sync):**
748
+
749
+ ```python
750
+ # settings.py
751
+ MIDDLEWARE = [
752
+ "runtime_narrative.middleware_django.RuntimeNarrativeDjangoSyncMiddleware",
753
+ ...
754
+ ]
755
+ ```
756
+
757
+ Story name is `"METHOD /path"`. Requires `pip install "runtime-narrative[django]"`.
758
+
759
+ ### Celery
760
+
761
+ ```python
762
+ from celery import Celery
763
+ from runtime_narrative import NarrativeTask, connect_narrative, JsonRenderer, AnthropicFailureAnalyzer, stage
764
+
765
+ app = Celery("myapp")
766
+
767
+ # Option A — per task
768
+ @app.task(base=NarrativeTask)
769
+ def process_order(order_id: str) -> None:
770
+ with stage("Validate"): validate(order_id)
771
+ with stage("Charge"): charge(order_id)
772
+
773
+ # Option B — set defaults for all tasks globally
774
+ connect_narrative(
775
+ app,
776
+ renderers=[JsonRenderer()],
777
+ failure_analyzer=AnthropicFailureAnalyzer(),
778
+ )
779
+ ```
780
+
781
+ Story name is `"<task.name> [task_id=<id>]"`. Override options per task by setting `narrative_*` class attributes directly. Requires `pip install "runtime-narrative[celery]"`.
782
+
783
+ ### gRPC
784
+
785
+ ```python
786
+ import grpc
787
+ from runtime_narrative import RuntimeNarrativeAsyncInterceptor, JsonRenderer
788
+
789
+ # Async server
790
+ server = grpc.aio.server(
791
+ interceptors=[RuntimeNarrativeAsyncInterceptor(renderers=[JsonRenderer()])],
792
+ )
793
+
794
+ # Sync server
795
+ from runtime_narrative import RuntimeNarrativeInterceptor
796
+ server = grpc.server(
797
+ thread_pool,
798
+ interceptors=[RuntimeNarrativeInterceptor(renderers=[JsonRenderer()])],
799
+ )
800
+ ```
801
+
802
+ Story name is the full gRPC method path, e.g. `"/mypackage.MyService/MyMethod"`. Requires `pip install "runtime-narrative[grpc]"`.
803
+
804
+ ---
805
+
806
+ ## Async task groups
807
+
808
+ `NarrativeTaskGroup` runs concurrent `asyncio` tasks under a shared story. Tasks inherit the parent story context automatically via `ContextVar` copy, so `stage()` calls inside tasks are tracked normally:
809
+
810
+ ```python
811
+ import asyncio
812
+ from runtime_narrative import story, NarrativeTaskGroup, NarrativeTaskGroupError
813
+
814
+ async def main():
815
+ async with story("Parallel Pipeline"):
816
+ async with NarrativeTaskGroup() as tg:
817
+ tg.create_task(fetch_orders(), name="Fetch Orders")
818
+ tg.create_task(fetch_inventory(), name="Fetch Inventory")
819
+ # waits for both; stages from both appear in the timeline
820
+
821
+ asyncio.run(main())
822
+ ```
823
+
824
+ If tasks fail, `NarrativeTaskGroupError` is raised with `failed_tasks: dict[str, BaseException]`:
825
+
826
+ ```python
827
+ try:
828
+ async with NarrativeTaskGroup() as tg:
829
+ tg.create_task(risky_job(), name="Risky Job")
830
+ except NarrativeTaskGroupError as e:
831
+ for task_name, exc in e.failed_tasks.items():
832
+ print(f"{task_name} failed: {exc}")
833
+ ```
834
+
835
+ No extra dependencies. Python 3.9+.
836
+
837
+ ---
838
+
839
+ ## Persistence and CLI
840
+
841
+ `SqliteStoryRenderer` records all six lifecycle events to a local SQLite database. No extra dependencies:
842
+
843
+ ```python
844
+ from runtime_narrative import story, SqliteStoryRenderer
845
+
846
+ with story("Nightly ETL", renderers=[SqliteStoryRenderer("stories.db")]):
847
+ ...
848
+ ```
849
+
850
+ **Schema:**
851
+
852
+ | Table | Key columns |
853
+ |---|---|
854
+ | `stories` | `story_id`, `name`, `success`, `duration_seconds`, `started_at`, `completed_at` |
855
+ | `stages` | `story_id`, `stage_name`, `stage_index`, `parent_stage_name`, `duration_seconds`, `completed`, `failed` |
856
+ | `failures` | `story_id`, `stage_name`, `error_type`, `error_message`, `filename`, `lineno`, `traceback_text`, `llm_analysis` |
857
+
858
+ `LLMAnalysisReady` back-fills the `llm_analysis` column in `failures` so background analysis results are persisted even when they arrive after `StoryCompleted`.
859
+
860
+ **CLI** (registered as `runtime-narrative`):
861
+
862
+ ```bash
863
+ # List the 10 most recent failures
864
+ runtime-narrative failures --db stories.db
865
+
866
+ # Filter by stage name or story name (LIKE pattern)
867
+ runtime-narrative failures --last 25 --stage "Insert Records"
868
+ runtime-narrative failures --story "Nightly ETL"
869
+
870
+ # Show the full detail for one story
871
+ runtime-narrative story <story_id> --db stories.db
872
+ ```
873
+
874
+ The `--db` flag defaults to `runtime_narrative.db` in the current directory.
875
+
876
+ ---
877
+
878
+ ## Alert routing
879
+
880
+ `AlertRoutingRenderer` fans out `FailureOccurred` events to webhook destinations concurrently. Destination failures are logged to stderr and swallowed — they never crash the story:
881
+
882
+ ```python
883
+ from runtime_narrative import (
884
+ story,
885
+ AlertRoutingRenderer,
886
+ SlackWebhookDestination,
887
+ HttpWebhookDestination,
888
+ )
889
+
890
+ renderer = AlertRoutingRenderer(
891
+ destinations=[
892
+ SlackWebhookDestination("https://hooks.slack.com/services/..."),
893
+ HttpWebhookDestination(
894
+ "https://alerts.example.com/webhook",
895
+ headers={"Authorization": "Bearer ..."},
896
+ timeout=5.0,
897
+ ),
898
+ ],
899
+ only_stories={"Nightly ETL", "Payment Processor"}, # None = all stories
900
+ only_error_types={"ValueError", "TimeoutError"}, # None = all error types
901
+ )
902
+
903
+ async with story("Nightly ETL", renderers=[renderer]):
904
+ ...
905
+ ```
906
+
907
+ `SlackWebhookDestination` sends a Block Kit message with a header, error detail section, and an optional analysis section when `llm_analysis` is present.
908
+
909
+ `HttpWebhookDestination` POSTs a JSON payload containing: `story_id`, `story_name`, `stage_name`, `error_type`, `error_message`, `filename`, `lineno`, `function`, `llm_analysis`, `timestamp`.
910
+
911
+ ---
912
+
913
+ ## `dry_run` mode
914
+
915
+ `dry_run=True` suppresses exceptions raised inside stage bodies, marks all stages completed, and emits `StoryCompleted(success=True)`. Useful for smoke-testing instrumentation wiring without triggering real side effects:
916
+
917
+ ```python
918
+ with story("Nightly ETL", dry_run=True):
919
+ with stage("Load Warehouse"):
920
+ raise IOError("would connect to DB in prod") # suppressed
921
+ with stage("Transform"):
922
+ raise RuntimeError("would run transforms") # suppressed
923
+ # both stages are marked completed; StoryCompleted(success=True) is emitted for the story
924
+ ```
925
+
926
+ ---
927
+
928
+ ## Testing utilities
929
+
930
+ `StoryRecorder` is a dual sync/async context manager that captures story events for assertions. No output is produced:
931
+
932
+ ```python
933
+ from runtime_narrative import stage
934
+ from runtime_narrative.testing import StoryRecorder
935
+
936
+ def test_pipeline_success():
937
+ with StoryRecorder("ETL") as recorder:
938
+ with stage("Load"): rows = [1, 2, 3]
939
+ with stage("Insert"): db.insert(rows)
940
+
941
+ recorder.assert_stages_completed(["Load", "Insert"])
942
+ recorder.assert_no_failure()
943
+ recorder.assert_story_completed(success=True)
944
+
945
+ def test_invalid_input_fails_at_validate():
946
+ import pytest
947
+
948
+ with pytest.raises(ValueError):
949
+ with StoryRecorder("ETL") as recorder:
950
+ with stage("Load"): pass
951
+ with stage("Validate"): raise ValueError("bad schema")
952
+
953
+ recorder.assert_stage_failed("Validate", error_type="ValueError")
954
+ recorder.assert_story_completed(success=False)
955
+ ```
956
+
957
+ Works as `async with StoryRecorder(...)` too. Pass any `story()` kwargs including `dry_run=True`:
958
+
959
+ ```python
960
+ with StoryRecorder("ETL", dry_run=True) as recorder:
961
+ run_pipeline() # side effects suppressed
962
+
963
+ recorder.assert_stages_completed(["Load", "Validate", "Insert"])
964
+ recorder.assert_no_failure()
965
+ ```
966
+
967
+ **Assertion methods:**
968
+
969
+ | Method | What it checks |
970
+ |---|---|
971
+ | `assert_stages_completed(names)` | All named stages appear in `StageCompleted` events |
972
+ | `assert_no_failure()` | No `FailureOccurred` event was emitted |
973
+ | `assert_stage_failed(name, error_type=None)` | A `FailureOccurred` event at that stage name; optionally checks `error_type` |
974
+ | `assert_story_completed(success=None)` | A `StoryCompleted` event was emitted; optionally checks the `success` flag |
975
+
976
+ ---
977
+
978
+ ## Custom renderers and analyzers
979
+
980
+ ### Custom renderer
981
+
982
+ Implement `handle(self, event: object)`. Async renderers (`async def handle`) are awaited inside `async with story(...)`:
983
+
984
+ ```python
985
+ class PagerDutyRenderer:
986
+ async def handle(self, event: object) -> None:
987
+ if type(event).__name__ == "FailureOccurred":
988
+ await pagerduty.trigger(
989
+ summary=f"{event.story_name} failed at {event.stage_name}",
990
+ details={"error": event.error_type, "analysis": event.llm_analysis},
991
+ )
992
+
993
+ async with story("Nightly ETL", renderers=[PagerDutyRenderer()]):
994
+ ...
995
+ ```
996
+
997
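+ Six lifecycle event types are emitted (a seventh, `LogRecorded`, is emitted only when `NarrativeLogHandler` is installed). Key fields on each lifecycle event: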
998
+
999
+ | Event | Key fields |
1000
+ |---|---|
1001
+ | `StoryStarted` | `story_id`, `story_name`, `timestamp` |
1002
+ | `StageStarted` | `story_id`, `story_name`, `stage_name`, `timestamp`, `stage_index` (0-based), `parent_stage_name` |
1003
+ | `StageCompleted` | `story_id`, `story_name`, `stage_name`, `timestamp`, `duration_seconds`, `stage_index`, `parent_stage_name` |
1004
+ | `FailureOccurred` | `story_id`, `story_name`, `stage_name`, `error_type`, `error_message`, `filename`, `lineno`, `function`, `traceback_text`, `exception_chain`, `exact_cause`, `stage_timeline`, `progress_percent`, `llm_analysis`, `diagnostics_mode`, `stack_frames`, `source_snippet`, `compressed_stack_summary`, `locals_by_frame` |
1005
+ | `StoryCompleted` | `story_id`, `story_name`, `success`, `progress_percent`, `completed_stages`, `total_stages`, `timestamp` |
1006
+ | `LLMAnalysisReady` | `story_id`, `story_name`, `stage_name`, `llm_analysis`, `timestamp` |
1007
+
1008
+ `parent_stage_name` is `None` for top-level stages and set to the enclosing stage name for nested stages. `story_name` on stage events lets a renderer filter by story without a `story_id → story_name` side table (run: `uv run python examples/stage_story_name.py`).
1009
+
1010
+ ### Custom failure analyzer
1011
+
1012
+ Implement `analyze_failure(...)`. Add `analyze_failure_async(...)` for native async — otherwise the sync method is called via `asyncio.to_thread`:
1013
+
1014
+ ```python
1015
+ class MyAnalyzer:
1016
+ async def analyze_failure_async(
1017
+ self,
1018
+ *,
1019
+ story_name: str,
1020
+ stage_name: str,
1021
+ failure, # FailureSummary: .error_type, .error_message, .filename,
1022
+ # .lineno, .function, .source_line,
1023
+ # .traceback_text, .exception_chain
1024
+ stage_timeline: str,
1025
+ progress_percent: int,
1026
+ ) -> str | None:
1027
+ result = await my_llm_client.complete(build_prompt(failure))
1028
+ return result.text
1029
+
1030
+ def analyze_failure(self, *, story_name, stage_name, failure, stage_timeline, progress_percent):
1031
+ # sync fallback used when called from sync story()
1032
+ return requests.post(...).json()["text"]
1033
+
1034
+ with story("Import", failure_analyzer=MyAnalyzer()):
1035
+ ...
1036
+ ```
1037
+
1038
+ ---
1039
+
1040
+ ## Utilities
1041
+
1042
+ ### `has_active_story()`
1043
+
1044
+ Returns `True` when a `story()` context is active in the current sync or async context. Useful for library code that should behave differently when called under instrumentation:
1045
+
1046
+ ```python
1047
+ from runtime_narrative import has_active_story, stage
1048
+
1049
+ def send_email(to: str, body: str) -> None:
1050
+ if has_active_story():
1051
+ # stage() is safe here
1052
+ with stage("Send Email", optional=True):
1053
+ _send(to, body)
1054
+ else:
1055
+ _send(to, body)
1056
+ ```
1057
+
1058
+ ### `stage(optional=True)`
1059
+
1060
+ When `optional=True`, a `stage()` outside an active story is a no-op — no exception, no events, no tracking. When inside a story it behaves normally. Ideal for shared utilities:
1061
+
1062
+ ```python
1063
+ from runtime_narrative import stage
1064
+
1065
+ def enrich_record(record: dict) -> dict:
1066
+ with stage("Enrich Record", optional=True):
1067
+ return _lookup(record)
1068
+ return record # the stage body runs with or without a story; reached only if an exception from the body is suppressed (e.g. dry_run=True)
1069
+ ```
1070
+
1071
+ Run: `uv run python examples/optional_stage.py`
1072
+
1073
+ ### `StoryRuntime.record_failure()`
1074
+
1075
+ Emits a `FailureOccurred` event (with full diagnostics) without owning exception propagation. Use this in saga/rollback flows where a step fails and is compensated, but you still want the story to complete successfully:
1076
+
1077
+ ```python
1078
+ async with story("Payment Saga", renderers=[JsonRenderer()]) as runtime:
1079
+ async with stage("Charge Card"):
1080
+ charge_id = await charge(order)
1081
+
1082
+ try:
1083
+ async with stage("Reserve Inventory"):
1084
+ await reserve(order)
1085
+ except InventoryError as exc:
1086
+ async with stage("Refund Charge"):
1087
+ await refund(charge_id)
1088
+ await runtime.record_failure(exc, stage_name="Reserve Inventory")
1089
+ # FailureOccurred emitted; story still completes success=True
1090
+ ```
1091
+
1092
+ Run: `uv run python examples/saga_record_failure.py`
1093
+
1094
+ ---
1095
+
1096
+ ## Sub-stories and log capture
1097
+
1098
+ ### Sub-stories
1099
+
1100
+ Opening a `story()` while another is already active (in the same sync/async context) makes it a **sub-story**: it inherits the parent's `renderers`, `diagnostics_config`, and `failure_analyzer` unless you pass your own, and its `StoryStarted`/`StoryCompleted`/`FailureOccurred` events carry `parent_story_id` and `root_story_id` so the whole call tree can be reconstructed from events alone — no new API, no tree data structure to maintain yourself:
1101
+
1102
+ ```python
1103
+ async def create_order(payload):
1104
+ async with story("POST /orders") as api_story:
1105
+ async with stage("Persist Order"):
1106
+ # Same story() primitive. Because api_story is already active,
1107
+ # this becomes a sub-story: parent_story_id == api_story.story_id,
1108
+ # root_story_id == api_story.story_id (or further up if api_story
1109
+ # is itself nested), and renderers/diagnostics are inherited.
1110
+ async with story("DB: INSERT orders") as db_story:
1111
+ async with stage("Execute Query"):
1112
+ await conn.execute("INSERT INTO orders ...")
1113
+ ```
1114
+
1115
+ Each sub-story succeeds or fails independently (a failed DB call doesn't automatically fail the API story unless the exception propagates or you call `record_failure`), and gets its own `duration_seconds` on `StoryCompleted`. `OtelRenderer` maps this to proper parent/child spans automatically.
1116
+
1117
+ Because linkage is derived from `ContextVar` state at the moment `story()` is entered — not from a shared registry — the same reusable function (e.g. an `execute_query()` helper) can be called from many different parent stories, including concurrently: `asyncio.Task` copies context at creation and each OS thread starts with a fresh top-level context, so concurrent API calls sharing one DB helper never cross-contaminate each other's story tree.
1118
+
1119
+ Run: `uv run python examples/substory_db_call.py`
1120
+
1121
+ ### `NarrativeLogHandler` — capture existing `logging` calls into a story
1122
+
1123
+ If your application already uses `logging.warning()`/`.error()`, `NarrativeLogHandler` routes those records into the same event pipeline as `story()`/`stage()` — one stream instead of two:
1124
+
1125
+ ```python
1126
+ import logging
1127
+ from runtime_narrative import NarrativeLogHandler
1128
+
1129
+ logging.getLogger().addHandler(NarrativeLogHandler(level=logging.WARNING))
1130
+ ```
1131
+
1132
+ Each captured record becomes a `LogRecorded` event (`story_id`, `root_story_id`, `stage_name`, `level`, `logger_name`, `message`, optional `exc_text`) emitted through the active story's renderers. Outside an active story, records fall through to an optional `fallback` handler so nothing is silently dropped:
1133
+
1134
+ ```python
1135
+ NarrativeLogHandler(level=logging.WARNING, fallback=logging.StreamHandler())
1136
+ ```
1137
+
1138
+ `ConsoleRenderer` prints every event (including `LogRecorded`) with a `[short_id]` tag — the first 6 characters of that event's `story_id` — so a specific call is identifiable when scanning or searching output. All events belonging to one story family (a root story and any sub-stories) additionally render in the same deterministic color, and lines are indented one level per stage/sub-story nesting depth, so the call tree (API call → DB sub-story → its own stages) is visible directly in the log output:
1139
+
1140
+ ```
1141
+ [ad8cc2] ▶ Story started: POST /orders
1142
+ [ad8cc2] ▶ Stage started: Persist Order
1143
+ [d17c63] ▶ Story started: DB: INSERT orders
1144
+ [d17c63] ▶ Stage started: Execute Query
1145
+ [d17c63] ✔ Stage completed: Execute Query (0.021s)
1146
+ [d17c63] ▶ Story ended: SUCCESS (0.034s)
1147
+ [ad8cc2] ✔ Stage completed: Persist Order (0.034s)
1148
+ [ad8cc2] ▶ Story ended: SUCCESS (0.052s)
1149
+ ```
1150
+
1151
+ Run: `uv run python examples/logging_bridge.py`
1152
+
1153
+ ---
1154
+
1155
+ ## Environment variables
1156
+
1157
+ | Variable | Values | Default | Effect |
1158
+ |---|---|---|---|
1159
+ | `RUNTIME_NARRATIVE_ENV` | `development`, `production` | `development` | Production caps tracebacks to 8 000 chars and forces lean mode |
1160
+ | `RUNTIME_NARRATIVE_FAILURE_DIAGNOSTICS` | `lean`, `rich` | `lean` | `rich` captures local variable values at the failing frames. Invalid values raise `ValueError` at story construction. |
1161
+ | `RUNTIME_NARRATIVE_ALLOW_RICH_IN_PRODUCTION` | `1`, `true` | off | Bypass production safeguard; allow rich diagnostics in production |
1162
+ | `RUNTIME_NARRATIVE_MODEL` | model name string | — | Default model for `AnthropicFailureAnalyzer`; also used by example scripts for `OllamaFailureAnalyzer` / `LLMFailureAnalyzer` |
1163
+ | `ANTHROPIC_API_KEY` | API key | — | Required by `AnthropicFailureAnalyzer`; read automatically if `api_key=` is not passed |
1164
+
1165
+ ---
1166
+
1167
+ ## Python compatibility
1168
+
1169
+ Python 3.9 – 3.13. Zero required dependencies beyond `python-dotenv`.