llmflow-core 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. llmflow_core-0.0.2/.gitignore +25 -0
  2. llmflow_core-0.0.2/PKG-INFO +347 -0
  3. llmflow_core-0.0.2/README.md +315 -0
  4. llmflow_core-0.0.2/examples/blog_pipeline/prompts/critique.md +11 -0
  5. llmflow_core-0.0.2/examples/blog_pipeline/prompts/outline.md +8 -0
  6. llmflow_core-0.0.2/examples/blog_pipeline/prompts/revise.md +12 -0
  7. llmflow_core-0.0.2/examples/blog_pipeline/schemas/critique.json +23 -0
  8. llmflow_core-0.0.2/examples/blog_pipeline/schemas/final_article.json +11 -0
  9. llmflow_core-0.0.2/examples/blog_pipeline/schemas/outline.json +14 -0
  10. llmflow_core-0.0.2/examples/blog_pipeline/tools.py +5 -0
  11. llmflow_core-0.0.2/examples/blog_pipeline/workflow.yaml +39 -0
  12. llmflow_core-0.0.2/pyproject.toml +65 -0
  13. llmflow_core-0.0.2/requirements.txt +10 -0
  14. llmflow_core-0.0.2/src/llmflow/__init__.py +45 -0
  15. llmflow_core-0.0.2/src/llmflow/artifacts.py +253 -0
  16. llmflow_core-0.0.2/src/llmflow/cli.py +188 -0
  17. llmflow_core-0.0.2/src/llmflow/errors.py +102 -0
  18. llmflow_core-0.0.2/src/llmflow/graph.py +63 -0
  19. llmflow_core-0.0.2/src/llmflow/hashing.py +11 -0
  20. llmflow_core-0.0.2/src/llmflow/providers/__init__.py +11 -0
  21. llmflow_core-0.0.2/src/llmflow/providers/base.py +93 -0
  22. llmflow_core-0.0.2/src/llmflow/providers/mock.py +54 -0
  23. llmflow_core-0.0.2/src/llmflow/registry.py +113 -0
  24. llmflow_core-0.0.2/src/llmflow/replay.py +89 -0
  25. llmflow_core-0.0.2/src/llmflow/runner.py +158 -0
  26. llmflow_core-0.0.2/src/llmflow/steps/__init__.py +16 -0
  27. llmflow_core-0.0.2/src/llmflow/steps/base.py +27 -0
  28. llmflow_core-0.0.2/src/llmflow/steps/llm.py +124 -0
  29. llmflow_core-0.0.2/src/llmflow/steps/tool.py +20 -0
  30. llmflow_core-0.0.2/src/llmflow/steps/validate.py +58 -0
  31. llmflow_core-0.0.2/src/llmflow/workflow.py +251 -0
  32. llmflow_core-0.0.2/tests/conftest.py +10 -0
  33. llmflow_core-0.0.2/tests/test_artifacts.py +135 -0
  34. llmflow_core-0.0.2/tests/test_cli.py +83 -0
  35. llmflow_core-0.0.2/tests/test_examples_blog_pipeline.py +83 -0
  36. llmflow_core-0.0.2/tests/test_graph.py +53 -0
  37. llmflow_core-0.0.2/tests/test_llm_step.py +56 -0
  38. llmflow_core-0.0.2/tests/test_providers.py +45 -0
  39. llmflow_core-0.0.2/tests/test_registry.py +43 -0
  40. llmflow_core-0.0.2/tests/test_replay.py +89 -0
  41. llmflow_core-0.0.2/tests/test_runner.py +98 -0
  42. llmflow_core-0.0.2/tests/test_tool_step.py +42 -0
  43. llmflow_core-0.0.2/tests/test_validate_step.py +58 -0
  44. llmflow_core-0.0.2/tests/test_workflow_load.py +103 -0
@@ -0,0 +1,25 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .dist-info/
6
+ .pytest_cache/
7
+ .mypy_cache/
8
+ .ruff_cache/
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+
14
+ # Build
15
+ build/
16
+ dist/
17
+
18
+ # Logs and artifacts
19
+ .runs/
20
+ *.log
21
+
22
+ # OS
23
+ .DS_Store
24
+ AGENTS.md
25
+ .coverage
@@ -0,0 +1,347 @@
1
+ Metadata-Version: 2.4
2
+ Name: llmflow-core
3
+ Version: 0.0.2
4
+ Summary: Deterministic LLM workflow engine
5
+ Project-URL: Homepage, https://github.com/ibrahim1023/llmflow-core
6
+ Project-URL: Repository, https://github.com/ibrahim1023/llmflow-core
7
+ Project-URL: Issues, https://github.com/ibrahim1023/llmflow-core/issues
8
+ Author: Ibrahim
9
+ License: MIT
10
+ Keywords: deterministic,llm,pipelines,workflow
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: jinja2>=3.1
21
+ Requires-Dist: jsonschema>=4.0
22
+ Requires-Dist: pydantic>=2.0
23
+ Requires-Dist: pyyaml>=6.0
24
+ Requires-Dist: rich>=13.0
25
+ Requires-Dist: typer>=0.9
26
+ Provides-Extra: dev
27
+ Requires-Dist: build>=1.2.2; extra == 'dev'
28
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
29
+ Requires-Dist: pytest>=7.0; extra == 'dev'
30
+ Requires-Dist: twine>=5.1.1; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # llmflow-core
34
+
35
+ Deterministic LLM workflow engine for file-defined, schema-validated pipelines.
36
+
37
+ ## Overview
38
+
39
+ `llmflow-core` executes explicit workflow DAGs from YAML and prompt files.
40
+ It is designed for predictable execution, strict output contracts, and replayable
41
+ artifacts.
42
+
43
+ Core guarantees:
44
+
45
+ - Stable topological execution order
46
+ - Fail-fast behavior on first step error
47
+ - JSON-schema validation for LLM outputs
48
+ - Run artifacts for audit and replay
49
+
50
+ ## Why this exists
51
+
52
+ Production LLM pipelines often fail on three basics:
53
+
54
+ - Step order is implicit and hard to reason about
55
+ - Outputs drift from expected structure
56
+ - Runs are difficult to reproduce and debug
57
+
58
+ `llmflow-core` addresses these problems with file-defined workflows, strict validation,
59
+ and deterministic run traces.
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ python -m pip install -e .
65
+ ```
66
+
67
+ Install with test dependencies:
68
+
69
+ ```bash
70
+ python -m pip install -e '.[dev]'
71
+ ```
72
+
73
+ ## Quickstart
74
+ ### 1) Run with Python API
75
+
76
+ ```python
77
+ from llmflow import MockProvider, RunConfig, Runner, Workflow
78
+
79
+ workflow = Workflow.load("examples/blog_pipeline/workflow.yaml")
80
+
81
+ runner = Runner(
82
+ provider=MockProvider(default_output="{}", strict=False),
83
+ config=RunConfig(artifacts_dir=".runs", provider_name="mock"),
84
+ )
85
+
86
+ result = runner.run(
87
+ workflow,
88
+ inputs={"topic": "Deterministic AI", "audience": "Engineering managers"},
89
+ )
90
+
91
+ print(result.outputs)
92
+ print(result.run_dir)
93
+ ```
94
+
95
+ Note:
96
+
97
+ - `MockProvider(default_output=...)` returns the same JSON for every LLM step.
98
+ - If your workflow has different per-step schemas, use per-prompt mock responses
99
+ (see Example Workflow below).
100
+
101
+ ### 2) Run with CLI
102
+
103
+ ```bash
104
+ llmflow run examples/blog_pipeline/workflow.yaml \
105
+ --input topic="Deterministic AI" \
106
+ --input audience="Engineering managers" \
107
+ --mock-output '{"title":"Draft","summary":"S","body":"B"}'
108
+ ```
109
+
110
+ ### 3) Inspect and replay
111
+
112
+ ```bash
113
+ llmflow graph examples/blog_pipeline/workflow.yaml
114
+ llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
115
+ ```
116
+
117
+ ## CLI
118
+
119
+ The CLI is a thin wrapper over the library API.
120
+
121
+ Commands:
122
+
123
+ - `llmflow run <workflow.yaml> --input key=value ...`
124
+ - `llmflow graph <workflow.yaml>`
125
+ - `llmflow replay <run_dir>`
126
+
127
+ Example:
128
+
129
+ ```bash
130
+ llmflow run examples/blog_pipeline/workflow.yaml \
131
+ --input topic="Deterministic AI" \
132
+ --input audience="Engineering managers" \
133
+ --mock-output '{"title":"Draft","summary":"S","body":"B"}'
134
+
135
+ llmflow graph examples/blog_pipeline/workflow.yaml
136
+ llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
137
+ ```
138
+
139
+ CLI mock behavior:
140
+
141
+ - `--mock-output` and `--mock-output-file` are applied to all LLM steps in the run.
142
+ - For workflows with heterogeneous per-step schemas, prefer Python API tests with
143
+ prompt-specific mock responses.
144
+
145
+ ## Example workflow
146
+
147
+ Repository example: `examples/blog_pipeline`
148
+
149
+ Contents:
150
+
151
+ - `examples/blog_pipeline/workflow.yaml`
152
+ - `examples/blog_pipeline/prompts/outline.md`
153
+ - `examples/blog_pipeline/prompts/critique.md`
154
+ - `examples/blog_pipeline/prompts/revise.md`
155
+ - `examples/blog_pipeline/schemas/outline.json`
156
+ - `examples/blog_pipeline/schemas/critique.json`
157
+ - `examples/blog_pipeline/schemas/final_article.json`
158
+ - `examples/blog_pipeline/tools.py`
159
+
160
+ The end-to-end deterministic example run is validated by:
161
+
162
+ - `tests/test_examples_blog_pipeline.py`
163
+
164
+ ## Workflow YAML format
165
+ Minimal shape:
166
+
167
+ ```yaml
168
+ workflow:
169
+ name: blog_post_pipeline
170
+ version: "1.0"
171
+
172
+ inputs:
173
+ topic:
174
+ type: string
175
+ audience:
176
+ type: string
177
+
178
+ steps:
179
+ - id: outline
180
+ type: llm
181
+ prompt: prompts/outline.md
182
+ output_schema: schemas/outline.json
183
+ llm:
184
+ model: mock-model
185
+ temperature: 0
186
+
187
+ - id: critique
188
+ type: llm
189
+ depends_on: [outline]
190
+ prompt: prompts/critique.md
191
+ output_schema: schemas/critique.json
192
+ llm:
193
+ model: mock-model
194
+ temperature: 0
195
+
196
+ outputs:
197
+ article: critique
198
+ ```
199
+
200
+ Rules:
201
+ - `workflow.name` and `workflow.version` are required.
202
+ - `inputs` is a mapping of input names to type declarations.
203
+ - Each step needs a unique `id` and `type`.
204
+ - `depends_on` must reference existing step ids.
205
+ - `outputs` maps final output names to step ids.
206
+ - For `llm` steps, `prompt`, `output_schema`, and `llm.model` are required.
207
+
208
+ ## Replay
209
+
210
+ Replay reconstructs outputs from recorded artifacts and verifies they match the
211
+ recorded `outputs.json` exactly.
212
+
213
+ ```python
214
+ from llmflow import replay
215
+
216
+ result = replay(".runs/run_YYYYMMDD_HHMMSS_<shortid>")
217
+ print(result.outputs)
218
+ ```
219
+
220
+ Optional workflow override:
221
+
222
+ ```python
223
+ result = replay(
224
+ ".runs/run_YYYYMMDD_HHMMSS_<shortid>",
225
+ workflow_path="examples/blog_pipeline/workflow.yaml",
226
+ )
227
+ ```
228
+
229
+ ## Artifacts
230
+
231
+ Each run writes a folder under `.runs/`:
232
+
233
+ ```text
234
+ .runs/
235
+ run_YYYYMMDD_HHMMSS_<shortid>/
236
+ metadata.json
237
+ inputs.json
238
+ outputs.json
239
+ steps/
240
+ <step_id>/
241
+ output.json
242
+ rendered_prompt.md
243
+ llm_call.json
244
+ logs.txt
245
+ ```
246
+
247
+ `metadata.json` includes:
248
+
249
+ - `artifacts_version`
250
+ - engine version
251
+ - workflow name, version, and hash
252
+ - provider name
253
+ - execution order
254
+ - prompt hashes and step output hashes
255
+ - timestamps
256
+
257
+ Typical step artifacts:
258
+
259
+ - `steps/<step_id>/output.json`: Validated step output payload
260
+ - `steps/<step_id>/rendered_prompt.md`: Rendered prompt text for LLM steps
261
+ - `steps/<step_id>/llm_call.json`: Provider request/response metadata for LLM steps
262
+
263
+ ## Extending the engine
264
+
265
+ ### Providers
266
+ Implement `Provider.call(request) -> ProviderResponse` and pass the provider to
267
+ `Runner`.
268
+
269
+ ```python
270
+ from llmflow.providers import Provider, ProviderRequest, ProviderResponse
271
+
272
+
273
+ class StaticProvider(Provider):
274
+ def call(self, request: ProviderRequest) -> ProviderResponse:
275
+ return ProviderResponse(
276
+ model=request.model,
277
+ output_text='{"title":"Draft","summary":"S","body":"B"}',
278
+ raw={"provider": "static"},
279
+ )
280
+ ```
281
+
282
+ ### Tools
283
+ Register Python functions in `ToolRegistry`. Tool functions accept merged step
284
+ inputs and must return a `dict`.
285
+
286
+ ```python
287
+ from llmflow.registry import ToolRegistry
288
+
289
+ tools = ToolRegistry()
290
+ tools.register("summarize_topic", lambda inputs: {"topic_slug": inputs["topic"].lower()})
291
+ ```
292
+
293
+ ### Validators
294
+ Register custom validators in `ValidatorRegistry`. Validator functions accept
295
+ merged step inputs and should return `True`/`None` on success, or `False` on
296
+ failure.
297
+
298
+ ```python
299
+ from llmflow.registry import ValidatorRegistry
300
+
301
+ validators = ValidatorRegistry()
302
+ validators.register("has_summary", lambda inputs: bool(inputs.get("summary")))
303
+ ```
304
+
305
+ ### Custom steps
306
+ Register custom step classes in `StepRegistry` when you need a new execution
307
+ primitive beyond `llm`, `tool`, and `validate`.
308
+
309
+ ## Testing
310
+
311
+ Run all tests:
312
+
313
+ ```bash
314
+ pytest
315
+ ```
316
+
317
+ Run tests with coverage for core modules:
318
+
319
+ ```bash
320
+ pytest --cov=llmflow --cov-report=term-missing
321
+ ```
322
+
323
+ Determinism and replay checks:
324
+
325
+ ```bash
326
+ pytest tests/test_replay.py tests/test_examples_blog_pipeline.py
327
+ ```
328
+
329
+ Run a local command equivalent to CI:
330
+
331
+ ```bash
332
+ python -m pip install -e '.[dev]'
333
+ pytest --cov=llmflow --cov-report=term-missing
334
+ ```
335
+
336
+ ## Current status
337
+
338
+ Implemented through Phase 11:
339
+
340
+ - Core workflow loading and validation
341
+ - Graph ordering and cycle detection
342
+ - LLM/tool/validate steps
343
+ - Artifacts and metadata
344
+ - Runner and replay
345
+ - CLI (`run`, `graph`, `replay`)
346
+ - Example workflow (`examples/blog_pipeline`)
347
+ - CI workflow and coverage command (`pytest --cov=llmflow`)
@@ -0,0 +1,315 @@
1
+ # llmflow-core
2
+
3
+ Deterministic LLM workflow engine for file-defined, schema-validated pipelines.
4
+
5
+ ## Overview
6
+
7
+ `llmflow-core` executes explicit workflow DAGs from YAML and prompt files.
8
+ It is designed for predictable execution, strict output contracts, and replayable
9
+ artifacts.
10
+
11
+ Core guarantees:
12
+
13
+ - Stable topological execution order
14
+ - Fail-fast behavior on first step error
15
+ - JSON-schema validation for LLM outputs
16
+ - Run artifacts for audit and replay
17
+
18
+ ## Why this exists
19
+
20
+ Production LLM pipelines often fail on three basics:
21
+
22
+ - Step order is implicit and hard to reason about
23
+ - Outputs drift from expected structure
24
+ - Runs are difficult to reproduce and debug
25
+
26
+ `llmflow-core` addresses these problems with file-defined workflows, strict validation,
27
+ and deterministic run traces.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ python -m pip install -e .
33
+ ```
34
+
35
+ Install with test dependencies:
36
+
37
+ ```bash
38
+ python -m pip install -e '.[dev]'
39
+ ```
40
+
41
+ ## Quickstart
42
+ ### 1) Run with Python API
43
+
44
+ ```python
45
+ from llmflow import MockProvider, RunConfig, Runner, Workflow
46
+
47
+ workflow = Workflow.load("examples/blog_pipeline/workflow.yaml")
48
+
49
+ runner = Runner(
50
+ provider=MockProvider(default_output="{}", strict=False),
51
+ config=RunConfig(artifacts_dir=".runs", provider_name="mock"),
52
+ )
53
+
54
+ result = runner.run(
55
+ workflow,
56
+ inputs={"topic": "Deterministic AI", "audience": "Engineering managers"},
57
+ )
58
+
59
+ print(result.outputs)
60
+ print(result.run_dir)
61
+ ```
62
+
63
+ Note:
64
+
65
+ - `MockProvider(default_output=...)` returns the same JSON for every LLM step.
66
+ - If your workflow has different per-step schemas, use per-prompt mock responses
67
+ (see Example Workflow below).
68
+
69
+ ### 2) Run with CLI
70
+
71
+ ```bash
72
+ llmflow run examples/blog_pipeline/workflow.yaml \
73
+ --input topic="Deterministic AI" \
74
+ --input audience="Engineering managers" \
75
+ --mock-output '{"title":"Draft","summary":"S","body":"B"}'
76
+ ```
77
+
78
+ ### 3) Inspect and replay
79
+
80
+ ```bash
81
+ llmflow graph examples/blog_pipeline/workflow.yaml
82
+ llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
83
+ ```
84
+
85
+ ## CLI
86
+
87
+ The CLI is a thin wrapper over the library API.
88
+
89
+ Commands:
90
+
91
+ - `llmflow run <workflow.yaml> --input key=value ...`
92
+ - `llmflow graph <workflow.yaml>`
93
+ - `llmflow replay <run_dir>`
94
+
95
+ Example:
96
+
97
+ ```bash
98
+ llmflow run examples/blog_pipeline/workflow.yaml \
99
+ --input topic="Deterministic AI" \
100
+ --input audience="Engineering managers" \
101
+ --mock-output '{"title":"Draft","summary":"S","body":"B"}'
102
+
103
+ llmflow graph examples/blog_pipeline/workflow.yaml
104
+ llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
105
+ ```
106
+
107
+ CLI mock behavior:
108
+
109
+ - `--mock-output` and `--mock-output-file` are applied to all LLM steps in the run.
110
+ - For workflows with heterogeneous per-step schemas, prefer Python API tests with
111
+ prompt-specific mock responses.
112
+
113
+ ## Example workflow
114
+
115
+ Repository example: `examples/blog_pipeline`
116
+
117
+ Contents:
118
+
119
+ - `examples/blog_pipeline/workflow.yaml`
120
+ - `examples/blog_pipeline/prompts/outline.md`
121
+ - `examples/blog_pipeline/prompts/critique.md`
122
+ - `examples/blog_pipeline/prompts/revise.md`
123
+ - `examples/blog_pipeline/schemas/outline.json`
124
+ - `examples/blog_pipeline/schemas/critique.json`
125
+ - `examples/blog_pipeline/schemas/final_article.json`
126
+ - `examples/blog_pipeline/tools.py`
127
+
128
+ The end-to-end deterministic example run is validated by:
129
+
130
+ - `tests/test_examples_blog_pipeline.py`
131
+
132
+ ## Workflow YAML format
133
+ Minimal shape:
134
+
135
+ ```yaml
136
+ workflow:
137
+ name: blog_post_pipeline
138
+ version: "1.0"
139
+
140
+ inputs:
141
+ topic:
142
+ type: string
143
+ audience:
144
+ type: string
145
+
146
+ steps:
147
+ - id: outline
148
+ type: llm
149
+ prompt: prompts/outline.md
150
+ output_schema: schemas/outline.json
151
+ llm:
152
+ model: mock-model
153
+ temperature: 0
154
+
155
+ - id: critique
156
+ type: llm
157
+ depends_on: [outline]
158
+ prompt: prompts/critique.md
159
+ output_schema: schemas/critique.json
160
+ llm:
161
+ model: mock-model
162
+ temperature: 0
163
+
164
+ outputs:
165
+ article: critique
166
+ ```
167
+
168
+ Rules:
169
+ - `workflow.name` and `workflow.version` are required.
170
+ - `inputs` is a mapping of input names to type declarations.
171
+ - Each step needs a unique `id` and `type`.
172
+ - `depends_on` must reference existing step ids.
173
+ - `outputs` maps final output names to step ids.
174
+ - For `llm` steps, `prompt`, `output_schema`, and `llm.model` are required.
175
+
176
+ ## Replay
177
+
178
+ Replay reconstructs outputs from recorded artifacts and verifies they match the
179
+ recorded `outputs.json` exactly.
180
+
181
+ ```python
182
+ from llmflow import replay
183
+
184
+ result = replay(".runs/run_YYYYMMDD_HHMMSS_<shortid>")
185
+ print(result.outputs)
186
+ ```
187
+
188
+ Optional workflow override:
189
+
190
+ ```python
191
+ result = replay(
192
+ ".runs/run_YYYYMMDD_HHMMSS_<shortid>",
193
+ workflow_path="examples/blog_pipeline/workflow.yaml",
194
+ )
195
+ ```
196
+
197
+ ## Artifacts
198
+
199
+ Each run writes a folder under `.runs/`:
200
+
201
+ ```text
202
+ .runs/
203
+ run_YYYYMMDD_HHMMSS_<shortid>/
204
+ metadata.json
205
+ inputs.json
206
+ outputs.json
207
+ steps/
208
+ <step_id>/
209
+ output.json
210
+ rendered_prompt.md
211
+ llm_call.json
212
+ logs.txt
213
+ ```
214
+
215
+ `metadata.json` includes:
216
+
217
+ - `artifacts_version`
218
+ - engine version
219
+ - workflow name, version, and hash
220
+ - provider name
221
+ - execution order
222
+ - prompt hashes and step output hashes
223
+ - timestamps
224
+
225
+ Typical step artifacts:
226
+
227
+ - `steps/<step_id>/output.json`: Validated step output payload
228
+ - `steps/<step_id>/rendered_prompt.md`: Rendered prompt text for LLM steps
229
+ - `steps/<step_id>/llm_call.json`: Provider request/response metadata for LLM steps
230
+
231
+ ## Extending the engine
232
+
233
+ ### Providers
234
+ Implement `Provider.call(request) -> ProviderResponse` and pass the provider to
235
+ `Runner`.
236
+
237
+ ```python
238
+ from llmflow.providers import Provider, ProviderRequest, ProviderResponse
239
+
240
+
241
+ class StaticProvider(Provider):
242
+ def call(self, request: ProviderRequest) -> ProviderResponse:
243
+ return ProviderResponse(
244
+ model=request.model,
245
+ output_text='{"title":"Draft","summary":"S","body":"B"}',
246
+ raw={"provider": "static"},
247
+ )
248
+ ```
249
+
250
+ ### Tools
251
+ Register Python functions in `ToolRegistry`. Tool functions accept merged step
252
+ inputs and must return a `dict`.
253
+
254
+ ```python
255
+ from llmflow.registry import ToolRegistry
256
+
257
+ tools = ToolRegistry()
258
+ tools.register("summarize_topic", lambda inputs: {"topic_slug": inputs["topic"].lower()})
259
+ ```
260
+
261
+ ### Validators
262
+ Register custom validators in `ValidatorRegistry`. Validator functions accept
263
+ merged step inputs and should return `True`/`None` on success, or `False` on
264
+ failure.
265
+
266
+ ```python
267
+ from llmflow.registry import ValidatorRegistry
268
+
269
+ validators = ValidatorRegistry()
270
+ validators.register("has_summary", lambda inputs: bool(inputs.get("summary")))
271
+ ```
272
+
273
+ ### Custom steps
274
+ Register custom step classes in `StepRegistry` when you need a new execution
275
+ primitive beyond `llm`, `tool`, and `validate`.
276
+
277
+ ## Testing
278
+
279
+ Run all tests:
280
+
281
+ ```bash
282
+ pytest
283
+ ```
284
+
285
+ Run tests with coverage for core modules:
286
+
287
+ ```bash
288
+ pytest --cov=llmflow --cov-report=term-missing
289
+ ```
290
+
291
+ Determinism and replay checks:
292
+
293
+ ```bash
294
+ pytest tests/test_replay.py tests/test_examples_blog_pipeline.py
295
+ ```
296
+
297
+ Run a local command equivalent to CI:
298
+
299
+ ```bash
300
+ python -m pip install -e '.[dev]'
301
+ pytest --cov=llmflow --cov-report=term-missing
302
+ ```
303
+
304
+ ## Current status
305
+
306
+ Implemented through Phase 11:
307
+
308
+ - Core workflow loading and validation
309
+ - Graph ordering and cycle detection
310
+ - LLM/tool/validate steps
311
+ - Artifacts and metadata
312
+ - Runner and replay
313
+ - CLI (`run`, `graph`, `replay`)
314
+ - Example workflow (`examples/blog_pipeline`)
315
+ - CI workflow and coverage command (`pytest --cov=llmflow`)
@@ -0,0 +1,11 @@
1
+ You are critiquing an outline.
2
+
3
+ Topic: {{ inputs.topic }}
4
+ Audience: {{ inputs.audience }}
5
+ Draft title: {{ inputs.title }}
6
+ Sections: {{ inputs.sections | join(", ") }}
7
+
8
+ Return JSON with:
9
+ - strengths: array of strings
10
+ - weaknesses: array of strings
11
+ - revision_goals: array of strings
@@ -0,0 +1,8 @@
1
+ You are writing an outline for a blog post.
2
+
3
+ Topic: {{ inputs.topic }}
4
+ Audience: {{ inputs.audience }}
5
+
6
+ Return JSON with:
7
+ - title: string
8
+ - sections: array of section titles
@@ -0,0 +1,12 @@
1
+ You are revising a blog draft.
2
+
3
+ Topic: {{ inputs.topic }}
4
+ Audience: {{ inputs.audience }}
5
+ Strengths: {{ inputs.strengths | join(", ") }}
6
+ Weaknesses: {{ inputs.weaknesses | join(", ") }}
7
+ Revision goals: {{ inputs.revision_goals | join(", ") }}
8
+
9
+ Return JSON with:
10
+ - title: string
11
+ - summary: string
12
+ - body: string