llmflow-core 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmflow_core-0.0.2/.gitignore +25 -0
- llmflow_core-0.0.2/PKG-INFO +347 -0
- llmflow_core-0.0.2/README.md +315 -0
- llmflow_core-0.0.2/examples/blog_pipeline/prompts/critique.md +11 -0
- llmflow_core-0.0.2/examples/blog_pipeline/prompts/outline.md +8 -0
- llmflow_core-0.0.2/examples/blog_pipeline/prompts/revise.md +12 -0
- llmflow_core-0.0.2/examples/blog_pipeline/schemas/critique.json +23 -0
- llmflow_core-0.0.2/examples/blog_pipeline/schemas/final_article.json +11 -0
- llmflow_core-0.0.2/examples/blog_pipeline/schemas/outline.json +14 -0
- llmflow_core-0.0.2/examples/blog_pipeline/tools.py +5 -0
- llmflow_core-0.0.2/examples/blog_pipeline/workflow.yaml +39 -0
- llmflow_core-0.0.2/pyproject.toml +65 -0
- llmflow_core-0.0.2/requirements.txt +10 -0
- llmflow_core-0.0.2/src/llmflow/__init__.py +45 -0
- llmflow_core-0.0.2/src/llmflow/artifacts.py +253 -0
- llmflow_core-0.0.2/src/llmflow/cli.py +188 -0
- llmflow_core-0.0.2/src/llmflow/errors.py +102 -0
- llmflow_core-0.0.2/src/llmflow/graph.py +63 -0
- llmflow_core-0.0.2/src/llmflow/hashing.py +11 -0
- llmflow_core-0.0.2/src/llmflow/providers/__init__.py +11 -0
- llmflow_core-0.0.2/src/llmflow/providers/base.py +93 -0
- llmflow_core-0.0.2/src/llmflow/providers/mock.py +54 -0
- llmflow_core-0.0.2/src/llmflow/registry.py +113 -0
- llmflow_core-0.0.2/src/llmflow/replay.py +89 -0
- llmflow_core-0.0.2/src/llmflow/runner.py +158 -0
- llmflow_core-0.0.2/src/llmflow/steps/__init__.py +16 -0
- llmflow_core-0.0.2/src/llmflow/steps/base.py +27 -0
- llmflow_core-0.0.2/src/llmflow/steps/llm.py +124 -0
- llmflow_core-0.0.2/src/llmflow/steps/tool.py +20 -0
- llmflow_core-0.0.2/src/llmflow/steps/validate.py +58 -0
- llmflow_core-0.0.2/src/llmflow/workflow.py +251 -0
- llmflow_core-0.0.2/tests/conftest.py +10 -0
- llmflow_core-0.0.2/tests/test_artifacts.py +135 -0
- llmflow_core-0.0.2/tests/test_cli.py +83 -0
- llmflow_core-0.0.2/tests/test_examples_blog_pipeline.py +83 -0
- llmflow_core-0.0.2/tests/test_graph.py +53 -0
- llmflow_core-0.0.2/tests/test_llm_step.py +56 -0
- llmflow_core-0.0.2/tests/test_providers.py +45 -0
- llmflow_core-0.0.2/tests/test_registry.py +43 -0
- llmflow_core-0.0.2/tests/test_replay.py +89 -0
- llmflow_core-0.0.2/tests/test_runner.py +98 -0
- llmflow_core-0.0.2/tests/test_tool_step.py +42 -0
- llmflow_core-0.0.2/tests/test_validate_step.py +58 -0
- llmflow_core-0.0.2/tests/test_workflow_load.py +103 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.dist-info/
|
|
6
|
+
.pytest_cache/
|
|
7
|
+
.mypy_cache/
|
|
8
|
+
.ruff_cache/
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
|
|
14
|
+
# Build
|
|
15
|
+
build/
|
|
16
|
+
dist/
|
|
17
|
+
|
|
18
|
+
# Logs and artifacts
|
|
19
|
+
.runs/
|
|
20
|
+
*.log
|
|
21
|
+
|
|
22
|
+
# OS and local files
|
|
23
|
+
.DS_Store
|
|
24
|
+
AGENTS.md
|
|
25
|
+
.coverage
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llmflow-core
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Deterministic LLM workflow engine
|
|
5
|
+
Project-URL: Homepage, https://github.com/ibrahim1023/llmflow-core
|
|
6
|
+
Project-URL: Repository, https://github.com/ibrahim1023/llmflow-core
|
|
7
|
+
Project-URL: Issues, https://github.com/ibrahim1023/llmflow-core/issues
|
|
8
|
+
Author: Ibrahim
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: deterministic,llm,pipelines,workflow
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: jinja2>=3.1
|
|
21
|
+
Requires-Dist: jsonschema>=4.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: typer>=0.9
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: build>=1.2.2; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: twine>=5.1.1; extra == 'dev'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# llmflow-core
|
|
34
|
+
|
|
35
|
+
Deterministic LLM workflow engine for file-defined, schema-validated pipelines.
|
|
36
|
+
|
|
37
|
+
## Overview
|
|
38
|
+
|
|
39
|
+
`llmflow-core` executes explicit workflow DAGs from YAML and prompt files.
|
|
40
|
+
It is designed for predictable execution, strict output contracts, and replayable
|
|
41
|
+
artifacts.
|
|
42
|
+
|
|
43
|
+
Core guarantees:
|
|
44
|
+
|
|
45
|
+
- Stable topological execution order
|
|
46
|
+
- Fail-fast behavior on first step error
|
|
47
|
+
- JSON-schema validation for LLM outputs
|
|
48
|
+
- Run artifacts for audit and replay
|
|
49
|
+
|
|
50
|
+
## Why this exists
|
|
51
|
+
|
|
52
|
+
Production LLM pipelines often fail on three basics:
|
|
53
|
+
|
|
54
|
+
- Step order is implicit and hard to reason about
|
|
55
|
+
- Outputs drift from expected structure
|
|
56
|
+
- Runs are difficult to reproduce and debug
|
|
57
|
+
|
|
58
|
+
`llmflow-core` addresses these problems with file-defined workflows, strict validation,
|
|
59
|
+
and deterministic run traces.
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
python -m pip install -e .
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Install with test dependencies:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m pip install -e .[dev]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quickstart
|
|
74
|
+
### 1) Run with Python API
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from llmflow import MockProvider, RunConfig, Runner, Workflow
|
|
78
|
+
|
|
79
|
+
workflow = Workflow.load("examples/blog_pipeline/workflow.yaml")
|
|
80
|
+
|
|
81
|
+
runner = Runner(
|
|
82
|
+
provider=MockProvider(default_output="{}", strict=False),
|
|
83
|
+
config=RunConfig(artifacts_dir=".runs", provider_name="mock"),
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
result = runner.run(
|
|
87
|
+
workflow,
|
|
88
|
+
inputs={"topic": "Deterministic AI", "audience": "Engineering managers"},
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
print(result.outputs)
|
|
92
|
+
print(result.run_dir)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Note:
|
|
96
|
+
|
|
97
|
+
- `MockProvider(default_output=...)` returns the same JSON for every LLM step.
|
|
98
|
+
- If your workflow has different per-step schemas, use per-prompt mock responses
|
|
99
|
+
(see Example Workflow below).
|
|
100
|
+
|
|
101
|
+
### 2) Run with CLI
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
llmflow run examples/blog_pipeline/workflow.yaml \
|
|
105
|
+
--input topic="Deterministic AI" \
|
|
106
|
+
--input audience="Engineering managers" \
|
|
107
|
+
--mock-output '{"title":"Draft","summary":"S","body":"B"}'
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 3) Inspect and replay
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
llmflow graph examples/blog_pipeline/workflow.yaml
|
|
114
|
+
llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## CLI
|
|
118
|
+
|
|
119
|
+
The CLI is a thin wrapper over the library API.
|
|
120
|
+
|
|
121
|
+
Commands:
|
|
122
|
+
|
|
123
|
+
- `llmflow run <workflow.yaml> --input key=value ...`
|
|
124
|
+
- `llmflow graph <workflow.yaml>`
|
|
125
|
+
- `llmflow replay <run_dir>`
|
|
126
|
+
|
|
127
|
+
Example:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
llmflow run examples/blog_pipeline/workflow.yaml \
|
|
131
|
+
--input topic="Deterministic AI" \
|
|
132
|
+
--input audience="Engineering managers" \
|
|
133
|
+
--mock-output '{"title":"Draft","summary":"S","body":"B"}'
|
|
134
|
+
|
|
135
|
+
llmflow graph examples/blog_pipeline/workflow.yaml
|
|
136
|
+
llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
CLI mock behavior:
|
|
140
|
+
|
|
141
|
+
- `--mock-output` and `--mock-output-file` are applied to all LLM steps in the run.
|
|
142
|
+
- For workflows with heterogeneous per-step schemas, prefer Python API tests with
|
|
143
|
+
prompt-specific mock responses.
|
|
144
|
+
|
|
145
|
+
## Example workflow
|
|
146
|
+
|
|
147
|
+
Repository example: `examples/blog_pipeline`
|
|
148
|
+
|
|
149
|
+
Contents:
|
|
150
|
+
|
|
151
|
+
- `examples/blog_pipeline/workflow.yaml`
|
|
152
|
+
- `examples/blog_pipeline/prompts/outline.md`
|
|
153
|
+
- `examples/blog_pipeline/prompts/critique.md`
|
|
154
|
+
- `examples/blog_pipeline/prompts/revise.md`
|
|
155
|
+
- `examples/blog_pipeline/schemas/outline.json`
|
|
156
|
+
- `examples/blog_pipeline/schemas/critique.json`
|
|
157
|
+
- `examples/blog_pipeline/schemas/final_article.json`
|
|
158
|
+
- `examples/blog_pipeline/tools.py`
|
|
159
|
+
|
|
160
|
+
The end-to-end deterministic example run is validated by:
|
|
161
|
+
|
|
162
|
+
- `tests/test_examples_blog_pipeline.py`
|
|
163
|
+
|
|
164
|
+
## Workflow YAML format
|
|
165
|
+
Minimal shape:
|
|
166
|
+
|
|
167
|
+
```yaml
|
|
168
|
+
workflow:
|
|
169
|
+
name: blog_post_pipeline
|
|
170
|
+
version: "1.0"
|
|
171
|
+
|
|
172
|
+
inputs:
|
|
173
|
+
topic:
|
|
174
|
+
type: string
|
|
175
|
+
audience:
|
|
176
|
+
type: string
|
|
177
|
+
|
|
178
|
+
steps:
|
|
179
|
+
- id: outline
|
|
180
|
+
type: llm
|
|
181
|
+
prompt: prompts/outline.md
|
|
182
|
+
output_schema: schemas/outline.json
|
|
183
|
+
llm:
|
|
184
|
+
model: mock-model
|
|
185
|
+
temperature: 0
|
|
186
|
+
|
|
187
|
+
- id: critique
|
|
188
|
+
type: llm
|
|
189
|
+
depends_on: [outline]
|
|
190
|
+
prompt: prompts/critique.md
|
|
191
|
+
output_schema: schemas/critique.json
|
|
192
|
+
llm:
|
|
193
|
+
model: mock-model
|
|
194
|
+
temperature: 0
|
|
195
|
+
|
|
196
|
+
outputs:
|
|
197
|
+
article: critique
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Rules:
|
|
201
|
+
- `workflow.name` and `workflow.version` are required.
|
|
202
|
+
- `inputs` is a mapping of input names to type declarations.
|
|
203
|
+
- Each step needs a unique `id` and `type`.
|
|
204
|
+
- `depends_on` must reference existing step ids.
|
|
205
|
+
- `outputs` maps final output names to step ids.
|
|
206
|
+
- For `llm` steps, `prompt`, `output_schema`, and `llm.model` are required.
|
|
207
|
+
|
|
208
|
+
## Replay
|
|
209
|
+
|
|
210
|
+
Replay reconstructs outputs from recorded artifacts and verifies they match the
|
|
211
|
+
recorded `outputs.json` exactly.
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from llmflow import replay
|
|
215
|
+
|
|
216
|
+
result = replay(".runs/run_YYYYMMDD_HHMMSS_<shortid>")
|
|
217
|
+
print(result.outputs)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
Optional workflow override:
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
result = replay(
|
|
224
|
+
".runs/run_YYYYMMDD_HHMMSS_<shortid>",
|
|
225
|
+
workflow_path="examples/blog_pipeline/workflow.yaml",
|
|
226
|
+
)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Artifacts
|
|
230
|
+
|
|
231
|
+
Each run writes a folder under `.runs/`:
|
|
232
|
+
|
|
233
|
+
```text
|
|
234
|
+
.runs/
|
|
235
|
+
run_YYYYMMDD_HHMMSS_<shortid>/
|
|
236
|
+
metadata.json
|
|
237
|
+
inputs.json
|
|
238
|
+
outputs.json
|
|
239
|
+
steps/
|
|
240
|
+
<step_id>/
|
|
241
|
+
output.json
|
|
242
|
+
rendered_prompt.md
|
|
243
|
+
llm_call.json
|
|
244
|
+
logs.txt
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
`metadata.json` includes:
|
|
248
|
+
|
|
249
|
+
- `artifacts_version`
|
|
250
|
+
- engine version
|
|
251
|
+
- workflow name, version, and hash
|
|
252
|
+
- provider name
|
|
253
|
+
- execution order
|
|
254
|
+
- prompt hashes and step output hashes
|
|
255
|
+
- timestamps
|
|
256
|
+
|
|
257
|
+
Typical step artifacts:
|
|
258
|
+
|
|
259
|
+
- `steps/<step_id>/output.json`: Validated step output payload
|
|
260
|
+
- `steps/<step_id>/rendered_prompt.md`: Rendered prompt text for LLM steps
|
|
261
|
+
- `steps/<step_id>/llm_call.json`: Provider request/response metadata for LLM steps
|
|
262
|
+
|
|
263
|
+
## Extending the engine
|
|
264
|
+
|
|
265
|
+
### Providers
|
|
266
|
+
Implement `Provider.call(request) -> ProviderResponse` and pass the provider to
|
|
267
|
+
`Runner`.
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
from llmflow.providers import Provider, ProviderRequest, ProviderResponse
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class StaticProvider(Provider):
|
|
274
|
+
def call(self, request: ProviderRequest) -> ProviderResponse:
|
|
275
|
+
return ProviderResponse(
|
|
276
|
+
model=request.model,
|
|
277
|
+
output_text='{"title":"Draft","summary":"S","body":"B"}',
|
|
278
|
+
raw={"provider": "static"},
|
|
279
|
+
)
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Tools
|
|
283
|
+
Register Python functions in `ToolRegistry`. Tool functions accept merged step
|
|
284
|
+
inputs and must return a `dict`.
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
from llmflow.registry import ToolRegistry
|
|
288
|
+
|
|
289
|
+
tools = ToolRegistry()
|
|
290
|
+
tools.register("summarize_topic", lambda inputs: {"topic_slug": inputs["topic"].lower()})
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### Validators
|
|
294
|
+
Register custom validators in `ValidatorRegistry`. Validator functions accept
|
|
295
|
+
merged step inputs and should return `True`/`None` on success, or `False` on
|
|
296
|
+
failure.
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
from llmflow.registry import ValidatorRegistry
|
|
300
|
+
|
|
301
|
+
validators = ValidatorRegistry()
|
|
302
|
+
validators.register("has_summary", lambda inputs: bool(inputs.get("summary")))
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Custom steps
|
|
306
|
+
Register custom step classes in `StepRegistry` when you need a new execution
|
|
307
|
+
primitive beyond `llm`, `tool`, and `validate`.
|
|
308
|
+
|
|
309
|
+
## Testing
|
|
310
|
+
|
|
311
|
+
Run all tests:
|
|
312
|
+
|
|
313
|
+
```bash
|
|
314
|
+
pytest
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
Run tests with coverage for core modules:
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
pytest --cov=llmflow --cov-report=term-missing
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
Determinism and replay checks:
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
pytest tests/test_replay.py tests/test_examples_blog_pipeline.py
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Run the same checks locally that CI runs:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
python -m pip install -e .[dev]
|
|
333
|
+
pytest --cov=llmflow --cov-report=term-missing
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
## Current status
|
|
337
|
+
|
|
338
|
+
Implemented through Phase 11:
|
|
339
|
+
|
|
340
|
+
- Core workflow loading and validation
|
|
341
|
+
- Graph ordering and cycle detection
|
|
342
|
+
- LLM/tool/validate steps
|
|
343
|
+
- Artifacts and metadata
|
|
344
|
+
- Runner and replay
|
|
345
|
+
- CLI (`run`, `graph`, `replay`)
|
|
346
|
+
- Example workflow (`examples/blog_pipeline`)
|
|
347
|
+
- CI workflow and coverage command (`pytest --cov=llmflow`)
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# llmflow-core
|
|
2
|
+
|
|
3
|
+
Deterministic LLM workflow engine for file-defined, schema-validated pipelines.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`llmflow-core` executes explicit workflow DAGs from YAML and prompt files.
|
|
8
|
+
It is designed for predictable execution, strict output contracts, and replayable
|
|
9
|
+
artifacts.
|
|
10
|
+
|
|
11
|
+
Core guarantees:
|
|
12
|
+
|
|
13
|
+
- Stable topological execution order
|
|
14
|
+
- Fail-fast behavior on first step error
|
|
15
|
+
- JSON-schema validation for LLM outputs
|
|
16
|
+
- Run artifacts for audit and replay
|
|
17
|
+
|
|
18
|
+
## Why this exists
|
|
19
|
+
|
|
20
|
+
Production LLM pipelines often fail on three basics:
|
|
21
|
+
|
|
22
|
+
- Step order is implicit and hard to reason about
|
|
23
|
+
- Outputs drift from expected structure
|
|
24
|
+
- Runs are difficult to reproduce and debug
|
|
25
|
+
|
|
26
|
+
`llmflow-core` addresses these problems with file-defined workflows, strict validation,
|
|
27
|
+
and deterministic run traces.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
python -m pip install -e .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Install with test dependencies:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
python -m pip install -e .[dev]
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quickstart
|
|
42
|
+
### 1) Run with Python API
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from llmflow import MockProvider, RunConfig, Runner, Workflow
|
|
46
|
+
|
|
47
|
+
workflow = Workflow.load("examples/blog_pipeline/workflow.yaml")
|
|
48
|
+
|
|
49
|
+
runner = Runner(
|
|
50
|
+
provider=MockProvider(default_output="{}", strict=False),
|
|
51
|
+
config=RunConfig(artifacts_dir=".runs", provider_name="mock"),
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
result = runner.run(
|
|
55
|
+
workflow,
|
|
56
|
+
inputs={"topic": "Deterministic AI", "audience": "Engineering managers"},
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
print(result.outputs)
|
|
60
|
+
print(result.run_dir)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Note:
|
|
64
|
+
|
|
65
|
+
- `MockProvider(default_output=...)` returns the same JSON for every LLM step.
|
|
66
|
+
- If your workflow has different per-step schemas, use per-prompt mock responses
|
|
67
|
+
(see Example Workflow below).
|
|
68
|
+
|
|
69
|
+
### 2) Run with CLI
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
llmflow run examples/blog_pipeline/workflow.yaml \
|
|
73
|
+
--input topic="Deterministic AI" \
|
|
74
|
+
--input audience="Engineering managers" \
|
|
75
|
+
--mock-output '{"title":"Draft","summary":"S","body":"B"}'
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 3) Inspect and replay
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
llmflow graph examples/blog_pipeline/workflow.yaml
|
|
82
|
+
llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## CLI
|
|
86
|
+
|
|
87
|
+
The CLI is a thin wrapper over the library API.
|
|
88
|
+
|
|
89
|
+
Commands:
|
|
90
|
+
|
|
91
|
+
- `llmflow run <workflow.yaml> --input key=value ...`
|
|
92
|
+
- `llmflow graph <workflow.yaml>`
|
|
93
|
+
- `llmflow replay <run_dir>`
|
|
94
|
+
|
|
95
|
+
Example:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
llmflow run examples/blog_pipeline/workflow.yaml \
|
|
99
|
+
--input topic="Deterministic AI" \
|
|
100
|
+
--input audience="Engineering managers" \
|
|
101
|
+
--mock-output '{"title":"Draft","summary":"S","body":"B"}'
|
|
102
|
+
|
|
103
|
+
llmflow graph examples/blog_pipeline/workflow.yaml
|
|
104
|
+
llmflow replay .runs/run_YYYYMMDD_HHMMSS_<shortid>
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
CLI mock behavior:
|
|
108
|
+
|
|
109
|
+
- `--mock-output` and `--mock-output-file` are applied to all LLM steps in the run.
|
|
110
|
+
- For workflows with heterogeneous per-step schemas, prefer Python API tests with
|
|
111
|
+
prompt-specific mock responses.
|
|
112
|
+
|
|
113
|
+
## Example workflow
|
|
114
|
+
|
|
115
|
+
Repository example: `examples/blog_pipeline`
|
|
116
|
+
|
|
117
|
+
Contents:
|
|
118
|
+
|
|
119
|
+
- `examples/blog_pipeline/workflow.yaml`
|
|
120
|
+
- `examples/blog_pipeline/prompts/outline.md`
|
|
121
|
+
- `examples/blog_pipeline/prompts/critique.md`
|
|
122
|
+
- `examples/blog_pipeline/prompts/revise.md`
|
|
123
|
+
- `examples/blog_pipeline/schemas/outline.json`
|
|
124
|
+
- `examples/blog_pipeline/schemas/critique.json`
|
|
125
|
+
- `examples/blog_pipeline/schemas/final_article.json`
|
|
126
|
+
- `examples/blog_pipeline/tools.py`
|
|
127
|
+
|
|
128
|
+
The end-to-end deterministic example run is validated by:
|
|
129
|
+
|
|
130
|
+
- `tests/test_examples_blog_pipeline.py`
|
|
131
|
+
|
|
132
|
+
## Workflow YAML format
|
|
133
|
+
Minimal shape:
|
|
134
|
+
|
|
135
|
+
```yaml
|
|
136
|
+
workflow:
|
|
137
|
+
name: blog_post_pipeline
|
|
138
|
+
version: "1.0"
|
|
139
|
+
|
|
140
|
+
inputs:
|
|
141
|
+
topic:
|
|
142
|
+
type: string
|
|
143
|
+
audience:
|
|
144
|
+
type: string
|
|
145
|
+
|
|
146
|
+
steps:
|
|
147
|
+
- id: outline
|
|
148
|
+
type: llm
|
|
149
|
+
prompt: prompts/outline.md
|
|
150
|
+
output_schema: schemas/outline.json
|
|
151
|
+
llm:
|
|
152
|
+
model: mock-model
|
|
153
|
+
temperature: 0
|
|
154
|
+
|
|
155
|
+
- id: critique
|
|
156
|
+
type: llm
|
|
157
|
+
depends_on: [outline]
|
|
158
|
+
prompt: prompts/critique.md
|
|
159
|
+
output_schema: schemas/critique.json
|
|
160
|
+
llm:
|
|
161
|
+
model: mock-model
|
|
162
|
+
temperature: 0
|
|
163
|
+
|
|
164
|
+
outputs:
|
|
165
|
+
article: critique
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Rules:
|
|
169
|
+
- `workflow.name` and `workflow.version` are required.
|
|
170
|
+
- `inputs` is a mapping of input names to type declarations.
|
|
171
|
+
- Each step needs a unique `id` and `type`.
|
|
172
|
+
- `depends_on` must reference existing step ids.
|
|
173
|
+
- `outputs` maps final output names to step ids.
|
|
174
|
+
- For `llm` steps, `prompt`, `output_schema`, and `llm.model` are required.
|
|
175
|
+
|
|
176
|
+
## Replay
|
|
177
|
+
|
|
178
|
+
Replay reconstructs outputs from recorded artifacts and verifies they match the
|
|
179
|
+
recorded `outputs.json` exactly.
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from llmflow import replay
|
|
183
|
+
|
|
184
|
+
result = replay(".runs/run_YYYYMMDD_HHMMSS_<shortid>")
|
|
185
|
+
print(result.outputs)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Optional workflow override:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
result = replay(
|
|
192
|
+
".runs/run_YYYYMMDD_HHMMSS_<shortid>",
|
|
193
|
+
workflow_path="examples/blog_pipeline/workflow.yaml",
|
|
194
|
+
)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Artifacts
|
|
198
|
+
|
|
199
|
+
Each run writes a folder under `.runs/`:
|
|
200
|
+
|
|
201
|
+
```text
|
|
202
|
+
.runs/
|
|
203
|
+
run_YYYYMMDD_HHMMSS_<shortid>/
|
|
204
|
+
metadata.json
|
|
205
|
+
inputs.json
|
|
206
|
+
outputs.json
|
|
207
|
+
steps/
|
|
208
|
+
<step_id>/
|
|
209
|
+
output.json
|
|
210
|
+
rendered_prompt.md
|
|
211
|
+
llm_call.json
|
|
212
|
+
logs.txt
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
`metadata.json` includes:
|
|
216
|
+
|
|
217
|
+
- `artifacts_version`
|
|
218
|
+
- engine version
|
|
219
|
+
- workflow name, version, and hash
|
|
220
|
+
- provider name
|
|
221
|
+
- execution order
|
|
222
|
+
- prompt hashes and step output hashes
|
|
223
|
+
- timestamps
|
|
224
|
+
|
|
225
|
+
Typical step artifacts:
|
|
226
|
+
|
|
227
|
+
- `steps/<step_id>/output.json`: Validated step output payload
|
|
228
|
+
- `steps/<step_id>/rendered_prompt.md`: Rendered prompt text for LLM steps
|
|
229
|
+
- `steps/<step_id>/llm_call.json`: Provider request/response metadata for LLM steps
|
|
230
|
+
|
|
231
|
+
## Extending the engine
|
|
232
|
+
|
|
233
|
+
### Providers
|
|
234
|
+
Implement `Provider.call(request) -> ProviderResponse` and pass the provider to
|
|
235
|
+
`Runner`.
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from llmflow.providers import Provider, ProviderRequest, ProviderResponse
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class StaticProvider(Provider):
|
|
242
|
+
def call(self, request: ProviderRequest) -> ProviderResponse:
|
|
243
|
+
return ProviderResponse(
|
|
244
|
+
model=request.model,
|
|
245
|
+
output_text='{"title":"Draft","summary":"S","body":"B"}',
|
|
246
|
+
raw={"provider": "static"},
|
|
247
|
+
)
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Tools
|
|
251
|
+
Register Python functions in `ToolRegistry`. Tool functions accept merged step
|
|
252
|
+
inputs and must return a `dict`.
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from llmflow.registry import ToolRegistry
|
|
256
|
+
|
|
257
|
+
tools = ToolRegistry()
|
|
258
|
+
tools.register("summarize_topic", lambda inputs: {"topic_slug": inputs["topic"].lower()})
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Validators
|
|
262
|
+
Register custom validators in `ValidatorRegistry`. Validator functions accept
|
|
263
|
+
merged step inputs and should return `True`/`None` on success, or `False` on
|
|
264
|
+
failure.
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
from llmflow.registry import ValidatorRegistry
|
|
268
|
+
|
|
269
|
+
validators = ValidatorRegistry()
|
|
270
|
+
validators.register("has_summary", lambda inputs: bool(inputs.get("summary")))
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Custom steps
|
|
274
|
+
Register custom step classes in `StepRegistry` when you need a new execution
|
|
275
|
+
primitive beyond `llm`, `tool`, and `validate`.
|
|
276
|
+
|
|
277
|
+
## Testing
|
|
278
|
+
|
|
279
|
+
Run all tests:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
pytest
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
Run tests with coverage for core modules:
|
|
286
|
+
|
|
287
|
+
```bash
|
|
288
|
+
pytest --cov=llmflow --cov-report=term-missing
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Determinism and replay checks:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
pytest tests/test_replay.py tests/test_examples_blog_pipeline.py
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
Run the same checks locally that CI runs:
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
python -m pip install -e .[dev]
|
|
301
|
+
pytest --cov=llmflow --cov-report=term-missing
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Current status
|
|
305
|
+
|
|
306
|
+
Implemented through Phase 11:
|
|
307
|
+
|
|
308
|
+
- Core workflow loading and validation
|
|
309
|
+
- Graph ordering and cycle detection
|
|
310
|
+
- LLM/tool/validate steps
|
|
311
|
+
- Artifacts and metadata
|
|
312
|
+
- Runner and replay
|
|
313
|
+
- CLI (`run`, `graph`, `replay`)
|
|
314
|
+
- Example workflow (`examples/blog_pipeline`)
|
|
315
|
+
- CI workflow and coverage command (`pytest --cov=llmflow`)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
You are critiquing an outline.
|
|
2
|
+
|
|
3
|
+
Topic: {{ inputs.topic }}
|
|
4
|
+
Audience: {{ inputs.audience }}
|
|
5
|
+
Draft title: {{ inputs.title }}
|
|
6
|
+
Sections: {{ inputs.sections | join(", ") }}
|
|
7
|
+
|
|
8
|
+
Return JSON with:
|
|
9
|
+
- strengths: array of strings
|
|
10
|
+
- weaknesses: array of strings
|
|
11
|
+
- revision_goals: array of strings
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
You are revising a blog draft.
|
|
2
|
+
|
|
3
|
+
Topic: {{ inputs.topic }}
|
|
4
|
+
Audience: {{ inputs.audience }}
|
|
5
|
+
Strengths: {{ inputs.strengths | join(", ") }}
|
|
6
|
+
Weaknesses: {{ inputs.weaknesses | join(", ") }}
|
|
7
|
+
Revision goals: {{ inputs.revision_goals | join(", ") }}
|
|
8
|
+
|
|
9
|
+
Return JSON with:
|
|
10
|
+
- title: string
|
|
11
|
+
- summary: string
|
|
12
|
+
- body: string
|