conservation-guardian 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- conservation_guardian-0.1.0/PKG-INFO +115 -0
- conservation_guardian-0.1.0/README.md +98 -0
- conservation_guardian-0.1.0/pyproject.toml +27 -0
- conservation_guardian-0.1.0/setup.cfg +4 -0
- conservation_guardian-0.1.0/src/conservation_guardian/__init__.py +3 -0
- conservation_guardian-0.1.0/src/conservation_guardian/analyzer.py +117 -0
- conservation_guardian-0.1.0/src/conservation_guardian/budget.py +54 -0
- conservation_guardian-0.1.0/src/conservation_guardian/detector.py +113 -0
- conservation_guardian-0.1.0/src/conservation_guardian/profiler.py +108 -0
- conservation_guardian-0.1.0/src/conservation_guardian/report.py +81 -0
- conservation_guardian-0.1.0/src/conservation_guardian.egg-info/PKG-INFO +115 -0
- conservation_guardian-0.1.0/src/conservation_guardian.egg-info/SOURCES.txt +13 -0
- conservation_guardian-0.1.0/src/conservation_guardian.egg-info/dependency_links.txt +1 -0
- conservation_guardian-0.1.0/src/conservation_guardian.egg-info/top_level.txt +1 -0
- conservation_guardian-0.1.0/tests/test_guardian.py +258 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: conservation-guardian
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Generic Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste
|
|
5
|
+
Author: SuperInstance
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SuperInstance/conservation-guardian
|
|
8
|
+
Keywords: workflow,cost,optimization,conservation,guardian,efficiency,waste-detection
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# Conservation Guardian
|
|
19
|
+
|
|
20
|
+
A **generic** Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste.
|
|
21
|
+
|
|
22
|
+
> Your workflow costs $12/day. Two nodes account for 78% of tokens. They both call GPT-4 for tasks GPT-4o-mini handles.
|
|
23
|
+
|
|
24
|
+
Conservation Guardian is a framework-agnostic Python library that analyzes workflow execution for cost efficiency and detects waste. It doesn't change your workflows — it tells you what to change and why. Works with any workflow engine (Dify, n8n, LangGraph, Temporal, custom DAGs, etc.).
|
|
25
|
+
|
|
26
|
+
## What It Does
|
|
27
|
+
|
|
28
|
+
**Budget tracking** — Set hard limits on tokens per run, cost per day, and node count. Know immediately when a workflow crosses a threshold.
|
|
29
|
+
|
|
30
|
+
**DAG analysis** — Parse any workflow JSON and surface:
|
|
31
|
+
- Redundant LLM calls (same model, same upstream, same job)
|
|
32
|
+
- Dead branches (conditional paths that never execute)
|
|
33
|
+
|
|
34
|
+
**Per-node profiling** — Track tokens in/out, latency, and cost for every node across runs. Spot degradation before it hurts.
|
|
35
|
+
|
|
36
|
+
**Waste detection** — Find the expensive stuff nobody notices:
|
|
37
|
+
- **Overprompted nodes** — 4,200 tokens in, 180 out. You're paying for context the model ignores.
|
|
38
|
+
- **Idle nodes** — Running every time, contributing 0.1% of value.
|
|
39
|
+
- **Model mismatch** — Using GPT-4 for classification that GPT-4o-mini handles in 12ms.
|
|
40
|
+
|
|
41
|
+
**Reports** — Markdown summaries you can paste into Slack, Notion, or a PR comment.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install conservation-guardian
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from conservation_guardian.budget import WorkflowBudget
|
|
53
|
+
from conservation_guardian.analyzer import WorkflowDAG
|
|
54
|
+
from conservation_guardian.profiler import Profiler, NodeSample
|
|
55
|
+
from conservation_guardian.detector import WasteDetector
|
|
56
|
+
from conservation_guardian.report import render_report
|
|
57
|
+
|
|
58
|
+
# 1. Load a workflow (generic — works with any engine's JSON)
|
|
59
|
+
dag = WorkflowDAG.from_dict(workflow_json)
|
|
60
|
+
print(f"{len(dag.llm_nodes())} LLM nodes, {len(dag.redundant_llm_calls())} redundant")
|
|
61
|
+
|
|
62
|
+
# 2. Profile some runs
|
|
63
|
+
profiler = Profiler()
|
|
64
|
+
profiler.record(NodeSample(
|
|
65
|
+
node_id="summarizer",
|
|
66
|
+
input_tokens=4200,
|
|
67
|
+
output_tokens=180,
|
|
68
|
+
latency_ms=820.0,
|
|
69
|
+
cost_usd=0.015,
|
|
70
|
+
))
|
|
71
|
+
|
|
72
|
+
# 3. Detect waste
|
|
73
|
+
detector = WasteDetector(profiler)
|
|
74
|
+
findings = detector.detect()
|
|
75
|
+
for f in findings:
|
|
76
|
+
print(f"[{f.severity}] {f.message}")
|
|
77
|
+
print(f" → {f.suggestion}")
|
|
78
|
+
|
|
79
|
+
# 4. Generate report
|
|
80
|
+
budget = WorkflowBudget()
|
|
81
|
+
report = render_report(budget=budget, dag=dag, profiler=profiler, findings=findings)
|
|
82
|
+
print(report)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Differences from dify-workflow-guardian
|
|
86
|
+
|
|
87
|
+
- **Framework-agnostic**: No Dify-specific assumptions in the analyzer
|
|
88
|
+
- **Extended node types**: Recognizes `llm`, `llm-chain`, `chat-model`, `switch`, `conditional`, and more
|
|
89
|
+
- **Same API**: Drop-in replacement — just change the import package name
|
|
90
|
+
|
|
91
|
+
## Module Structure
|
|
92
|
+
|
|
93
|
+
| File | Purpose |
|
|
94
|
+
|------|---------|
|
|
95
|
+
| `budget.py` | `WorkflowBudget` — token/cost/node limits and daily tracking |
|
|
96
|
+
| `analyzer.py` | `WorkflowDAG` — parse workflow JSON, find redundancies and dead branches |
|
|
97
|
+
| `profiler.py` | `Profiler`, `NodeProfile`, `NodeSample` — per-node stats and trends |
|
|
98
|
+
| `detector.py` | `WasteDetector`, `WasteFinding` — surface actionable waste |
|
|
99
|
+
| `report.py` | `render_report()` — Markdown conservation reports |
|
|
100
|
+
|
|
101
|
+
## Tests
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
python -m pytest tests/ -v
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Philosophy
|
|
108
|
+
|
|
109
|
+
Conservation Guardian doesn't optimize your workflows. It tells you where the money goes and what to do about it. The fixes are yours to make — but at least you'll know where to look.
|
|
110
|
+
|
|
111
|
+
Built for [SuperInstance](https://github.com/SuperInstance).
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
MIT
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Conservation Guardian
|
|
2
|
+
|
|
3
|
+
A **generic** Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste.
|
|
4
|
+
|
|
5
|
+
> Your workflow costs $12/day. Two nodes account for 78% of tokens. They both call GPT-4 for tasks GPT-4o-mini handles.
|
|
6
|
+
|
|
7
|
+
Conservation Guardian is a framework-agnostic Python library that analyzes workflow execution for cost efficiency and detects waste. It doesn't change your workflows — it tells you what to change and why. Works with any workflow engine (Dify, n8n, LangGraph, Temporal, custom DAGs, etc.).
|
|
8
|
+
|
|
9
|
+
## What It Does
|
|
10
|
+
|
|
11
|
+
**Budget tracking** — Set hard limits on tokens per run, cost per day, and node count. Know immediately when a workflow crosses a threshold.
|
|
12
|
+
|
|
13
|
+
**DAG analysis** — Parse any workflow JSON and surface:
|
|
14
|
+
- Redundant LLM calls (same model, same upstream, same job)
|
|
15
|
+
- Dead branches (conditional paths that never execute)
|
|
16
|
+
|
|
17
|
+
**Per-node profiling** — Track tokens in/out, latency, and cost for every node across runs. Spot degradation before it hurts.
|
|
18
|
+
|
|
19
|
+
**Waste detection** — Find the expensive stuff nobody notices:
|
|
20
|
+
- **Overprompted nodes** — 4,200 tokens in, 180 out. You're paying for context the model ignores.
|
|
21
|
+
- **Idle nodes** — Running every time, contributing 0.1% of value.
|
|
22
|
+
- **Model mismatch** — Using GPT-4 for classification that GPT-4o-mini handles in 12ms.
|
|
23
|
+
|
|
24
|
+
**Reports** — Markdown summaries you can paste into Slack, Notion, or a PR comment.
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install conservation-guardian
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Quick Start
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from conservation_guardian.budget import WorkflowBudget
|
|
36
|
+
from conservation_guardian.analyzer import WorkflowDAG
|
|
37
|
+
from conservation_guardian.profiler import Profiler, NodeSample
|
|
38
|
+
from conservation_guardian.detector import WasteDetector
|
|
39
|
+
from conservation_guardian.report import render_report
|
|
40
|
+
|
|
41
|
+
# 1. Load a workflow (generic — works with any engine's JSON)
|
|
42
|
+
dag = WorkflowDAG.from_dict(workflow_json)
|
|
43
|
+
print(f"{len(dag.llm_nodes())} LLM nodes, {len(dag.redundant_llm_calls())} redundant")
|
|
44
|
+
|
|
45
|
+
# 2. Profile some runs
|
|
46
|
+
profiler = Profiler()
|
|
47
|
+
profiler.record(NodeSample(
|
|
48
|
+
node_id="summarizer",
|
|
49
|
+
input_tokens=4200,
|
|
50
|
+
output_tokens=180,
|
|
51
|
+
latency_ms=820.0,
|
|
52
|
+
cost_usd=0.015,
|
|
53
|
+
))
|
|
54
|
+
|
|
55
|
+
# 3. Detect waste
|
|
56
|
+
detector = WasteDetector(profiler)
|
|
57
|
+
findings = detector.detect()
|
|
58
|
+
for f in findings:
|
|
59
|
+
print(f"[{f.severity}] {f.message}")
|
|
60
|
+
print(f" → {f.suggestion}")
|
|
61
|
+
|
|
62
|
+
# 4. Generate report
|
|
63
|
+
budget = WorkflowBudget()
|
|
64
|
+
report = render_report(budget=budget, dag=dag, profiler=profiler, findings=findings)
|
|
65
|
+
print(report)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Differences from dify-workflow-guardian
|
|
69
|
+
|
|
70
|
+
- **Framework-agnostic**: No Dify-specific assumptions in the analyzer
|
|
71
|
+
- **Extended node types**: Recognizes `llm`, `llm-chain`, `chat-model`, `switch`, `conditional`, and more
|
|
72
|
+
- **Same API**: Drop-in replacement — just change the import package name
|
|
73
|
+
|
|
74
|
+
## Module Structure
|
|
75
|
+
|
|
76
|
+
| File | Purpose |
|
|
77
|
+
|------|---------|
|
|
78
|
+
| `budget.py` | `WorkflowBudget` — token/cost/node limits and daily tracking |
|
|
79
|
+
| `analyzer.py` | `WorkflowDAG` — parse workflow JSON, find redundancies and dead branches |
|
|
80
|
+
| `profiler.py` | `Profiler`, `NodeProfile`, `NodeSample` — per-node stats and trends |
|
|
81
|
+
| `detector.py` | `WasteDetector`, `WasteFinding` — surface actionable waste |
|
|
82
|
+
| `report.py` | `render_report()` — Markdown conservation reports |
|
|
83
|
+
|
|
84
|
+
## Tests
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
python -m pytest tests/ -v
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Philosophy
|
|
91
|
+
|
|
92
|
+
Conservation Guardian doesn't optimize your workflows. It tells you where the money goes and what to do about it. The fixes are yours to make — but at least you'll know where to look.
|
|
93
|
+
|
|
94
|
+
Built for [SuperInstance](https://github.com/SuperInstance).
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
MIT
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "conservation-guardian"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Generic Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{name = "SuperInstance"}]
|
|
13
|
+
keywords = ["workflow", "cost", "optimization", "conservation", "guardian", "efficiency", "waste-detection"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/SuperInstance/conservation-guardian"
|
|
25
|
+
|
|
26
|
+
[tool.setuptools.packages.find]
|
|
27
|
+
where = ["src"]
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Analyze workflow DAG for inefficiencies.
|
|
2
|
+
|
|
3
|
+
Generic version — works with any workflow engine that exposes nodes and edges.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class WorkflowNode:
    """One node of a workflow graph, together with its adjacency lists."""

    # Identifier of the node.
    id: str
    # Node kind, e.g. "llm", "tool", "if-else", "code", "http", "transform".
    type: str
    # Human-readable label; empty string when none was supplied.
    title: str = ""
    # Ids of the nodes that have an edge pointing at this node.
    upstream: list[str] = field(default_factory=list)
    # Ids of the nodes this node has an edge pointing to.
    downstream: list[str] = field(default_factory=list)
    # Free-form per-node payload, stored as given.
    data: dict = field(default_factory=dict)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class WorkflowDAG:
    """Workflow graph: nodes keyed by id, plus the first node without upstream.

    Build one from an engine's JSON export with ``from_dict`` (adapting the
    node/edge field names if necessary) or populate ``nodes`` programmatically.
    Nothing in this class verifies that the graph is acyclic; the internal
    path walker protects itself against cycles.
    """

    # Node types treated as LLM calls by ``llm_nodes``.
    _LLM_TYPES = ("llm", "llm-chain", "chat-model")
    # Node types treated as conditionals by ``dead_branches``.
    _BRANCH_TYPES = ("if-else", "switch", "conditional")

    nodes: dict[str, WorkflowNode] = field(default_factory=dict)
    entry_node: Optional[str] = None

    @classmethod
    def from_dict(cls, raw: dict) -> "WorkflowDAG":
        """Build a DAG from a generic workflow JSON.

        Expects ``graph.nodes`` and ``graph.edges``, or a flat structure
        with ``nodes`` / ``edges`` at the top level.  Each node must have
        ``id`` and should have ``data.type`` (or just ``type``).  Each edge
        should have ``sourceId``/``targetId`` or ``source``/``target``.

        Edges that reference an unknown node id are ignored.  ``entry_node``
        is set to the first node (in input order) that has no upstream, or
        ``None`` when there is no such node.

        Raises:
            KeyError: if a node has no ``id``.
        """
        graph = raw.get("graph", raw)
        nodes_list = graph.get("nodes", [])
        edges = graph.get("edges", [])

        node_map: dict[str, WorkflowNode] = {}
        for n in nodes_list:
            # Tolerate a missing *or* explicitly null ``data`` entry.
            payload = n.get("data") or {}
            node = WorkflowNode(
                id=n["id"],
                type=payload.get("type", n.get("type", "unknown")),
                title=payload.get("title", n.get("title", "")),
                data=payload,
            )
            node_map[node.id] = node

        for edge in edges:
            src = edge.get("sourceId") or edge.get("source")
            tgt = edge.get("targetId") or edge.get("target")
            if src in node_map and tgt in node_map:
                node_map[src].downstream.append(tgt)
                node_map[tgt].upstream.append(src)

        dag = cls()
        dag.nodes = node_map
        dag.entry_node = next(
            (nid for nid, n in node_map.items() if not n.upstream), None
        )
        return dag

    def llm_nodes(self) -> list[WorkflowNode]:
        """Return all nodes whose type indicates LLM usage."""
        return [n for n in self.nodes.values() if n.type in self._LLM_TYPES]

    @staticmethod
    def _model_identity(node: WorkflowNode) -> tuple:
        """Return ``(provider, name)`` for the node's ``data["model"]`` entry.

        A mapping contributes its ``provider`` and ``name`` keys; a plain
        string is treated as the model name with no provider; anything else
        (including a missing or null entry) yields ``(None, None)``.
        """
        model = node.data.get("model")
        if isinstance(model, dict):
            return (model.get("provider"), model.get("name"))
        if isinstance(model, str):
            return (None, model)
        return (None, None)

    def redundant_llm_calls(self) -> list[tuple[WorkflowNode, WorkflowNode]]:
        """Detect LLM nodes that appear to do the same work.

        Heuristic: same model provider/name and same set of upstream nodes.
        Pairs are returned in node order, each pair once.  Nodes that carry
        no model information compare equal to each other on the model part.
        """
        llms = self.llm_nodes()
        # Compute each node's comparison key once instead of once per pair.
        keys = [(self._model_identity(n), frozenset(n.upstream)) for n in llms]
        redundant: list[tuple[WorkflowNode, WorkflowNode]] = []
        for i, a in enumerate(llms):
            for j in range(i + 1, len(llms)):
                if keys[i] == keys[j]:
                    redundant.append((a, llms[j]))
        return redundant

    def dead_branches(self) -> list[list[str]]:
        """Return conditional branches flagged by a simplified heuristic.

        For every child of an ``if-else`` / ``switch`` / ``conditional`` node
        the first-downstream chain is walked; the branch is reported when
        every node on that chain has no downstream at all, i.e. in practice
        when the child itself is a sink.  Each result is
        ``[conditional_id, *chain]``.
        """
        dead: list[list[str]] = []
        for node in self.nodes.values():
            if node.type not in self._BRANCH_TYPES:
                continue
            for child_id in node.downstream:
                path = self._walk_to_end(child_id)
                if not path:
                    continue
                if all(not self.nodes[nid].downstream for nid in path if nid in self.nodes):
                    dead.append([node.id] + path)
        return dead

    def _walk_to_end(self, start_id: str) -> list[str]:
        """Follow the first downstream edge from *start_id* until a sink.

        Stops at an unknown id, at a node without downstream, or when a node
        would be visited twice (cycle guard).  Returns the ids in visit order.
        """
        visited: list[str] = []
        seen: set[str] = set()  # O(1) membership; ``visited`` keeps the order
        current = start_id
        while current and current not in seen:
            visited.append(current)
            seen.add(current)
            node = self.nodes.get(current)
            if not node or not node.downstream:
                break
            current = node.downstream[0]
        return visited
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Budget enforcement for workflow runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import date
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class WorkflowBudget:
    """Limits on tokens, daily cost and node count, plus running usage totals."""

    max_tokens_per_run: int = 500_000
    max_cost_per_day: float = 50.0
    max_nodes_per_workflow: int = 100

    # Bookkeeping state: accumulated cost per calendar day and the token total
    # of every recorded run.  Both are hidden from ``repr``.
    _daily_spend: dict[date, float] = field(default_factory=dict, repr=False)
    _run_token_counts: list[int] = field(default_factory=list, repr=False)

    # Prices applied per 1,000 input / output tokens when costing a run.
    price_input_per_1k: float = 0.03
    price_output_per_1k: float = 0.06

    def record_run(self, input_tokens: int, output_tokens: int) -> float:
        """Log one run, add its cost to today's spend and return that cost."""
        self._run_token_counts.append(input_tokens + output_tokens)
        run_cost = self._cost(input_tokens, output_tokens)
        day = date.today()
        already_spent = self._daily_spend.get(day, 0.0)
        self._daily_spend[day] = already_spent + run_cost
        return run_cost

    def _cost(self, input_tokens: int, output_tokens: int) -> float:
        """Price a run from its token counts using the per-1k rates."""
        return (input_tokens * self.price_input_per_1k + output_tokens * self.price_output_per_1k) / 1_000

    def is_within_budget(self, input_tokens: int, output_tokens: int) -> bool:
        """Tell whether a prospective run fits both the token and the daily cost limit.

        Nothing is recorded; the run's cost is only added hypothetically to
        what has been spent today.
        """
        if input_tokens + output_tokens > self.max_tokens_per_run:
            return False
        projected = self._daily_spend.get(date.today(), 0.0) + self._cost(input_tokens, output_tokens)
        return not (projected > self.max_cost_per_day)

    def check_node_count(self, node_count: int) -> bool:
        """Return True when *node_count* does not exceed the node limit."""
        return not (node_count > self.max_nodes_per_workflow)

    def daily_spend(self, day: Optional[date] = None) -> float:
        """Return the recorded spend for *day* (today when omitted), 0.0 if none."""
        lookup_day = day if day else date.today()
        return self._daily_spend.get(lookup_day, 0.0)

    def avg_tokens_per_run(self) -> float:
        """Return the mean token total over recorded runs, or 0.0 with no runs."""
        counts = self._run_token_counts
        return sum(counts) / len(counts) if counts else 0.0
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Waste detection: find nodes that burn tokens without proportional value."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from .profiler import NodeProfile, Profiler
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class WasteFinding:
    """One actionable waste observation about a node or a group of nodes."""

    node_id: str
    node_title: str
    category: str  # "overprompted", "idle", "expensive_model", "redundant"
    severity: str  # "low", "medium", "high"
    message: str
    suggestion: str


# Average input/output token ratio above which a node counts as overprompted.
DEFAULT_MAX_IO_RATIO = 15.0
# Share of total cost (a fraction: 0.1 means 10%) below which a node is "idle".
DEFAULT_IDLE_THRESHOLD = 0.1
# Share of total cost the two most expensive nodes must exceed to be flagged.
DEFAULT_EXPENSIVE_MODEL_RATIO = 0.6


class WasteDetector:
    """Analyze profiler data to surface actionable waste findings."""

    def __init__(
        self,
        profiler: Profiler,
        *,
        max_io_ratio: float = DEFAULT_MAX_IO_RATIO,
        idle_threshold: float = DEFAULT_IDLE_THRESHOLD,
        expensive_model_ratio: float = DEFAULT_EXPENSIVE_MODEL_RATIO,
    ) -> None:
        """Store the profiler and the thresholds used by the individual checks.

        Args:
            profiler: Source of per-node profiles (``all_profiles()`` is read).
            max_io_ratio: Input/output ratio above which a node is overprompted.
            idle_threshold: Cost share below which a node is reported as idle.
            expensive_model_ratio: Cost share the two most expensive nodes
                must exceed for the cost-concentration finding.
        """
        self.profiler = profiler
        self.max_io_ratio = max_io_ratio
        self.idle_threshold = idle_threshold
        self.expensive_model_ratio = expensive_model_ratio

    def detect(self) -> list[WasteFinding]:
        """Run every check and return the findings.

        Per-node findings come first (overprompted, then idle, node by node),
        followed by the workflow-wide cost-concentration finding.  Returns an
        empty list when the profiler holds no profiles.
        """
        findings: list[WasteFinding] = []
        profiles = self.profiler.all_profiles()
        if not profiles:
            return findings

        total_cost = sum(p.total_cost for p in profiles)

        for p in profiles:
            findings.extend(self._check_overprompted(p))
            findings.extend(self._check_idle(p, total_cost))

        findings.extend(self._check_expensive_model_concentration(profiles, total_cost))
        return findings

    def _check_overprompted(self, p: NodeProfile) -> list[WasteFinding]:
        """Flag a node whose input/output ratio exceeds ``max_io_ratio``.

        Nodes averaging 200 input tokens or fewer are never flagged.  Severity
        is "high" above a ratio of 30 and "medium" otherwise.
        """
        ratio = p.input_output_ratio
        if ratio > self.max_io_ratio and p.avg_input_tokens > 200:
            return [WasteFinding(
                node_id=p.node_id,
                node_title=p.node_title,
                category="overprompted",
                severity="high" if ratio > 30 else "medium",
                message=(
                    f"Node '{p.node_title or p.node_id}' receives {p.avg_input_tokens:,.0f} tokens avg "
                    f"but outputs {p.avg_output_tokens:,.0f} (ratio {ratio:.1f}×)."
                ),
                suggestion="Consider extractive pre-filtering, summarization, or reducing the prompt template size.",
            )]
        return []

    def _check_idle(self, p: NodeProfile, total_cost: float) -> list[WasteFinding]:
        """Flag a node with more than 5 runs whose cost share is below ``idle_threshold``."""
        if total_cost == 0:
            return []  # no spend at all: a share cannot be computed
        fraction = p.total_cost / total_cost
        if fraction < self.idle_threshold and p.run_count > 5:
            return [WasteFinding(
                node_id=p.node_id,
                node_title=p.node_title,
                category="idle",
                severity="low",
                message=f"Node '{p.node_title or p.node_id}' accounts for only {fraction:.1%} of cost over {p.run_count} runs.",
                suggestion="Consider removing or conditionally bypassing this node.",
            )]
        return []

    def _check_expensive_model_concentration(
        self, profiles: list[NodeProfile], total_cost: float
    ) -> list[WasteFinding]:
        """Flag the (up to) two most expensive nodes when they dominate cost.

        A single "high" finding is produced when the combined cost share of
        the top two nodes exceeds ``expensive_model_ratio``.  With only one or
        two profiled nodes that share is 100%, so the finding is always
        produced for such workflows as long as any cost was recorded.
        """
        findings: list[WasteFinding] = []
        if not profiles or total_cost == 0:
            return findings

        # Rank the profiles we were handed; ``sorted`` is stable, so ties keep
        # their original order.
        top = sorted(profiles, key=lambda p: p.total_cost, reverse=True)[:2]
        top_cost = sum(p.total_cost for p in top)
        fraction = top_cost / total_cost

        if not fraction > self.expensive_model_ratio:
            return findings

        names = ", ".join(f"'{p.node_title or p.node_id}'" for p in top)
        if len(top) == 1:
            message = (
                f"One node ({names}) accounts for {fraction:.0%} of cost. "
                f"If it uses an expensive model, consider downgrading for simple tasks."
            )
        else:
            message = (
                f"Two nodes ({names}) account for {fraction:.0%} of cost. "
                f"If they use an expensive model, consider downgrading for simple tasks."
            )
        findings.append(WasteFinding(
            node_id=",".join(p.node_id for p in top),
            node_title=names,
            category="expensive_model",
            severity="high",
            message=message,
            suggestion=(
                "Tasks like classification, extraction, or short summarization "
                "often run fine on cheaper models."
            ),
        ))
        return findings
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Per-node profiling: tokens, latency, cost, and historical trends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import statistics
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class NodeSample:
    """One recorded execution of a workflow node."""

    # Id of the node this observation belongs to.
    node_id: str
    # Token counts going into and coming out of the node for this execution.
    input_tokens: int
    output_tokens: int
    # Latency in milliseconds and cost in US dollars, as the field names state.
    latency_ms: float
    cost_usd: float
    # Optional display name of the node.
    node_title: str = ""
    # Defaults to the timezone-aware UTC time at which the sample is created.
    timestamp: datetime = field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class NodeProfile:
    """Aggregate stats for a single workflow node across runs."""

    node_id: str
    node_title: str = ""
    # Samples in the order they were recorded (oldest first).
    samples: list[NodeSample] = field(default_factory=list, repr=False)

    def record(self, sample: NodeSample) -> None:
        """Append *sample*; adopt its title if this profile has none yet."""
        self.samples.append(sample)
        if sample.node_title and not self.node_title:
            self.node_title = sample.node_title

    @property
    def run_count(self) -> int:
        """Number of recorded samples."""
        return len(self.samples)

    @property
    def avg_input_tokens(self) -> float:
        """Mean input tokens per sample, 0.0 when there are no samples."""
        return statistics.mean(s.input_tokens for s in self.samples) if self.samples else 0.0

    @property
    def avg_output_tokens(self) -> float:
        """Mean output tokens per sample, 0.0 when there are no samples."""
        return statistics.mean(s.output_tokens for s in self.samples) if self.samples else 0.0

    @property
    def avg_latency_ms(self) -> float:
        """Mean latency per sample, 0.0 when there are no samples."""
        return statistics.mean(s.latency_ms for s in self.samples) if self.samples else 0.0

    @property
    def avg_cost(self) -> float:
        """Mean cost per sample, 0.0 when there are no samples."""
        return statistics.mean(s.cost_usd for s in self.samples) if self.samples else 0.0

    @property
    def total_cost(self) -> float:
        """Sum of the cost of all samples."""
        return sum(s.cost_usd for s in self.samples)

    @property
    def total_tokens(self) -> int:
        """Sum of input plus output tokens over all samples."""
        return sum(s.input_tokens + s.output_tokens for s in self.samples)

    @property
    def input_output_ratio(self) -> float:
        """Average input tokens divided by average output tokens.

        Returns ``inf`` when the average output is zero, which includes the
        case of a profile without samples.
        """
        avg_out = self.avg_output_tokens
        return self.avg_input_tokens / avg_out if avg_out > 0 else float("inf")

    def cost_trend(self, last_n: int = 10) -> list[float]:
        """Return the cost of the most recent *last_n* samples, oldest first.

        A non-positive *last_n* yields an empty list.
        """
        if last_n <= 0:
            # ``samples[-0:]`` would be the whole list, not "the last zero".
            return []
        return [s.cost_usd for s in self.samples[-last_n:]]

    def latency_trend(self, last_n: int = 10) -> list[float]:
        """Return the latency of the most recent *last_n* samples, oldest first.

        A non-positive *last_n* yields an empty list.
        """
        if last_n <= 0:
            # Same slicing pitfall as in ``cost_trend``.
            return []
        return [s.latency_ms for s in self.samples[-last_n:]]

    def is_degrading(self, window: int = 5) -> bool:
        """Return *True* if latency is trending upward over the last *window* runs.

        Compares the mean latency of the latest *window* samples with that of
        the *window* samples before them; degradation means the recent mean is
        more than 20% higher.  Returns *False* when fewer than ``2 * window``
        samples exist or when *window* is not positive.
        """
        if window <= 0 or len(self.samples) < window * 2:
            return False
        recent = [s.latency_ms for s in self.samples[-window:]]
        earlier = [s.latency_ms for s in self.samples[-window * 2:-window]]
        return statistics.mean(recent) > statistics.mean(earlier) * 1.2
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Profiler:
    """Registry of ``NodeProfile`` objects, one per node id."""

    def __init__(self) -> None:
        # Maps node id -> profile; insertion order is the order of first sight.
        self._profiles: dict[str, NodeProfile] = {}

    def record(self, sample: NodeSample) -> None:
        """Route *sample* to its node's profile, creating the profile on first use."""
        key = sample.node_id
        try:
            target = self._profiles[key]
        except KeyError:
            target = NodeProfile(node_id=key, node_title=sample.node_title)
            self._profiles[key] = target
        target.record(sample)

    def get(self, node_id: str) -> Optional[NodeProfile]:
        """Return the profile for *node_id*, or ``None`` if nothing was recorded."""
        return self._profiles.get(node_id)

    def all_profiles(self) -> list[NodeProfile]:
        """Return every profile as a new list."""
        return [*self._profiles.values()]

    def top_by_cost(self, n: int = 5) -> list[NodeProfile]:
        """Return the first *n* profiles when ordered by descending total cost."""
        ranked = list(self._profiles.values())
        ranked.sort(key=lambda prof: prof.total_cost, reverse=True)
        return ranked[:n]

    def top_by_tokens(self, n: int = 5) -> list[NodeProfile]:
        """Return the first *n* profiles when ordered by descending total tokens."""
        ranked = list(self._profiles.values())
        ranked.sort(key=lambda prof: prof.total_tokens, reverse=True)
        return ranked[:n]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Markdown conservation reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from .analyzer import WorkflowDAG
|
|
8
|
+
from .budget import WorkflowBudget
|
|
9
|
+
from .detector import WasteFinding
|
|
10
|
+
from .profiler import Profiler
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def render_report(
    *,
    budget: Optional[WorkflowBudget] = None,
    dag: Optional[WorkflowDAG] = None,
    profiler: Optional[Profiler] = None,
    findings: Optional[list[WasteFinding]] = None,
    workflow_name: str = "Workflow",
) -> str:
    """Build the Markdown conservation report as a single string.

    Every section is optional and is emitted only when its input is given:
    budget summary, DAG analysis, top-5 nodes by cost, and waste findings.
    When there are no findings but a profiler was supplied, a "no waste"
    note is written instead.  Pieces are joined with newlines and the result
    ends with a newline.
    """
    severity_icons = {"high": "🔴", "medium": "🟡", "low": "🟢"}
    lines: list[str] = [f"# Conservation Report — {workflow_name}\n"]
    emit = lines.append

    if budget is not None:
        lines.extend([
            "\n## Budget Summary\n",
            f"- **Max tokens / run:** {budget.max_tokens_per_run:,}",
            f"- **Max cost / day:** ${budget.max_cost_per_day:.2f}",
            f"- **Max nodes / workflow:** {budget.max_nodes_per_workflow}",
            f"- **Today's spend:** ${budget.daily_spend():.4f}",
            f"- **Avg tokens / run:** {budget.avg_tokens_per_run():,.0f}",
        ])

    if dag is not None:
        emit("\n## DAG Analysis\n")
        emit(f"- **Total nodes:** {len(dag.nodes)}")
        emit(f"- **LLM nodes:** {len(dag.llm_nodes())}")

        duplicate_pairs = dag.redundant_llm_calls()
        if not duplicate_pairs:
            emit("- **Redundant LLM calls:** None detected ✅")
        else:
            emit(f"- **Redundant LLM calls:** {len(duplicate_pairs)}")
            for first, second in duplicate_pairs:
                emit(f" - `{first.title or first.id}` ↔ `{second.title or second.id}` (same model & upstream)")

        dead_paths = dag.dead_branches()
        if not dead_paths:
            emit("- **Dead branches:** None detected ✅")
        else:
            emit(f"- **Dead branches:** {len(dead_paths)}")
            for path in dead_paths:
                # Ids missing from the DAG are left out of the rendered path.
                labels = [dag.nodes[nid].title or nid for nid in path if nid in dag.nodes]
                emit(f" - `{' → '.join(labels)}`")

    if profiler is not None:
        emit("\n## Top Nodes by Cost\n")
        costliest = profiler.top_by_cost(5)
        if not costliest:
            emit("_No profiling data yet._")
        else:
            emit("| Node | Runs | Avg In | Avg Out | Avg Cost | Total Cost |")
            emit("|------|------|--------|---------|----------|------------|")
            for prof in costliest:
                name_cell = prof.node_title or prof.node_id
                emit(
                    f"| {name_cell} | {prof.run_count} | "
                    f"{prof.avg_input_tokens:,.0f} | {prof.avg_output_tokens:,.0f} | "
                    f"${prof.avg_cost:.4f} | ${prof.total_cost:.4f} |"
                )

    if findings:
        emit("\n## Waste Findings\n")
        for finding in findings:
            icon = severity_icons.get(finding.severity, "⚪")
            heading = finding.category.replace('_', ' ').title()
            emit(f"\n### {icon} {heading} — {finding.node_title or finding.node_id}\n")
            emit(f"\n{finding.message}")
            emit(f"\n> **Suggestion:** {finding.suggestion}\n")
    elif profiler is not None:
        emit("\n## Waste Findings\n")
        emit("_No waste detected._ ✅")

    return "\n".join(lines) + "\n"
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: conservation-guardian
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Generic Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste
|
|
5
|
+
Author: SuperInstance
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SuperInstance/conservation-guardian
|
|
8
|
+
Keywords: workflow,cost,optimization,conservation,guardian,efficiency,waste-detection
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# Conservation Guardian
|
|
19
|
+
|
|
20
|
+
A **generic** Workflow Conservation Engine — analyze any workflow for cost efficiency and detect waste.
|
|
21
|
+
|
|
22
|
+
> Your workflow costs $12/day. Two nodes account for 78% of tokens. They both call GPT-4 for tasks GPT-4o-mini handles.
|
|
23
|
+
|
|
24
|
+
Conservation Guardian is a framework-agnostic Python library that analyzes workflow execution for cost efficiency and detects waste. It doesn't change your workflows — it tells you what to change and why. Works with any workflow engine (Dify, n8n, LangGraph, Temporal, custom DAGs, etc.).
|
|
25
|
+
|
|
26
|
+
## What It Does
|
|
27
|
+
|
|
28
|
+
**Budget tracking** — Set hard limits on tokens per run, cost per day, and node count. Know immediately when a workflow crosses a threshold.
|
|
29
|
+
|
|
30
|
+
**DAG analysis** — Parse any workflow JSON and surface:
|
|
31
|
+
- Redundant LLM calls (same model, same upstream, same job)
|
|
32
|
+
- Dead branches (conditional paths that never execute)
|
|
33
|
+
|
|
34
|
+
**Per-node profiling** — Track tokens in/out, latency, and cost for every node across runs. Spot degradation before it hurts.
|
|
35
|
+
|
|
36
|
+
**Waste detection** — Find the expensive stuff nobody notices:
|
|
37
|
+
- **Overprompted nodes** — 4,200 tokens in, 180 out. You're paying for context the model ignores.
|
|
38
|
+
- **Idle nodes** — Running every time, contributing 0.1% of value.
|
|
39
|
+
- **Model mismatch** — Using GPT-4 for classification that GPT-4o-mini handles in 12ms.
|
|
40
|
+
|
|
41
|
+
**Reports** — Markdown summaries you can paste into Slack, Notion, or a PR comment.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install conservation-guardian
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from conservation_guardian.budget import WorkflowBudget
|
|
53
|
+
from conservation_guardian.analyzer import WorkflowDAG
|
|
54
|
+
from conservation_guardian.profiler import Profiler, NodeSample
|
|
55
|
+
from conservation_guardian.detector import WasteDetector
|
|
56
|
+
from conservation_guardian.report import render_report
|
|
57
|
+
|
|
58
|
+
# 1. Load a workflow (generic — works with any engine's JSON)
|
|
59
|
+
dag = WorkflowDAG.from_dict(workflow_json)
|
|
60
|
+
print(f"{len(dag.llm_nodes())} LLM nodes, {len(dag.redundant_llm_calls())} redundant")
|
|
61
|
+
|
|
62
|
+
# 2. Profile some runs
|
|
63
|
+
profiler = Profiler()
|
|
64
|
+
profiler.record(NodeSample(
|
|
65
|
+
node_id="summarizer",
|
|
66
|
+
input_tokens=4200,
|
|
67
|
+
output_tokens=180,
|
|
68
|
+
latency_ms=820.0,
|
|
69
|
+
cost_usd=0.015,
|
|
70
|
+
))
|
|
71
|
+
|
|
72
|
+
# 3. Detect waste
|
|
73
|
+
detector = WasteDetector(profiler)
|
|
74
|
+
findings = detector.detect()
|
|
75
|
+
for f in findings:
|
|
76
|
+
print(f"[{f.severity}] {f.message}")
|
|
77
|
+
print(f" → {f.suggestion}")
|
|
78
|
+
|
|
79
|
+
# 4. Generate report
|
|
80
|
+
budget = WorkflowBudget()
|
|
81
|
+
report = render_report(budget=budget, dag=dag, profiler=profiler, findings=findings)
|
|
82
|
+
print(report)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Differences from dify-workflow-guardian
|
|
86
|
+
|
|
87
|
+
- **Framework-agnostic**: No Dify-specific assumptions in the analyzer
|
|
88
|
+
- **Extended node types**: Recognizes `llm`, `llm-chain`, `chat-model`, `switch`, `conditional`, and more
|
|
89
|
+
- **Same API**: Drop-in replacement — just change the import package name
|
|
90
|
+
|
|
91
|
+
## Module Structure
|
|
92
|
+
|
|
93
|
+
| File | Purpose |
|
|
94
|
+
|------|---------|
|
|
95
|
+
| `budget.py` | `WorkflowBudget` — token/cost/node limits and daily tracking |
|
|
96
|
+
| `analyzer.py` | `WorkflowDAG` — parse workflow JSON, find redundancies and dead branches |
|
|
97
|
+
| `profiler.py` | `Profiler`, `NodeProfile`, `NodeSample` — per-node stats and trends |
|
|
98
|
+
| `detector.py` | `WasteDetector`, `WasteFinding` — surface actionable waste |
|
|
99
|
+
| `report.py` | `render_report()` — Markdown conservation reports |
|
|
100
|
+
|
|
101
|
+
## Tests
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
python -m pytest tests/ -v
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Philosophy
|
|
108
|
+
|
|
109
|
+
Conservation Guardian doesn't optimize your workflows. It tells you where the money goes and what to do about it. The fixes are yours to make — but at least you'll know where to look.
|
|
110
|
+
|
|
111
|
+
Built for [SuperInstance](https://github.com/SuperInstance).
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/conservation_guardian/__init__.py
|
|
4
|
+
src/conservation_guardian/analyzer.py
|
|
5
|
+
src/conservation_guardian/budget.py
|
|
6
|
+
src/conservation_guardian/detector.py
|
|
7
|
+
src/conservation_guardian/profiler.py
|
|
8
|
+
src/conservation_guardian/report.py
|
|
9
|
+
src/conservation_guardian.egg-info/PKG-INFO
|
|
10
|
+
src/conservation_guardian.egg-info/SOURCES.txt
|
|
11
|
+
src/conservation_guardian.egg-info/dependency_links.txt
|
|
12
|
+
src/conservation_guardian.egg-info/top_level.txt
|
|
13
|
+
tests/test_guardian.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
conservation_guardian
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Tests for the conservation_guardian package (budget, analyzer, profiler, detector, report)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from conservation_guardian.budget import WorkflowBudget
|
|
8
|
+
from conservation_guardian.analyzer import WorkflowDAG
|
|
9
|
+
from conservation_guardian.profiler import Profiler, NodeSample
|
|
10
|
+
from conservation_guardian.detector import WasteDetector, WasteFinding
|
|
11
|
+
from conservation_guardian.report import render_report
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Budget
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
class TestWorkflowBudget:
    """Checks on WorkflowBudget: default caps, budget verdicts and run tracking."""

    def test_default_limits(self):
        """A budget created without arguments exposes the expected default caps."""
        budget = WorkflowBudget()
        observed = (
            budget.max_tokens_per_run,
            budget.max_cost_per_day,
            budget.max_nodes_per_workflow,
        )
        assert observed[0] == 500_000
        assert observed[1] == 50.0
        assert observed[2] == 100

    def test_is_within_budget_ok(self):
        """A 100k/50k-token run is accepted under the default limits."""
        verdict = WorkflowBudget().is_within_budget(100_000, 50_000)
        assert verdict is True

    def test_is_within_budget_exceeds_tokens(self):
        """An 800/300-token run is rejected when the per-run cap is 1,000."""
        tight = WorkflowBudget(max_tokens_per_run=1_000)
        verdict = tight.is_within_budget(800, 300)
        assert verdict is False

    def test_is_within_budget_exceeds_daily_cost(self):
        """After one recorded run, a second identical run breaks a $0.01 daily cap."""
        capped = WorkflowBudget(max_cost_per_day=0.01)
        capped.record_run(100_000, 50_000)
        verdict = capped.is_within_budget(100_000, 50_000)
        assert verdict is False

    def test_record_run_returns_cost(self):
        """record_run returns a positive cost that becomes the daily spend."""
        budget = WorkflowBudget()
        run_cost = budget.record_run(1_000, 1_000)
        assert run_cost > 0
        assert budget.daily_spend() == run_cost

    def test_check_node_count(self):
        """The node cap is inclusive: 5 passes and 6 fails with a cap of 5."""
        budget = WorkflowBudget(max_nodes_per_workflow=5)
        at_limit = budget.check_node_count(5)
        over_limit = budget.check_node_count(6)
        assert at_limit is True
        assert over_limit is False

    def test_avg_tokens_per_run(self):
        """Two recorded runs (1000/500 and 2000/500) average 2,000 tokens."""
        budget = WorkflowBudget()
        for first, second in ((1_000, 500), (2_000, 500)):
            budget.record_run(first, second)
        assert budget.avg_tokens_per_run() == 2_000.0

    def test_avg_tokens_no_runs(self):
        """With nothing recorded the average is 0.0."""
        fresh = WorkflowBudget()
        assert fresh.avg_tokens_per_run() == 0.0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Analyzer
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
class TestWorkflowDAG:
    """Parsing and analysis checks for WorkflowDAG built from a raw dict."""

    @pytest.fixture()
    def sample_dag(self) -> WorkflowDAG:
        """Six-node workflow: start fans out to two gpt-4 LLM nodes.

        Only ``llm1`` continues on, through an if-else node that leads to a
        tool node and to the end node.
        """
        gpt4 = {"provider": "openai", "name": "gpt-4"}
        node_specs = [
            {"id": "start", "data": {"type": "start", "title": "Start"}},
            {"id": "llm1", "data": {"type": "llm", "title": "Draft Email", "model": dict(gpt4)}},
            {"id": "llm2", "data": {"type": "llm", "title": "Draft Email 2", "model": dict(gpt4)}},
            {"id": "if1", "data": {"type": "if-else", "title": "Check Urgency"}},
            {"id": "tool1", "data": {"type": "tool", "title": "Send Slack"}},
            {"id": "end", "data": {"type": "end", "title": "End"}},
        ]
        links = (
            ("start", "llm1"),
            ("start", "llm2"),
            ("llm1", "if1"),
            ("if1", "tool1"),
            ("if1", "end"),
        )
        edge_specs = [{"sourceId": src, "targetId": dst} for src, dst in links]
        return WorkflowDAG.from_dict({"graph": {"nodes": node_specs, "edges": edge_specs}})

    def test_parse_nodes(self, sample_dag: WorkflowDAG):
        """All six nodes are parsed and ``start`` is reported as the entry node."""
        node_total = len(sample_dag.nodes)
        assert node_total == 6
        assert sample_dag.entry_node == "start"

    def test_llm_nodes(self, sample_dag: WorkflowDAG):
        """Exactly the two ``llm``-typed nodes are returned."""
        assert len(sample_dag.llm_nodes()) == 2

    def test_redundant_llm_calls(self, sample_dag: WorkflowDAG):
        """One redundant pair is reported and both members have ``llm`` in their id."""
        pairs = sample_dag.redundant_llm_calls()
        assert len(pairs) == 1
        first, second = pairs[0]
        assert "llm" in first.id and "llm" in second.id

    def test_from_empty_dict(self):
        """An empty dict parses to a DAG with no nodes."""
        empty = WorkflowDAG.from_dict({})
        assert len(empty.nodes) == 0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
# Profiler
|
|
109
|
+
# ---------------------------------------------------------------------------
|
|
110
|
+
|
|
111
|
+
class TestProfiler:
    """Per-node statistics checks for Profiler and the profiles it hands back."""

    @pytest.fixture()
    def profiler_with_data(self) -> Profiler:
        """Profiler holding 20 ``summarizer`` samples and one ``classifier`` sample.

        Summarizer latency starts at 800 ms and rises by 10 ms per sample;
        its token counts and cost are constant across samples.
        """
        p = Profiler()
        for i in range(20):
            p.record(NodeSample(
                node_id="summarizer",
                input_tokens=4_200,
                output_tokens=180,
                latency_ms=800.0 + i * 10,
                cost_usd=0.015,
                node_title="Summarizer",
            ))
        p.record(NodeSample(
            node_id="classifier",
            input_tokens=500,
            output_tokens=10,
            latency_ms=200.0,
            cost_usd=0.002,
            node_title="Classifier",
        ))
        return p

    def test_run_count(self, profiler_with_data: Profiler):
        """Every recorded summarizer sample is counted as a run."""
        p = profiler_with_data.get("summarizer")
        assert p is not None
        assert p.run_count == 20

    def test_avg_tokens(self, profiler_with_data: Profiler):
        """Constant token counts average to the same values."""
        p = profiler_with_data.get("summarizer")
        # Guard first so a missing profile fails as an assertion, not an AttributeError.
        assert p is not None
        assert p.avg_input_tokens == 4_200.0
        assert p.avg_output_tokens == 180.0

    def test_input_output_ratio(self, profiler_with_data: Profiler):
        """The ratio equals average input over average output (4200 / 180)."""
        p = profiler_with_data.get("summarizer")
        assert p is not None
        assert p.input_output_ratio == pytest.approx(4200 / 180, rel=0.01)

    def test_top_by_cost(self, profiler_with_data: Profiler):
        """Summarizer, with far more recorded cost than classifier, ranks first."""
        top = profiler_with_data.top_by_cost(1)
        # Fail with a clear message instead of an IndexError if nothing is returned.
        assert top, "top_by_cost(1) returned no profiles"
        assert top[0].node_id == "summarizer"

    def test_is_degrading(self, profiler_with_data: Profiler):
        """Ten much slower samples after the baseline flag the node as degrading."""
        # The profile is fetched before the extra samples are recorded, so this
        # test relies on get() returning the live profile rather than a snapshot.
        p = profiler_with_data.get("summarizer")
        assert p is not None
        # Record more with much higher latency to trigger degradation
        for i in range(10):
            profiler_with_data.record(NodeSample(
                node_id="summarizer",
                input_tokens=4_200,
                output_tokens=180,
                latency_ms=2000.0 + i * 100,
                cost_usd=0.015,
            ))
        assert p.is_degrading() is True

    def test_not_degrading(self):
        """Twenty identical samples never count as degradation."""
        p = Profiler()
        for _ in range(20):
            p.record(NodeSample(
                node_id="stable",
                input_tokens=100,
                output_tokens=100,
                latency_ms=500.0,
                cost_usd=0.01,
            ))
        profile = p.get("stable")
        assert profile is not None
        assert profile.is_degrading() is False
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
# Detector
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
class TestWasteDetector:
    """Checks that WasteDetector surfaces overprompted nodes from profiler data."""

    @pytest.fixture()
    def detector(self) -> WasteDetector:
        """Detector over ten ``summarizer`` runs followed by ten ``rephraser`` runs.

        The summarizer takes 4,200 tokens in for 180 out; the rephraser is a
        balanced 500 in / 500 out.
        """
        sample_specs = (
            dict(
                node_id="summarizer",
                input_tokens=4_200,
                output_tokens=180,
                latency_ms=800.0,
                cost_usd=0.015,
                node_title="Summarizer",
            ),
            dict(
                node_id="rephraser",
                input_tokens=500,
                output_tokens=500,
                latency_ms=300.0,
                cost_usd=0.005,
                node_title="Rephraser",
            ),
        )
        source = Profiler()
        # Same recording order as writing the two loops out by hand:
        # all summarizer samples first, then all rephraser samples.
        for spec in sample_specs:
            for _ in range(10):
                source.record(NodeSample(**spec))
        return WasteDetector(source)

    def test_detect_finds_overprompted(self, detector: WasteDetector):
        """At least one finding carries the ``overprompted`` category."""
        seen_categories = [finding.category for finding in detector.detect()]
        assert "overprompted" in seen_categories

    def test_overprompted_message(self, detector: WasteDetector):
        """Exactly one overprompted finding exists and its message quotes both token counts."""
        overprompted = []
        for finding in detector.detect():
            if finding.category == "overprompted":
                overprompted.append(finding)
        assert len(overprompted) == 1
        message = overprompted[0].message
        assert "4,200" in message
        assert "180" in message

    def test_no_findings_on_empty(self):
        """A detector over an empty profiler reports nothing."""
        findings = WasteDetector(Profiler()).detect()
        assert findings == []
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# Report
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
class TestReport:
    """Smoke tests for the Markdown produced by render_report."""

    def test_renders_markdown(self):
        """A report built from a budget, a profiler and one finding has every section."""
        budget = WorkflowBudget()
        profiler = Profiler()
        sample = NodeSample(
            node_id="test",
            input_tokens=1000,
            output_tokens=100,
            latency_ms=500.0,
            cost_usd=0.01,
            node_title="Test Node",
        )
        profiler.record(sample)
        finding = WasteFinding(
            node_id="test",
            node_title="Test Node",
            category="overprompted",
            severity="high",
            message="Test message",
            suggestion="Test suggestion",
        )
        report = render_report(
            budget=budget,
            profiler=profiler,
            findings=[finding],
            workflow_name="TestFlow",
        )
        expected_fragments = (
            "# Conservation Report — TestFlow",
            "Budget Summary",
            "Top Nodes by Cost",
            "Waste Findings",
            "Test Node",
        )
        for fragment in expected_fragments:
            assert fragment in report

    def test_empty_report(self):
        """With only a workflow name supplied, the report still has its title."""
        rendered = render_report(workflow_name="Empty")
        assert "Conservation Report" in rendered
|