cannyforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. cannyforge-0.1.0/LICENSE +21 -0
  2. cannyforge-0.1.0/PKG-INFO +474 -0
  3. cannyforge-0.1.0/README.md +433 -0
  4. cannyforge-0.1.0/cannyforge/__init__.py +94 -0
  5. cannyforge-0.1.0/cannyforge/adapters/__init__.py +10 -0
  6. cannyforge-0.1.0/cannyforge/adapters/crewai.py +73 -0
  7. cannyforge-0.1.0/cannyforge/adapters/langchain.py +84 -0
  8. cannyforge-0.1.0/cannyforge/bundled_skills/calendar-manager/SKILL.md +43 -0
  9. cannyforge-0.1.0/cannyforge/bundled_skills/calendar-manager/assets/templates.yaml +19 -0
  10. cannyforge-0.1.0/cannyforge/bundled_skills/content-summarizer/SKILL.md +39 -0
  11. cannyforge-0.1.0/cannyforge/bundled_skills/email-writer/SKILL.md +41 -0
  12. cannyforge-0.1.0/cannyforge/bundled_skills/email-writer/assets/templates.yaml +29 -0
  13. cannyforge-0.1.0/cannyforge/bundled_skills/web-searcher/SKILL.md +42 -0
  14. cannyforge-0.1.0/cannyforge/cli.py +382 -0
  15. cannyforge-0.1.0/cannyforge/core.py +468 -0
  16. cannyforge-0.1.0/cannyforge/dashboard.py +133 -0
  17. cannyforge-0.1.0/cannyforge/export.py +139 -0
  18. cannyforge-0.1.0/cannyforge/knowledge.py +883 -0
  19. cannyforge-0.1.0/cannyforge/learning.py +773 -0
  20. cannyforge-0.1.0/cannyforge/llm.py +553 -0
  21. cannyforge-0.1.0/cannyforge/mcp_server.py +151 -0
  22. cannyforge-0.1.0/cannyforge/registry.py +151 -0
  23. cannyforge-0.1.0/cannyforge/services/__init__.py +0 -0
  24. cannyforge-0.1.0/cannyforge/services/crm_service.py +91 -0
  25. cannyforge-0.1.0/cannyforge/services/email_service.py +98 -0
  26. cannyforge-0.1.0/cannyforge/services/mock_calendar_mcp.py +233 -0
  27. cannyforge-0.1.0/cannyforge/services/service_base.py +99 -0
  28. cannyforge-0.1.0/cannyforge/services/slack_service.py +70 -0
  29. cannyforge-0.1.0/cannyforge/services/web_search_api.py +200 -0
  30. cannyforge-0.1.0/cannyforge/skills.py +739 -0
  31. cannyforge-0.1.0/cannyforge/storage.py +435 -0
  32. cannyforge-0.1.0/cannyforge/tools.py +226 -0
  33. cannyforge-0.1.0/cannyforge/workers.py +92 -0
  34. cannyforge-0.1.0/cannyforge.egg-info/PKG-INFO +474 -0
  35. cannyforge-0.1.0/cannyforge.egg-info/SOURCES.txt +48 -0
  36. cannyforge-0.1.0/cannyforge.egg-info/dependency_links.txt +1 -0
  37. cannyforge-0.1.0/cannyforge.egg-info/entry_points.txt +2 -0
  38. cannyforge-0.1.0/cannyforge.egg-info/requires.txt +29 -0
  39. cannyforge-0.1.0/cannyforge.egg-info/top_level.txt +1 -0
  40. cannyforge-0.1.0/pyproject.toml +61 -0
  41. cannyforge-0.1.0/setup.cfg +4 -0
  42. cannyforge-0.1.0/tests/test_declarative_skill.py +185 -0
  43. cannyforge-0.1.0/tests/test_integration.py +103 -0
  44. cannyforge-0.1.0/tests/test_knowledge.py +224 -0
  45. cannyforge-0.1.0/tests/test_learning.py +224 -0
  46. cannyforge-0.1.0/tests/test_llm.py +410 -0
  47. cannyforge-0.1.0/tests/test_production.py +846 -0
  48. cannyforge-0.1.0/tests/test_skill_loader.py +132 -0
  49. cannyforge-0.1.0/tests/test_spec_compliance.py +123 -0
  50. cannyforge-0.1.0/tests/test_tools.py +143 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 XiweiZhou
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,474 @@
1
+ Metadata-Version: 2.4
2
+ Name: cannyforge
3
+ Version: 0.1.0
4
+ Summary: Self-improving agents with closed-loop learning — agents that learn to get it right
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/cannyforge/cannyforge
7
+ Project-URL: Documentation, https://github.com/cannyforge/cannyforge#readme
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: python-dotenv>=1.0.0
19
+ Requires-Dist: requests>=2.31.0
20
+ Requires-Dist: pydantic>=2.5.0
21
+ Requires-Dist: pyyaml>=6.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
+ Provides-Extra: claude
26
+ Requires-Dist: anthropic>=0.39.0; extra == "claude"
27
+ Provides-Extra: openai
28
+ Requires-Dist: openai>=1.0.0; extra == "openai"
29
+ Provides-Extra: deepseek
30
+ Requires-Dist: openai>=1.0.0; extra == "deepseek"
31
+ Provides-Extra: mcp
32
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
33
+ Provides-Extra: dashboard
34
+ Requires-Dist: streamlit>=1.30.0; extra == "dashboard"
35
+ Provides-Extra: all
36
+ Requires-Dist: anthropic>=0.39.0; extra == "all"
37
+ Requires-Dist: openai>=1.0.0; extra == "all"
38
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "all"
39
+ Requires-Dist: streamlit>=1.30.0; extra == "all"
40
+ Dynamic: license-file
41
+
42
+ # CannyForge
43
+
44
+ **Self-Improving Agents Through Closed-Loop Learning**
45
+
46
+ CannyForge demonstrates how autonomous agents can genuinely learn from experience through closed-loop feedback. Skills are defined declaratively via [AgentSkills.io](https://agentskills.io/specification)-compliant `SKILL.md` files -- no Python subclassing required. The engine handles execution, error detection, pattern learning, rule application, and rule lifecycle automatically.
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ pip install cannyforge # from PyPI
52
+ cannyforge demo # run the 3-act demo
53
+ cannyforge run "write email" # execute a task
54
+ ```
55
+
56
+ Or install from source:
57
+
58
+ ```bash
59
+ git clone https://github.com/cannyforge/cannyforge.git
60
+ cd cannyforge
61
+ pip install -e .
62
+ ```
63
+
64
+ ## CLI
65
+
66
+ ```bash
67
+ cannyforge demo # animated terminal demo
68
+ cannyforge demo --speed 0 # instant (CI)
69
+ cannyforge run "task" # execute one task
70
+ cannyforge new-skill name # scaffold a skill
71
+ cannyforge stats # show KB state
72
+ cannyforge rules email_writer # inspect rules
73
+ cannyforge learn # trigger learning
74
+ cannyforge export # export training data
75
+ cannyforge install github:user/repo/path/to/skill # install from GitHub
76
+ cannyforge serve # start MCP server
77
+ cannyforge dashboard # launch Streamlit dashboard
78
+ ```
79
+
80
+ ## Quick Start (code)
81
+
82
+ ```python
83
+ from cannyforge import CannyForge
84
+ forge = CannyForge()
85
+ result = forge.execute("Write an email about the 3 PM meeting")
86
+ print(result.success, result.output) # False, then True after learning
87
+ ```
88
+
89
+ ## Core Concept
90
+
91
+ ```
92
+ Task --> [Apply Rules] --> Execute --> Outcome --> Learn --> Update Rules
92
+                ^                                                   |
93
+                +-------------------- Knowledge Base <-------------+
95
+ ```
96
+
97
+ **The key insight**: Knowledge must flow back into execution. Rules learned from past errors are evaluated against new tasks and actively prevent predicted failures -- and rules that stop working are automatically retired.
98
+
99
+ > **skill** — warm start: templates and structure ready from day one
100
+ > **forge** — calibration: watches every execution, builds rules, enforces them, and retires what doesn't work
101
+
102
+ ## Run the Animated Demo
103
+
104
+ ```bash
105
+ cannyforge demo # normal speed
106
+ cannyforge demo --speed 0 # instant (CI / quick review)
107
+ cannyforge demo --speed 2 # slow (presentations)
108
+ cannyforge demo --seed 7 # different random sequence
109
+ ```
110
+
111
+ The demo runs three acts in your terminal:
112
+ - **Act I** — Tasks execute with zero rules. Same errors repeat. Auto-learn fires mid-stream.
113
+ - **Act II** — Rules active. Forge enforces what it learned.
114
+ - **Act III** — A poorly-calibrated rule degrades ACTIVE → PROBATION → DORMANT, then gets resurrected when the same errors resurface.
115
+
116
+ ## Run Tests
117
+
118
+ ```bash
119
+ pytest tests/ -v
120
+ ```
121
+
122
+ 258 tests across 9 test files covering skill loading, knowledge rules, declarative execution, learning, LLM integration, multi-step execution, integration, spec compliance, and production readiness.
123
+
124
+ ## How Learning Works
125
+
126
+ ### 1. Automatic Trigger
127
+
128
+ CannyForge monitors errors per skill and auto-triggers a learning cycle when enough uncovered signal accumulates -- no manual call needed:
129
+
130
+ ```python
131
+ forge = CannyForge()
132
+
133
+ # Just execute tasks. Learning triggers automatically when:
134
+ # - 2+ distinct error types appear that no existing rule covers, OR
135
+ # - 20+ raw errors accumulate since the last cycle
136
+ result = forge.execute("Write email about the 3 PM meeting")
137
+ # TimezoneError logged → uncovered signal accumulates
138
+ # ...after enough failures, forge.run_learning_cycle() fires automatically
139
+ ```
140
+
141
+ ### 2. Pattern Detection
142
+
143
+ ```python
144
+ # Can also trigger manually
145
+ metrics = forge.run_learning_cycle(min_frequency=3, min_confidence=0.3)
146
+
147
+ # Generated rule:
148
+ # IF task.description matches '\d{1,2}\s*(am|pm)'
149
+ # AND context.has_timezone == False
150
+ # THEN add_field(context.timezone, 'UTC')
151
+ # flag(_flags, 'timezone_added')
152
+ ```
153
+
154
+ ### 3. Rule Application
155
+
156
+ ```python
157
+ # Rules apply before execution (PREVENTION), after (VALIDATION),
158
+ # or on mid-execution failure (RECOVERY)
159
+ result = forge.execute("Send email about 2 PM meeting")
160
+ print(result.rules_applied) # ['rule_timezoneerror_1']
161
+ ```
162
+
163
+ ### 4. Adaptive Confidence Updates
164
+
165
+ Rule confidence uses an adaptive exponential moving average. The prior dominates early (when few observations exist); observations dominate later:
166
+
167
+ ```
168
+ prior_weight = 2.0 / (applications + 2)
169
+ confidence = prior_weight × prior + (1 − prior_weight) × effectiveness
170
+ ```
171
+
172
+ This allows rules to recover from initial bad luck and converge correctly without being locked in by early results.
173
+
174
+ ### 5. Rule Lifecycle
175
+
176
+ Rules that underperform are demoted, not deleted. The knowledge is preserved for resurrection:
177
+
178
+ ```
179
+ ACTIVE → effectiveness < threshold, n≥5 → PROBATION
180
+ PROBATION → effectiveness ≥ threshold×1.1 → ACTIVE (hysteresis)
181
+ PROBATION → n≥15 AND eff < threshold×0.7 → DORMANT
182
+ DORMANT → same error type resurfaces → ACTIVE (resurrection)
183
+ ```
184
+
185
+ Thresholds differ by rule type — PREVENTION rules are held to a higher standard (0.45) than RECOVERY rules (0.25), which face harder attribution problems.
186
+
187
+ Dormant rules fire the resurrection path in `add_rule()` the next time the learning cycle regenerates a rule for the same error type. The resurrected rule starts with partial confidence (`min(new_conf × 0.6, 0.5)`), not a full reset, so the degradation history informs the restart.
188
+
189
+ ## Creating a New Skill
190
+
191
+ Create a directory under `skills/` with a single `SKILL.md` file:
192
+
193
+ ```
194
+ skills/
195
+   my-new-skill/
196
+     SKILL.md              # required -- defines the skill
197
+     assets/               # optional -- templates, data files
198
+       templates.yaml
199
+     scripts/              # optional -- custom Python handler
200
+       handler.py
201
+ ```
202
+
203
+ ### Minimal SKILL.md
204
+
205
+ ```markdown
206
+ ---
207
+ name: my-new-skill
208
+ description: What this skill does.
209
+ metadata:
210
+   triggers:
211
+     - keyword1
212
+     - keyword2
213
+   output_type: result_type
214
+ ---
215
+
216
+ # My New Skill
217
+
218
+ Detailed description in markdown.
219
+ ```
220
+
221
+ That's it. CannyForge auto-discovers the skill, matches tasks to it via triggers, and wires up the learning loop. No code changes needed.
222
+
223
+ ### Execution Tiers (priority order)
224
+
225
+ 1. **`scripts/handler.py`** — full control via custom Python (highest priority)
226
+ 2. **LLM-powered** — when an `llm_provider` is passed to `CannyForge()`, uses multi-step tool-calling loop
227
+ 3. **Template-based** — intent matching against `assets/templates.yaml` (fallback)
228
+
229
+ ### Optional: Templates
230
+
231
+ ```yaml
232
+ greeting:
233
+   match: [hello, hi]
234
+   subject: "Greeting"
235
+   body: "Hello there!"
236
+
237
+ default:
238
+   match: []
239
+   subject: "General"
240
+   body: "Default output"
241
+ ```
242
+
243
+ ### Optional: Custom Handler
244
+
245
+ ```python
246
+ from cannyforge.skills import ExecutionResult, ExecutionStatus, SkillOutput
247
+
248
+ def execute(context, metadata):
249
+     return ExecutionResult(
250
+         status=ExecutionStatus.SUCCESS,
251
+         output=SkillOutput(content={"key": "value"}, output_type="custom"),
252
+     )
253
+ ```
254
+
255
+ ## Architecture
256
+
257
+ ### Declarative Skills (AgentSkills.io Spec)
258
+
259
+ Skills are defined via `SKILL.md` with YAML frontmatter following the [AgentSkills.io specification](https://agentskills.io/specification). CannyForge-specific extensions live under the `metadata` field:
260
+
261
+ | Field | Purpose |
262
+ |-------|---------|
263
+ | `name` | Hyphenated lowercase identifier (e.g. `email-writer`) |
264
+ | `description` | What the skill does |
265
+ | `license` | License type |
266
+ | `metadata.triggers` | Keywords for task-to-skill matching |
267
+ | `metadata.output_type` | Output category |
268
+ | `metadata.context_fields` | Typed execution context fields with defaults |
269
+
270
+ ### Included Skills
271
+
272
+ | Skill | Triggers | Output Type |
273
+ |-------|----------|-------------|
274
+ | `email-writer` | email, write email, compose, draft email | email |
275
+ | `calendar-manager` | calendar, schedule, meeting, book, reserve | calendar_event |
276
+ | `web-searcher` | search, find, research, look up, query | search_results |
277
+ | `content-summarizer` | summarize, summary, abstract, condense, extract | summary |
278
+
279
+ ### Core Components
280
+
281
+ **`skills.py`** -- Declarative Skill System
282
+ - `ExecutionContext`: Dynamic properties via `__getattr__`/`__setattr__`, backward-compatible with rule dicts
283
+ - `DeclarativeSkill`: Three-tier execution (handler → LLM → template), multi-step loop bounded by `max_steps`
284
+ - `SkillLoader`: Scans `skills/` directory, parses frontmatter, creates skill instances
285
+ - `SkillRegistry`: Trigger-based task matching with scoring (match count + earliest position)
286
+ - `StepRecord`: Per-step tracking of tool calls, tool results, errors, and recovery applied
287
+
288
+ **`knowledge.py`** -- Actionable Knowledge System
289
+ - `RuleStatus`: `ACTIVE` / `PROBATION` / `DORMANT` lifecycle states
290
+ - Rules with `Condition → Action` structure; conditions: `contains`, `matches`, `equals`, `gt`, `lt`
291
+ - `effective_confidence`: confidence × staleness decay (10% per 30 days idle, floor 50%)
292
+ - `PATTERN_LIBRARY`: Backbone intelligence shared across all skills — `TimezoneError`, `SpamTriggerError`, `AttachmentError`, `ConflictError`, `PreferenceError`, `PoorQueryError`, `LowCredibilityError`
293
+ - Adaptive EMA confidence updates in `record_outcome()`; lifecycle transitions in `_check_lifecycle()`
294
+ - `add_rule()` detects dormant resurrection and probation boost via semantic match (same `source_error_type` + `rule_type`)
295
+
296
+ **`learning.py`** -- Pattern Detection and Learning Engine
297
+ - `PatternDetector`: Groups errors by type, then filters by `min_frequency` and `min_confidence` (pattern confidence is `frequency / total_errors`)
298
+ - `LearningEngine.run_learning_cycle()`: Two passes — PREVENTION rules from error repo, RECOVERY rules from step error repo
299
+ - Dormant-aware `already_has_rule` check: dormant rules are allowed to be re-derived and resurrected
300
+
301
+ **`core.py`** -- Unified Interface
302
+ - `_maybe_auto_learn()`: Per-skill uncovered-error tracking, auto-triggers learning cycle
303
+ - Dynamic error classification derived from `PATTERN_LIBRARY` (keyword → error type)
304
+ - LLM-based error classification when a provider is available
305
+ - `reset()`: Clears stats and learning data; for a clean KB state, pass `data_dir=tempfile.mkdtemp()` at construction
306
+
307
+ **`llm.py`** -- LLM Providers
308
+ - `LLMProvider` ABC with `ClaudeProvider`, `OpenAIProvider`, `DeepSeekProvider`, `MockProvider`
309
+ - `MockProvider` supports `step_responses` list for deterministic multi-step test scenarios
310
+
311
+ **`storage.py`** -- Storage Backends
312
+ - `JSONFileBackend`: Default file-based storage (JSONL for errors/successes, JSON for rules)
313
+ - `SQLiteBackend`: Thread-safe relational storage with automatic schema migration
314
+
315
+ **`adapters/`** -- Framework Integration
316
+ - `langchain.py`: `CannyForgeTool` wraps any skill as a LangChain tool
317
+ - `crewai.py`: `CannyForgeCrewTool` wraps any skill as a CrewAI tool
318
+
319
+ ## Project Structure
320
+
321
+ ```
322
+ cannyforge/
323
+ ├── pyproject.toml # Project config, pytest settings
324
+ ├── CLAUDE.md # Developer guide
325
+
326
+ ├── cannyforge/ # Main package
327
+ │ ├── __init__.py # Public API exports
328
+ │ ├── cli.py # CLI entry point (11 commands)
329
+ │ ├── core.py # CannyForge orchestrator
330
+ │ ├── knowledge.py # Rules, conditions, actions, PATTERN_LIBRARY
331
+ │ ├── skills.py # DeclarativeSkill, SkillLoader, SkillRegistry
332
+ │ ├── learning.py # ErrorRecord, PatternDetector, LearningEngine
333
+ │ ├── llm.py # LLM providers (Claude, OpenAI, DeepSeek, Mock)
334
+ │ ├── tools.py # ToolDefinition, ToolExecutor, ToolRegistry
335
+ │ ├── storage.py # Storage backends (JSON, SQLite)
336
+ │ ├── workers.py # Background learning workers
337
+ │ ├── registry.py # Community skill registry
338
+ │ ├── mcp_server.py # MCP server
339
+ │ ├── export.py # Training data export (DPO, Anthropic)
340
+ │ ├── dashboard.py # Streamlit monitoring dashboard
341
+ │ ├── adapters/ # Framework adapters
342
+ │ │ ├── langchain.py # LangChain integration
343
+ │ │ └── crewai.py # CrewAI integration
344
+ │ ├── services/ # External services (mock + real)
345
+ │ │ ├── slack_service.py
346
+ │ │ ├── email_service.py
347
+ │ │ └── crm_service.py
348
+ │ └── bundled_skills/ # Built-in skills
349
+ │ ├── email-writer/
350
+ │ ├── calendar-manager/
351
+ │ ├── web-searcher/
352
+ │ └── content-summarizer/
353
+
354
+ ├── scenarios/
355
+ │ ├── demo.py # Animated terminal demo (3 acts)
356
+ │ └── scenario_email.py # Ablation scenario
357
+
358
+ ├── examples/
359
+ │ └── quickstart.py # Quickstart example
360
+
361
+ ├── tests/ # 258 tests
362
+ │ ├── conftest.py # Shared fixtures
363
+ │ ├── test_skill_loader.py
364
+ │ ├── test_knowledge.py
365
+ │ ├── test_declarative_skill.py
366
+ │ ├── test_learning.py
367
+ │ ├── test_llm.py
368
+ │ ├── test_tools.py
369
+ │ ├── test_integration.py
370
+ │ ├── test_spec_compliance.py
371
+ │ └── test_production.py # Production readiness tests
372
+
373
+ └── .github/workflows/ci.yml # CI: test (Python 3.10-3.12) + spec validation
374
+ ```
375
+
376
+ ## Usage Examples
377
+
378
+ ### Basic Execution
379
+
380
+ ```python
381
+ from cannyforge import CannyForge
382
+
383
+ forge = CannyForge()
384
+
385
+ result = forge.execute("Write a professional email about the project")
386
+ print(f"Skill: {result.skill_name}")
387
+ print(f"Success: {result.success}")
388
+ print(f"Rules applied: {result.rules_applied}")
389
+ print(f"Output: {result.output}")
390
+ ```
391
+
392
+ ### With LLM Provider
393
+
394
+ ```python
395
+ from cannyforge import CannyForge, ClaudeProvider
396
+
397
+ forge = CannyForge(llm_provider=ClaudeProvider())
398
+
399
+ # Skills now use the three-tier execution:
400
+ # 1. Custom handler (if present)
401
+ # 2. LLM multi-step tool loop
402
+ # 3. Template fallback
403
+ result = forge.execute("Write an email about the meeting at 3 PM")
404
+ ```
405
+
406
+ ### Learning Cycle (manual)
407
+
408
+ ```python
409
+ # Learning cycles fire automatically, but you can also trigger one manually
410
+ metrics = forge.run_learning_cycle(min_frequency=3, min_confidence=0.3)
411
+ print(f"Patterns detected: {metrics.patterns_detected}")
412
+ print(f"Rules generated: {metrics.rules_generated}")
413
+ ```
414
+
415
+ ### Statistics
416
+
417
+ ```python
418
+ stats = forge.get_statistics()
419
+ print(f"Success rate: {stats['execution']['success_rate']:.1%}")
420
+ print(f"Total rules: {stats['learning']['total_rules']}")
421
+
422
+ # Rule lifecycle breakdown
423
+ kb_stats = forge.knowledge_base.get_statistics()
424
+ print(kb_stats['rules_by_status']) # {'active': N, 'probation': N, 'dormant': N}
425
+ ```
426
+
427
+ ### Rule Inspection
428
+
429
+ ```python
430
+ for rule in forge.knowledge_base.get_rules("email_writer"):
431
+     print(f"{rule.name}: {rule.status.value} "
432
+           f"eff={rule.effectiveness:.2f} conf={rule.effective_confidence:.2f}")
433
+ ```
434
+
435
+ ## Validation
436
+
437
+ CannyForge uses ablation testing to prove learning effectiveness (see `scenarios/scenario_email.py`):
438
+
439
+ - **Constant error rate**: No predetermined decay — improvement comes only from rules preventing errors
440
+ - **Train/test split**: Rules learned on training tasks, evaluated on held-out tasks
441
+ - **Ablation control**: Direct comparison with vs without learning applied
442
+
443
+ ## CI/CD
444
+
445
+ GitHub Actions runs on every push and PR to `main`:
446
+
447
+ - **test**: Runs full test suite on Python 3.10, 3.11, 3.12
448
+ - **spec-validation**: Validates all `SKILL.md` files against spec requirements
449
+
450
+ ## Limitations and Future Work
451
+
452
+ **Current limitations**:
453
+ - Pattern confidence is `frequency / total_errors` — minority error types can fall below threshold when dominated by a high-frequency type
454
+ - Attribution problem: all rules in `applied_rules` are credited/blamed equally; true causal attribution requires controlled experiments
455
+ - `PATTERN_LIBRARY` must be extended manually to support new error types
456
+
457
+ **Future directions**:
458
+ - Causal inference for pattern attribution
459
+ - Meta-learning across scenarios
460
+ - Multi-agent collaborative learning
461
+ - Real-world API integration
462
+
463
+ ## Further Reading
464
+
465
+ - Blog post: [From Prompt Tweaks to Learning Machines: The Agent Skill Primitive](https://medium.com/@xiweizhou/from-prompt-tweaks-to-learning-machines-the-agent-skill-primitive-93c8fa9dec8c?sk=ac888430da699bce7b635456ae2b1166)
466
+ - Technical appendix: `docs/TECHNICAL_APPENDIX_EMAIL_SCENARIO_WALKTHROUGH.md`
467
+
468
+ ## License
469
+
470
+ See LICENSE file for details.
471
+
472
+ ---
473
+
474
+ **CannyForge** -- Agents that genuinely learn from experience through closed-loop feedback.