mmar-mage 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mmar_mage-0.0.1/.env.example +36 -0
  2. mmar_mage-0.0.1/.github/workflows/ci.yml +43 -0
  3. mmar_mage-0.0.1/.github/workflows/release.yml +76 -0
  4. mmar_mage-0.0.1/.gitignore +46 -0
  5. mmar_mage-0.0.1/CLAUDE.md +389 -0
  6. mmar_mage-0.0.1/LICENSE +21 -0
  7. mmar_mage-0.0.1/Makefile +56 -0
  8. mmar_mage-0.0.1/PKG-INFO +546 -0
  9. mmar_mage-0.0.1/README.md +509 -0
  10. mmar_mage-0.0.1/TODO.md +659 -0
  11. mmar_mage-0.0.1/configs/care_default.toml +41 -0
  12. mmar_mage-0.0.1/configs/care_local.toml +40 -0
  13. mmar_mage-0.0.1/configs/care_research.toml +62 -0
  14. mmar_mage-0.0.1/configs/deep_local.toml +10 -0
  15. mmar_mage-0.0.1/configs/deep_local_airi.toml +11 -0
  16. mmar_mage-0.0.1/configs/deep_openrouter.toml +12 -0
  17. mmar_mage-0.0.1/configs/deep_research.toml +12 -0
  18. mmar_mage-0.0.1/configs/fast.toml +7 -0
  19. mmar_mage-0.0.1/docs/AGENT_SKILLS.md +160 -0
  20. mmar_mage-0.0.1/docs/ARCHITECTURE.md +160 -0
  21. mmar_mage-0.0.1/docs/CARE_INTEGRATION.md +265 -0
  22. mmar_mage-0.0.1/docs/IMPROVEMENT_PLAN.md +805 -0
  23. mmar_mage-0.0.1/docs/RELEASE_NOTES.md +172 -0
  24. mmar_mage-0.0.1/docs/RESEARCH_CONTEXT.md +869 -0
  25. mmar_mage-0.0.1/evaluation/README.md +234 -0
  26. mmar_mage-0.0.1/evaluation/__init__.py +59 -0
  27. mmar_mage-0.0.1/evaluation/baseline_generator.py +308 -0
  28. mmar_mage-0.0.1/evaluation/category_chain_generator.py +411 -0
  29. mmar_mage-0.0.1/evaluation/chain_executor.py +516 -0
  30. mmar_mage-0.0.1/evaluation/chain_generator.py +205 -0
  31. mmar_mage-0.0.1/evaluation/config.py +149 -0
  32. mmar_mage-0.0.1/evaluation/eval_config.toml +52 -0
  33. mmar_mage-0.0.1/evaluation/runner.py +406 -0
  34. mmar_mage-0.0.1/evaluation/tasks.py +300 -0
  35. mmar_mage-0.0.1/examples/README.md +69 -0
  36. mmar_mage-0.0.1/examples/agent_features/config.toml +20 -0
  37. mmar_mage-0.0.1/examples/agent_features/run.py +94 -0
  38. mmar_mage-0.0.1/examples/deep_mode_full/config.toml +14 -0
  39. mmar_mage-0.0.1/examples/deep_mode_full/run.py +57 -0
  40. mmar_mage-0.0.1/examples/deep_mode_local/config.toml +7 -0
  41. mmar_mage-0.0.1/examples/deep_mode_local/run.py +47 -0
  42. mmar_mage-0.0.1/examples/deep_mode_memory_research/config.toml +11 -0
  43. mmar_mage-0.0.1/examples/deep_mode_memory_research/run.py +51 -0
  44. mmar_mage-0.0.1/examples/deep_mode_web_research/config.toml +10 -0
  45. mmar_mage-0.0.1/examples/deep_mode_web_research/run.py +53 -0
  46. mmar_mage-0.0.1/examples/fast_mode/config.toml +7 -0
  47. mmar_mage-0.0.1/examples/fast_mode/run.py +47 -0
  48. mmar_mage-0.0.1/mmar_mage/__init__.py +249 -0
  49. mmar_mage-0.0.1/mmar_mage/agents/__init__.py +57 -0
  50. mmar_mage-0.0.1/mmar_mage/agents/capability_lookup_agent.py +525 -0
  51. mmar_mage-0.0.1/mmar_mage/agents/chain_editor.py +106 -0
  52. mmar_mage-0.0.1/mmar_mage/agents/chain_verifier.py +259 -0
  53. mmar_mage-0.0.1/mmar_mage/agents/dag_builder.py +71 -0
  54. mmar_mage-0.0.1/mmar_mage/agents/domain_analyzer.py +66 -0
  55. mmar_mage-0.0.1/mmar_mage/agents/feedback_recall_agent.py +244 -0
  56. mmar_mage-0.0.1/mmar_mage/agents/mcts_constructor.py +574 -0
  57. mmar_mage-0.0.1/mmar_mage/agents/memory_research_agent.py +290 -0
  58. mmar_mage-0.0.1/mmar_mage/agents/needs_analysis.py +54 -0
  59. mmar_mage-0.0.1/mmar_mage/agents/self_refiner.py +256 -0
  60. mmar_mage-0.0.1/mmar_mage/agents/skill_synthesizer.py +137 -0
  61. mmar_mage-0.0.1/mmar_mage/agents/step_critic.py +162 -0
  62. mmar_mage-0.0.1/mmar_mage/agents/step_describer.py +260 -0
  63. mmar_mage-0.0.1/mmar_mage/agents/step_planner.py +258 -0
  64. mmar_mage-0.0.1/mmar_mage/agents/template_assembler_agent.py +122 -0
  65. mmar_mage-0.0.1/mmar_mage/agents/tool_implementation_generator.py +148 -0
  66. mmar_mage-0.0.1/mmar_mage/agents/tool_spec_generator.py +174 -0
  67. mmar_mage-0.0.1/mmar_mage/agents/topology_selector.py +105 -0
  68. mmar_mage-0.0.1/mmar_mage/agents/tot_planner.py +276 -0
  69. mmar_mage-0.0.1/mmar_mage/agents/web_research_agent.py +323 -0
  70. mmar_mage-0.0.1/mmar_mage/benchmark/__init__.py +30 -0
  71. mmar_mage-0.0.1/mmar_mage/benchmark/reporter.py +47 -0
  72. mmar_mage-0.0.1/mmar_mage/benchmark/runner.py +89 -0
  73. mmar_mage-0.0.1/mmar_mage/benchmark/schemas.py +116 -0
  74. mmar_mage-0.0.1/mmar_mage/benchmark/synthesizer.py +117 -0
  75. mmar_mage-0.0.1/mmar_mage/bootstrap.py +161 -0
  76. mmar_mage-0.0.1/mmar_mage/carl_export.py +859 -0
  77. mmar_mage-0.0.1/mmar_mage/chain_edit.py +124 -0
  78. mmar_mage-0.0.1/mmar_mage/chain_repair.py +88 -0
  79. mmar_mage-0.0.1/mmar_mage/cli.py +538 -0
  80. mmar_mage-0.0.1/mmar_mage/code_generator.py +271 -0
  81. mmar_mage-0.0.1/mmar_mage/cost.py +498 -0
  82. mmar_mage-0.0.1/mmar_mage/evolve/__init__.py +39 -0
  83. mmar_mage-0.0.1/mmar_mage/evolve/benchmark_synthesizer.py +306 -0
  84. mmar_mage-0.0.1/mmar_mage/evolve/engine.py +452 -0
  85. mmar_mage-0.0.1/mmar_mage/evolve/grader.py +284 -0
  86. mmar_mage-0.0.1/mmar_mage/evolve/operators.py +409 -0
  87. mmar_mage-0.0.1/mmar_mage/evolve/schemas.py +131 -0
  88. mmar_mage-0.0.1/mmar_mage/exceptions.py +80 -0
  89. mmar_mage-0.0.1/mmar_mage/generator.py +2557 -0
  90. mmar_mage-0.0.1/mmar_mage/llm.py +502 -0
  91. mmar_mage-0.0.1/mmar_mage/memory.py +1292 -0
  92. mmar_mage-0.0.1/mmar_mage/profiles.py +203 -0
  93. mmar_mage-0.0.1/mmar_mage/prompt_loader.py +92 -0
  94. mmar_mage-0.0.1/mmar_mage/prompts.py +669 -0
  95. mmar_mage-0.0.1/mmar_mage/prompts_data/prompts.yaml +1362 -0
  96. mmar_mage-0.0.1/mmar_mage/schemas.py +1738 -0
  97. mmar_mage-0.0.1/mmar_mage/security/__init__.py +29 -0
  98. mmar_mage-0.0.1/mmar_mage/security/sandbox.py +367 -0
  99. mmar_mage-0.0.1/mmar_mage/skills/__init__.py +46 -0
  100. mmar_mage-0.0.1/mmar_mage/skills/discovery.py +443 -0
  101. mmar_mage-0.0.1/mmar_mage/skills/manifest.py +149 -0
  102. mmar_mage-0.0.1/mmar_mage/skills/registry.py +267 -0
  103. mmar_mage-0.0.1/mmar_mage/templates/__init__.py +12 -0
  104. mmar_mage-0.0.1/mmar_mage/templates/skeletons.py +101 -0
  105. mmar_mage-0.0.1/mmar_mage/topology/__init__.py +32 -0
  106. mmar_mage-0.0.1/mmar_mage/topology/library.py +180 -0
  107. mmar_mage-0.0.1/mmar_mage/topology/schemas.py +55 -0
  108. mmar_mage-0.0.1/pyproject.toml +85 -0
  109. mmar_mage-0.0.1/tests/__init__.py +0 -0
  110. mmar_mage-0.0.1/tests/conftest.py +271 -0
  111. mmar_mage-0.0.1/tests/test_allowed_step_types.py +101 -0
  112. mmar_mage-0.0.1/tests/test_anti_patterns.py +51 -0
  113. mmar_mage-0.0.1/tests/test_benchmark_profiles.py +109 -0
  114. mmar_mage-0.0.1/tests/test_benchmark_runner.py +99 -0
  115. mmar_mage-0.0.1/tests/test_benchmark_schemas.py +45 -0
  116. mmar_mage-0.0.1/tests/test_benchmark_stage7.py +72 -0
  117. mmar_mage-0.0.1/tests/test_benchmark_synthesizer.py +66 -0
  118. mmar_mage-0.0.1/tests/test_cancel_retry.py +353 -0
  119. mmar_mage-0.0.1/tests/test_capability_lookup.py +97 -0
  120. mmar_mage-0.0.1/tests/test_capability_p1.py +314 -0
  121. mmar_mage-0.0.1/tests/test_capability_unified.py +609 -0
  122. mmar_mage-0.0.1/tests/test_care_metadata.py +363 -0
  123. mmar_mage-0.0.1/tests/test_care_presets.py +209 -0
  124. mmar_mage-0.0.1/tests/test_care_replay_bundle.py +331 -0
  125. mmar_mage-0.0.1/tests/test_carl_export.py +575 -0
  126. mmar_mage-0.0.1/tests/test_carl_export_field_maps.py +208 -0
  127. mmar_mage-0.0.1/tests/test_carl_load_policy.py +109 -0
  128. mmar_mage-0.0.1/tests/test_carl_loadable_smoke.py +116 -0
  129. mmar_mage-0.0.1/tests/test_carl_round_trip.py +149 -0
  130. mmar_mage-0.0.1/tests/test_category_chain_generator.py +401 -0
  131. mmar_mage-0.0.1/tests/test_chain_edit.py +359 -0
  132. mmar_mage-0.0.1/tests/test_chain_executor.py +468 -0
  133. mmar_mage-0.0.1/tests/test_chain_verifier_deps.py +72 -0
  134. mmar_mage-0.0.1/tests/test_checkpoint_promotion.py +59 -0
  135. mmar_mage-0.0.1/tests/test_cli.py +93 -0
  136. mmar_mage-0.0.1/tests/test_cli_validate.py +353 -0
  137. mmar_mage-0.0.1/tests/test_code_generator.py +264 -0
  138. mmar_mage-0.0.1/tests/test_config.py +219 -0
  139. mmar_mage-0.0.1/tests/test_cost.py +246 -0
  140. mmar_mage-0.0.1/tests/test_deep_mode.py +232 -0
  141. mmar_mage-0.0.1/tests/test_demo_bootstrap.py +224 -0
  142. mmar_mage-0.0.1/tests/test_describer_examples.py +85 -0
  143. mmar_mage-0.0.1/tests/test_digest_capping.py +124 -0
  144. mmar_mage-0.0.1/tests/test_ecosystem_writes.py +427 -0
  145. mmar_mage-0.0.1/tests/test_ecosystem_writes_p2.py +613 -0
  146. mmar_mage-0.0.1/tests/test_evaluation.py +1176 -0
  147. mmar_mage-0.0.1/tests/test_evolve.py +376 -0
  148. mmar_mage-0.0.1/tests/test_evolve_operators.py +188 -0
  149. mmar_mage-0.0.1/tests/test_execution_feedback.py +119 -0
  150. mmar_mage-0.0.1/tests/test_fallback_templates.py +129 -0
  151. mmar_mage-0.0.1/tests/test_fast_mode.py +109 -0
  152. mmar_mage-0.0.1/tests/test_feedback.py +151 -0
  153. mmar_mage-0.0.1/tests/test_feedback_applicability_gate.py +95 -0
  154. mmar_mage-0.0.1/tests/test_flag_orthogonality.py +133 -0
  155. mmar_mage-0.0.1/tests/test_generator.py +162 -0
  156. mmar_mage-0.0.1/tests/test_heterogeneous_steps.py +199 -0
  157. mmar_mage-0.0.1/tests/test_integration_p2.py +438 -0
  158. mmar_mage-0.0.1/tests/test_intermediate_artifact_save.py +293 -0
  159. mmar_mage-0.0.1/tests/test_iterative_step_describing.py +76 -0
  160. mmar_mage-0.0.1/tests/test_llm_only_mode.py +272 -0
  161. mmar_mage-0.0.1/tests/test_memory.py +115 -0
  162. mmar_mage-0.0.1/tests/test_memory_applicability_gate.py +197 -0
  163. mmar_mage-0.0.1/tests/test_memory_benchmarks.py +191 -0
  164. mmar_mage-0.0.1/tests/test_memory_research.py +280 -0
  165. mmar_mage-0.0.1/tests/test_memory_search_mode.py +138 -0
  166. mmar_mage-0.0.1/tests/test_memory_typed_entities.py +260 -0
  167. mmar_mage-0.0.1/tests/test_multi_provider.py +334 -0
  168. mmar_mage-0.0.1/tests/test_orchestration_primitives.py +143 -0
  169. mmar_mage-0.0.1/tests/test_parallel_topology_sampling.py +104 -0
  170. mmar_mage-0.0.1/tests/test_pareto_selection.py +110 -0
  171. mmar_mage-0.0.1/tests/test_per_stage_entrypoints.py +471 -0
  172. mmar_mage-0.0.1/tests/test_plan_scoring.py +102 -0
  173. mmar_mage-0.0.1/tests/test_preflight_cost.py +305 -0
  174. mmar_mage-0.0.1/tests/test_profile_demos.py +116 -0
  175. mmar_mage-0.0.1/tests/test_prompt_loader.py +292 -0
  176. mmar_mage-0.0.1/tests/test_quality_enhancements.py +1194 -0
  177. mmar_mage-0.0.1/tests/test_reflective_mutation.py +213 -0
  178. mmar_mage-0.0.1/tests/test_replan_policy.py +131 -0
  179. mmar_mage-0.0.1/tests/test_replay_from.py +328 -0
  180. mmar_mage-0.0.1/tests/test_sandbox.py +129 -0
  181. mmar_mage-0.0.1/tests/test_save_chain_roundtrip.py +215 -0
  182. mmar_mage-0.0.1/tests/test_schemas.py +321 -0
  183. mmar_mage-0.0.1/tests/test_simplicity_bias.py +113 -0
  184. mmar_mage-0.0.1/tests/test_skill_execution_modes.py +128 -0
  185. mmar_mage-0.0.1/tests/test_skill_synthesis.py +122 -0
  186. mmar_mage-0.0.1/tests/test_skills.py +403 -0
  187. mmar_mage-0.0.1/tests/test_step_config_required.py +120 -0
  188. mmar_mage-0.0.1/tests/test_step_metrics.py +133 -0
  189. mmar_mage-0.0.1/tests/test_step_type_round_trips.py +212 -0
  190. mmar_mage-0.0.1/tests/test_streaming_progress.py +437 -0
  191. mmar_mage-0.0.1/tests/test_suggested_naming.py +366 -0
  192. mmar_mage-0.0.1/tests/test_suggested_naming_golden.py +114 -0
  193. mmar_mage-0.0.1/tests/test_template_assembler.py +126 -0
  194. mmar_mage-0.0.1/tests/test_template_skeletons.py +47 -0
  195. mmar_mage-0.0.1/tests/test_tool_implementation.py +95 -0
  196. mmar_mage-0.0.1/tests/test_tool_step_generation.py +104 -0
  197. mmar_mage-0.0.1/tests/test_tool_synthesis.py +156 -0
  198. mmar_mage-0.0.1/tests/test_topology.py +292 -0
  199. mmar_mage-0.0.1/tests/test_web_research.py +289 -0
  200. mmar_mage-0.0.1/tests/test_web_source_grounding.py +123 -0
  201. mmar_mage-0.0.1/uv.lock +1340 -0
@@ -0,0 +1,36 @@
1
+ # LLM provider
2
+ MAGE_API_KEY=sk-...
3
+ OPENAI_API_KEY=sk-... # fallback
4
+ MAGE_BASE_URL= # optional: OpenRouter / vLLM / Ollama endpoint
5
+ MAGE_MODEL=gpt-4o
6
+
7
+ # Provider selection: openai | openrouter | local | custom
8
+ MAGE_PROVIDER=openai
9
+
10
+ # OpenRouter
11
+ MAGE_OPENROUTER_SITE_URL=https://github.com/Glazkoff/carl-mage
12
+ MAGE_OPENROUTER_APP_NAME=carl-mage
13
+
14
+ # Local / self-hosted OpenAI-compatible (e.g. vLLM, AIRI, Ollama)
15
+ # MAGE_BASE_URL=https://inference.airi.net:46783/v1
16
+ # MAGE_API_KEY=<your-local-server-api-key>
17
+ # MAGE_SSL_VERIFY=false # set false if server uses self-signed cert
18
+ # MAGE_HTTP_TIMEOUT=120.0
19
+
20
+ # Model — use "__auto__" to detect first available model from the server
21
+ # MAGE_MODEL=__auto__
22
+
23
+ # Memory (gigaevo-memory)
24
+ MAGE_ENABLE_MEMORY=true
25
+ MAGE_MEMORY_BASE_URL=http://localhost:8002
26
+
27
+ # Deep Research
28
+ MAGE_ENABLE_MEMORY_RESEARCH=true
29
+ MAGE_ENABLE_WEB_RESEARCH=false
30
+ MAGE_WEB_SEARCH_API_KEY= # Tavily / SerpAPI / Brave API key
31
+ MAGE_WEB_SEARCH_PROVIDER=tavily # tavily | serpapi | brave
32
+
33
+ # Generation quality
34
+ MAGE_MEMORY_RELEVANCE_THRESHOLD=0.4
35
+ MAGE_MEMORY_RECALL_TOP_K=5
36
+ MAGE_COLD_START_CANDIDATES=3
@@ -0,0 +1,43 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+ workflow_dispatch:
9
+
10
+ permissions:
11
+ contents: read
12
+
13
+ jobs:
14
+ lint-and-test:
15
+ name: Lint + tests (py${{ matrix.python }})
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ python: ["3.12", "3.13"]
21
+ steps:
22
+ - name: Checkout repository
23
+ uses: actions/checkout@v5
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v6
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python ${{ matrix.python }}
31
+ run: uv python install ${{ matrix.python }}
32
+
33
+ - name: Sync dev dependencies
34
+ run: uv sync --group dev
35
+
36
+ - name: Run ruff
37
+ run: uv run ruff check mmar_mage/ tests/ examples/
38
+
39
+ - name: Run mypy
40
+ run: uv run mypy mmar_mage/
41
+
42
+ - name: Run pytest
43
+ run: uv run pytest tests/ -q --maxfail=1
@@ -0,0 +1,76 @@
1
+ name: Release to PyPI
2
+
3
+ # Publish `mmar-mage` to PyPI whenever the version in pyproject.toml is not yet
4
+ # on PyPI. Runs on every push to main and gates on the live PyPI index, so a
5
+ # bumped-but-never-published version (e.g. a prior failed run) still ships.
6
+ on:
7
+ push:
8
+ branches: [main]
9
+
10
+ jobs:
11
+ check-version:
12
+ runs-on: ubuntu-latest
13
+ outputs:
14
+ should_publish: ${{ steps.check.outputs.should_publish }}
15
+ version: ${{ steps.check.outputs.version }}
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+
19
+ - name: Check whether version is already on PyPI
20
+ id: check
21
+ run: |
22
+ version=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "(.*)"/\1/')
23
+ echo "pyproject version: $version"
24
+ echo "version=$version" >> "$GITHUB_OUTPUT"
25
+
26
+ status=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/mmar-mage/$version/json")
27
+ echo "PyPI lookup HTTP status: $status"
28
+ case "$status" in
29
+ 200)
30
+ echo "Version $version already on PyPI — nothing to publish."
31
+ echo "should_publish=false" >> "$GITHUB_OUTPUT"
32
+ ;;
33
+ 404)
34
+ echo "Version $version not on PyPI — will publish."
35
+ echo "should_publish=true" >> "$GITHUB_OUTPUT"
36
+ ;;
37
+ *)
38
+ echo "Unexpected status $status from PyPI; refusing to guess." >&2
39
+ exit 1
40
+ ;;
41
+ esac
42
+
43
+ release:
44
+ needs: check-version
45
+ if: needs.check-version.outputs.should_publish == 'true'
46
+ runs-on: ubuntu-latest
47
+ permissions:
48
+ contents: write # push the v<version> tag
49
+ steps:
50
+ - uses: actions/checkout@v5
51
+
52
+ - name: Set up Python
53
+ uses: actions/setup-python@v6
54
+ with:
55
+ python-version: "3.12"
56
+
57
+ - name: Install build tooling
58
+ run: python -m pip install --upgrade build
59
+
60
+ - name: Build sdist + wheel
61
+ run: python -m build
62
+
63
+ - name: Publish to PyPI
64
+ uses: pypa/gh-action-pypi-publish@release/v1
65
+ with:
66
+ password: ${{ secrets.PYPI_API_TOKEN }}
67
+
68
+ - name: Tag the release
69
+ run: |
70
+ version="${{ needs.check-version.outputs.version }}"
71
+ if git ls-remote --exit-code --tags origin "refs/tags/v$version" >/dev/null 2>&1; then
72
+ echo "Tag v$version already exists — skipping."
73
+ else
74
+ git tag "v$version"
75
+ git push origin "v$version"
76
+ fi
@@ -0,0 +1,46 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+ *.egg
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # Claude Code (local config, runtime locks, worktrees)
23
+ .claude/
24
+
25
+ # Testing
26
+ .pytest_cache/
27
+ .coverage
28
+ htmlcov/
29
+
30
+ # Ruff
31
+ .ruff_cache/
32
+
33
+ # mypy
34
+ .mypy_cache/
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Examples output
41
+ examples/**/result.json
42
+
43
+ # Environment
44
+ .env
45
+ .env.local
46
+ experiments/baselines/_staged_data/
@@ -0,0 +1,389 @@
1
+ # CLAUDE.md — Project Context for Claude Code
2
+
3
+ ## Project Overview
4
+
5
+ **MAGE** (Mesh Architecture Generation Engine) converts natural-language queries into structured
6
+ [CARL](https://github.com/Glazkoff/carl) reasoning chains (JSON). Part of the MMAR ecosystem
7
+ alongside `mmar-carl` (chain execution) and `gigaevo-memory` (memory service).
8
+
9
+ - **Package**: `mmar-mage` v0.0.1
10
+ - **Python**: ≥ 3.12
11
+ - **License**: MIT
12
+ - **Author**: glazkov (glazkov@airi.net)
13
+
14
+ ## Quick Commands
15
+
16
+ ```bash
17
+ # Setup
18
+ uv sync --group dev # install all deps (creates .venv automatically)
19
+
20
+ # Quality
21
+ uv run pytest tests/ -v # ~320 tests
22
+ uv run ruff check mmar_mage/ tests/ examples/ # lint
23
+ uv run ruff format mmar_mage/ tests/ # auto-format
24
+ uv run mypy mmar_mage/ # type-check
25
+
26
+ # Or via Makefile
27
+ make install # uv sync --group dev
28
+ make test # pytest
29
+ make lint # ruff check
30
+ make all # lint + typecheck + tests
31
+ make examples # run all example scripts
32
+ ```
33
+
34
+ ## Architecture
35
+
36
+ ```
37
+ User query (str)
38
+
39
+
40
+ ┌────────────────────────────────────────────────┐
41
+ │ MAGEGenerator │
42
+ │ │
43
+ │ mode=fast ──► single LLM call (with retries) │
44
+ │ mode=deep ──► agentic pipeline: │
45
+ │ 0a. MemoryResearchAgent → digest (opt) │
46
+ │ 0b. WebResearchAgent → digest (opt) │
47
+ │ 0c. CapabilityLookupAgent → tools (opt) │
48
+ │ 0d. TemplateAssemblerAgent → skel (opt) │
49
+ │ 0e. FeedbackRecallAgent → digest (opt) │
50
+ │ 0f. SkillDiscoveryAgent → URIs (opt) │
51
+ │ 1. DomainAnalyzer → DomainAnalysis │
52
+ │ 2. StepPlanner → StepPlan │
53
+ │ 3. DAGBuilder | ToTPlanner → DAGStructure │
54
+ │ 4. StepDescriber | MCTSConstructor │
55
+ │ 4.5 StepCriticAgent → revise (opt) │
56
+ │ 5. Serialization → CARLChainSchema │
57
+ │ + default ReplanPolicy attach (opt) │
58
+ │ 5.5 ChainVerifier → fix (opt) │
59
+ │ 5.6 SelfRefiner → refine (opt) │
60
+ │ │
61
+ │ + validate_carl_json() — MAGE-internal types │
62
+ │ + to_carl_compat() — eval/aggregator → llm, │
63
+ │ flat fields → nested step_config │
64
+ │ + validate_carl_json(strict_carl=True) │
65
+ │ + ReasoningChain.from_json() (if installed) │
66
+ │ + MemoryManager.save_chain() (optional) │
67
+ └────────────────────────────────────────────────┘
68
+
69
+ agent-features integration (carl-experiments @ agent-features):
70
+ • Step types: llm/tool/mcp/memory/transform/conditional/structured_output
71
+ + agent_skill, evaluation, agent_handoff, parallel_sampling, supervisor,
72
+ debate, human_input, tool_discovery, mcp_resource
73
+ • Skill discovery via SkillDiscoveryAgent + built-in SkillRegistry (PDF,
74
+ DOCX, PPTX, XLSX, mcp-builder, ...) + optional web fallback
75
+ • Cost estimation via mmar_mage.cost.estimate_chain_cost (dry-run)
76
+ • Evolve loop has budget guard (max_cost_usd / max_tokens_per_chain)
77
+ • RE-PLAN: per-step checkpoint hints + chain-level default policy synth
78
+
79
+
80
+ MAGEResult { chain_json, chain_dict, memory_key, mode, metadata }
81
+ ```
82
+
83
+ ### Edit mode (NL-driven chain editing)
84
+
85
+ Besides *generating*, MAGE can **edit an existing chain**:
86
+ `MAGEGenerator.edit(instruction, *, entity_id=None, chain=None, save=False)` →
87
+ `MAGEEditResult`. It resolves the target (`entity_id` → `MemoryManager.load_chain`;
88
+ else `search_similar`, disambiguating on ties → `needs_disambiguation` +
89
+ `candidates`), asks `ChainEditPlanner` (`CHAIN_EDIT_PROMPT`) for a **minimal** edit
90
+ plan, applies it via `mmar_mage.chain_edit.apply_edit_plan` — which reuses
91
+ `evolve.operators._apply_edit` for the 5 structural ops and adds 3 lightweight ones
92
+ (`edit_field` / `set_dependencies` / `set_chain_field`) — re-validates with
93
+ `validate_or_repair`, and (with `save=True`) versions it via
94
+ `MemoryManager.save_chain(entity_id=…)`. CLI: `mage edit`. Convenience:
95
+ `edit_chain(...)`. Note: `chain_edit` lazy-imports `evolve.operators` inside
96
+ `apply_edit` to avoid the `evolve → engine → generator → chain_edit` import cycle
97
+ (same trick as `chain_repair`'s lazy `from .generator import validate_carl_json`).
98
+
99
+ ### Key Design Principles
100
+
101
+ 1. **Every deep-mode stage is independently toggleable** via `MAGEConfig` booleans
102
+ (`enable_domain_analysis`, `enable_step_planning`, `enable_dag_optimization`,
103
+ `enable_step_descriptions`, `enable_memory_research`, `enable_web_research`).
104
+ Disabled stages use deterministic fallbacks.
105
+
106
+ 2. **Memory research and web research are COMPLETELY SEPARATE features**.
107
+ They are independent stages (0a and 0b), can be enabled/disabled individually,
108
+ and feed their digests into StepPlanner as optional context.
109
+
110
+ 3. **Structured output everywhere** — all LLM calls use `response_format=json_object`,
111
+ validated against Pydantic models before propagation.
112
+
113
+ 4. **Graceful degradation** — memory and web research failures are non-fatal (`try/except`
114
+ with logging, generation continues without the research results).
115
+
116
+ 5. **Cold-start multi-sampling** — when `memory_research` is enabled but finds no hits
117
+ (`was_cold_start=True`), generates `cold_start_candidates` plan variants and uses
118
+ `PLAN_SCORER_PROMPT` to pick the best.
119
+
120
+ ## File Structure
121
+
122
+ ```
123
+ mmar_mage/
124
+ __init__.py # public API: MAGEGenerator, MAGEConfig, MAGEResult, etc.
125
+ generator.py # MAGEGenerator orchestrator (fast + deep modes)
126
+ schemas.py # Pydantic models (MAGEConfig, MAGEResult, DomainAnalysis, etc.)
127
+ llm.py # LLMClient — async wrapper around AsyncOpenAI
128
+ prompts.py # ALL LLM prompt templates (system messages)
129
+ memory.py # MemoryManager — gigaevo-memory wrapper (incl. save/recall_benchmark)
130
+ cli.py # Typer CLI: `mage generate`, `mage edit`, `mage evolve`, `mage version`
131
+ exceptions.py # MAGEError, MAGEValidationError, MAGELLMError, MAGEMemoryError
132
+ carl_export.py # MAGE-format → CARL-loadable JSON (eval/aggregator → llm, nested step_config)
133
+ cost.py # estimate_chain_cost / fits_in_budget (dry-run cost)
134
+ agents/ # see Agents section
135
+ skills/
136
+ __init__.py
137
+ registry.py # SkillRegistry + built-in catalog (PDF, DOCX, PPTX, XLSX, ...)
138
+ discovery.py # SkillDiscoveryAgent (gate → rerank → web fallback)
139
+ evolve/
140
+ __init__.py
141
+ schemas.py # EvolveConfig, EvolveResult, Individual, ChainScore
142
+ benchmark_synthesizer.py # Stage 1: synthesize benchmark per query
143
+ grader.py # Stage 3: structural rubric + LLM-judge (+ real-execution)
144
+ operators.py # Mutation/crossover/LLM-repair operators on chain_dict
145
+ engine.py # ChainEvolver — main evolutionary loop
146
+
147
+ configs/ # Preset TOML configurations
148
+ fast.toml # Fast mode, no research
149
+ deep_local.toml # Deep mode, no research, no memory
150
+ deep_research.toml # Deep mode + memory research + web research
151
+ deep_openrouter.toml # Deep mode via OpenRouter
152
+ deep_local_airi.toml # Deep mode via AIRI inference server
153
+
154
+ examples/ # Runnable example scripts
155
+ README.md
156
+ fast_mode/ # run.py + config.toml → result.json
157
+ deep_mode_local/
158
+ deep_mode_web_research/
159
+ deep_mode_memory_research/
160
+ deep_mode_full/
161
+
162
+ tests/
163
+ conftest.py # Shared fixtures, mock LLM responses, sample data
164
+ test_schemas.py # MAGEConfig, MAGEResult, Pydantic model tests
165
+ test_fast_mode.py # Fast mode generation tests
166
+ test_deep_mode.py # Deep mode pipeline tests
167
+ test_generator.py # Generator validation, edge cases
168
+ test_memory.py # MemoryManager tests
169
+ test_cli.py # CLI command tests
170
+ test_config.py # Config loading (TOML, env, factories)
171
+ test_memory_research.py # MemoryResearchAgent tests
172
+ test_web_research.py # WebResearchAgent tests
173
+
174
+ docs/ARCHITECTURE.md # Detailed architecture documentation
175
+ ```
176
+
177
+ ## Code Style & Conventions
178
+
179
+ - **Formatter/Linter**: ruff, line-length=120, ignore E501
180
+ - **Ruff rules**: E, F, I, N, W, UP, B
181
+ - **Type checking**: mypy, python 3.12, ignore_missing_imports=true
182
+ - **Async**: all LLM calls are async (`AsyncOpenAI`), pytest uses `asyncio_mode = "auto"`
183
+ - **Docstrings**: NumPy-style with Parameters/Returns/Raises sections
184
+ - **CARL step_type**: MUST be lowercase (`"llm"`, not `"LLM"`) — this is enforced in
185
+ `PlannedStep`, `DAGStep`, and `CARLStepSchema` defaults
186
+ - **Imports**: use `from __future__ import annotations` everywhere
187
+ - **Dependencies**: Pydantic v2 (`model_validate`, `model_dump`), not v1 API
188
+
189
+ ## Testing
190
+
191
+ - **94 tests**, **87% coverage**, all passing
192
+ - **Mock target**: `mmar_mage.llm.AsyncOpenAI` — always mock at this path
193
+ - **Pattern**: `conftest.py` defines `mock_openai_fast` and `mock_openai_deep` fixtures
194
+ that return `AsyncMock` for `chat.completions.create` with side_effect lists
195
+ - **All LLM and HTTP calls must be mocked** — no real API calls in tests
196
+ - **Coverage requirement**: ≥ 80%
197
+
198
+ ### Test fixture structure
199
+
200
+ ```python
201
+ # conftest.py provides:
202
+ # - SAMPLE_CARL_CHAIN — valid 3-step CARL chain dict
203
+ # - SAMPLE_DOMAIN_ANALYSIS — finance domain analysis
204
+ # - SAMPLE_STEP_PLAN — 3-step plan
205
+ # - SAMPLE_DAG — funnel pattern DAG
206
+ # - SAMPLE_DESCRIBED_STEPS — full CARL step descriptions
207
+ # - config() — fast mode test config (memory disabled)
208
+ # - deep_config() — deep mode test config (memory disabled)
209
+ # - mock_openai_fast — patches AsyncOpenAI for fast mode
210
+ # - mock_openai_deep — patches AsyncOpenAI with 4-response side_effect for deep mode
211
+ ```
212
+
213
+ ### Running tests
214
+
215
+ ```bash
216
+ uv run pytest tests/ -v # all tests
217
+ uv run pytest tests/test_fast_mode.py -v # specific file
218
+ uv run pytest -k "test_deep" -v # by keyword
219
+ uv run pytest --cov=mmar_mage --cov-report=term-missing # with coverage
220
+ ```
221
+
222
+ ## Configuration System
223
+
224
+ `MAGEConfig` (Pydantic BaseModel) supports multiple loading methods:
225
+
226
+ ```python
227
+ # From TOML
228
+ config = MAGEConfig.from_toml("configs/deep_local.toml")
229
+
230
+ # From environment variables (MAGE_* prefix)
231
+ config = MAGEConfig.from_env()
232
+
233
+ # Factory: OpenRouter
234
+ config = MAGEConfig.for_openrouter(api_key="sk-or-...", model="anthropic/claude-3.5-sonnet")
235
+
236
+ # Factory: Local server (vLLM, Ollama, etc.)
237
+ config = MAGEConfig.for_local(base_url="http://localhost:8000/v1", model="__auto__")
238
+ ```
239
+
240
+ ### Key config fields
241
+
242
+ | Field | Type | Default | Description |
243
+ |-------|------|---------|-------------|
244
+ | `mode` | `"fast"` \| `"deep"` | `"deep"` | Generation mode |
245
+ | `model` | str | `"gpt-4o"` | Model ID or `"__auto__"` for detection |
246
+ | `provider` | `"openai"` \| `"openrouter"` \| `"local"` \| `"custom"` | `"openai"` | LLM provider |
247
+ | `base_url` | str \| None | None | Custom API endpoint |
248
+ | `enable_memory_research` | bool | False | Stage 0a: memory recall |
249
+ | `enable_web_research` | bool | False | Stage 0b: web search |
250
+ | `web_search_provider` | str | `"tavily"` | `tavily` \| `serpapi` \| `brave` |
251
+ | `web_search_api_key` | str \| None | None | API key for web search provider |
252
+ | `cold_start_candidates` | int | 1 | Plan candidates for cold-start scoring |
253
+ | `stage_llm_overrides` | dict | {} | Per-stage model/temperature overrides |
254
+
255
+ ## LLM Client
256
+
257
+ `LLMClient` (in `llm.py`) wraps `AsyncOpenAI` with:
258
+
259
+ - **Per-stage overrides**: `get_stage_config(stage)` merges `stage_llm_overrides`
260
+ - **Auto-detection**: `resolve_model()` calls `/v1/models` when model is `"__auto__"`
261
+ - **Retries**: configurable via `max_retries`
262
+ - **Provider support**: OpenAI, OpenRouter (extra headers), local (vLLM/Ollama), custom
263
+
264
+ Key methods:
265
+ ```python
266
+ await client.generate(system_prompt, user_prompt, json_mode=True, stage="...") → str
267
+ await client.generate_json(system_prompt, user_prompt, stage="...") → dict
268
+ await client.generate_structured(system_prompt, user_prompt, SomeModel, stage="...") → BaseModel
269
+ ```
270
+
271
+ ## Prompts (in `prompts.py`)
272
+
273
+ All prompts are constants. Each includes few-shot examples and format specifications:
274
+
275
+ | Constant | Used by | Purpose |
276
+ |----------|---------|---------|
277
+ | `FAST_MODE_SYSTEM_PROMPT` | `generator._generate_fast` | Single-shot chain generation (3 examples) |
278
+ | `DOMAIN_ANALYZER_PROMPT` | `DomainAnalyzer` | Domain/complexity analysis (3 examples) |
279
+ | `STEP_PLANNER_PROMPT` | `StepPlanner` | Step sequence planning (2 examples) |
280
+ | `DAG_BUILDER_PROMPT` | `DAGBuilder` | Dependency graph construction (1 diamond example) |
281
+ | `STEP_DESCRIBER_PROMPT` | `StepDescriber` | CARL step detail generation |
282
+ | `MEMORY_SUB_QUERY_GENERATOR_PROMPT` | `MemoryResearchAgent` | Sub-query generation for memory |
283
+ | `MEMORY_PURIFIER_PROMPT` | `MemoryResearchAgent` | Memory digest with anti-hallucination |
284
+ | `WEB_SUB_QUERY_GENERATOR_PROMPT` | `WebResearchAgent` | Sub-query generation for web search |
285
+ | `WEB_DIGEST_PROMPT` | `WebResearchAgent` | Web digest with anti-hallucination |
286
+ | `PLAN_SCORER_PROMPT` | `generator._score_plans` | Cold-start plan scoring (weighted criteria) |
287
+
288
+ ## CARL JSON Format
289
+
290
+ Generated chains must conform to:
291
+
292
+ ```json
293
+ {
294
+ "name": "...",
295
+ "max_workers": 2,
296
+ "timeout": 300.0,
297
+ "search_config": {
298
+ "strategy": "substring",
299
+ "substring_config": {
300
+ "case_sensitive": false,
301
+ "min_word_length": 3,
302
+ "max_matches_per_query": 5
303
+ }
304
+ },
305
+ "steps": [
306
+ {
307
+ "step_type": "llm",
308
+ "number": 1,
309
+ "title": "...",
310
+ "aim": "...",
311
+ "reasoning_questions": "...",
312
+ "step_context_queries": ["...", "..."],
313
+ "stage_action": "...",
314
+ "example_reasoning": "...",
315
+ "dependencies": [],
316
+ "retry_max": 3,
317
+ "llm_config": null
318
+ }
319
+ ]
320
+ }
321
+ ```
322
+
323
+ **Validation** (in `generator.py`):
324
+ 1. `validate_carl_json()` — structure, step numbers, DAG acyclicity
325
+ 2. `ReasoningChain.from_json()` — full CARL library validation (if `mmar-carl` installed)
326
+
327
+ ## Dependencies
328
+
329
+ ### Runtime
330
+ - `pydantic>=2.0.0` — schemas and validation
331
+ - `openai>=1.0.0` — AsyncOpenAI client
332
+ - `httpx>=0.27.0` — HTTP client (SSL, timeouts)
333
+ - `typer>=0.15.0` — CLI framework
334
+ - `rich>=13.0.0` — CLI output formatting
335
+
336
+ ### Optional
337
+ - `gigaevo-memory>=0.1.0` — memory service integration
338
+ - `mmar-carl>=0.1.0` — chain validation
339
+
340
+ ### Dev
341
+ - `ruff>=0.4` — linter/formatter
342
+ - `mypy>=1.10` — type checker
343
+ - `pytest>=8.2` — test runner
344
+ - `pytest-asyncio>=1.0.0` — async test support
345
+ - `pytest-cov>=5.0.0` — coverage
346
+ - `pytest-mock>=3.14.0` — mocker fixture
347
+
348
+ ## Git Workflow
349
+
350
+ - **Branches**: `main` → `dev` → `initial-version` (active)
351
+ - **PR #1**: `initial-version` → `dev` (open, not merged)
352
+ - Do NOT merge PRs without explicit request
353
+ - Commit messages: conventional commits (`feat:`, `fix:`, `build:`, `test:`, `docs:`)
354
+
355
+ ## Common Pitfalls
356
+
357
+ 1. **step_type must be lowercase** — `"llm"` not `"LLM"`. The CARL library rejects uppercase.
358
+ 2. **Mock target path** — always `mmar_mage.llm.AsyncOpenAI`, not `openai.AsyncOpenAI`.
359
+ 3. **Memory is optional** — all memory code must handle `gigaevo-memory` not being installed.
360
+ 4. **Web research needs `web_search_api_key` in config** — environment variables alone
361
+ are not auto-read by `MAGEConfig.from_toml()` for web search keys.
362
+ 5. **Config TOML section** — all fields live under `[mage]`.
363
+ 6. **Async tests** — `asyncio_mode = "auto"` in pyproject.toml, no need for `@pytest.mark.asyncio`.
364
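+ 7. **`from __future__ import annotations`** — required in every module as a project convention (keeps annotations lazily evaluated and allows forward references; `X | None` itself already works natively on Python ≥ 3.12).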
365
+ 8. **Two step-type sets** —
366
+ - `VALID_STEP_TYPES`: what MAGE-internal logic emits, includes `eval` and `aggregator`.
367
+ - `CARL_VALID_STEP_TYPES`: what `mmar_carl.ReasoningChain.from_json()` actually accepts.
368
+ `eval`/`aggregator` are MAGE-only labels translated to `llm` during CARL-compat export
369
+ in `mmar_mage/carl_export.py`. Never put `eval`/`aggregator` into the *final* serialized
370
+ chain JSON — only into intermediate MAGE structures.
371
+ 9. **Non-LLM step config is nested under `step_config`** in the final CARL JSON, not flat.
372
+ `CARLStepSchema` keeps flat fields (`tool_name`, `mcp_server_name`, etc.) for ergonomics
373
+ during generation; `to_carl_compat()` re-nests them before serialization.
374
+
375
+ 10. **agent-features step types require nested config** just like the stable types.
376
+ Every type from {agent_skill, evaluation, parallel_sampling, agent_handoff,
377
+ supervisor, debate, human_input, tool_discovery, mcp_resource} has a dedicated
378
+ mapping table in `mmar_mage/carl_export.py`. To add a new type: update both
379
+ `CARL_VALID_STEP_TYPES` and the per-type field map.
380
+
381
+ 11. **Skill discovery is OFF by default.** Set `enable_skill_discovery=True` in
382
+ `MAGEConfig` to surface packaged AgentSkills via the built-in registry.
383
+ `enable_skill_web_discovery=True` adds the web-search fallback — needs
384
+ `web_search_api_key` set.
385
+
386
+ 12. **RE-PLAN auto-policy needs at least one `checkpoint=True` step** in the
387
+ generated chain. If the StepDescriber doesn't mark any (e.g. the chain is
388
+ short / trivial), `_build_default_replan_policy` returns `None` and no
389
+ policy is attached — that's intentional.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AIRI Institute
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,56 @@
1
+ .PHONY: install sync test test-cov lint format typecheck clean all example-fast example-deep example-web example-memory example-full examples help
2
+
3
+ # --- Setup ---
4
+
5
+ install: ## Install project + dev deps via uv
6
+ uv sync --group dev
7
+
8
+ sync: ## Sync all deps (including optional) via uv
9
+ uv sync --group dev --all-extras
10
+
11
+ # --- Quality ---
12
+
13
+ test: ## Run tests
14
+ uv run pytest tests/ -v --tb=short
15
+
16
+ test-cov: ## Run tests with coverage report
17
+ uv run pytest tests/ -v --cov=mmar_mage --cov-report=term-missing --tb=short
18
+
19
+ lint: ## Run ruff linter
20
+ uv run ruff check mmar_mage/ tests/
21
+
22
+ format: ## Auto-format code with ruff
23
+ uv run ruff format mmar_mage/ tests/
24
+
25
+ typecheck: ## Run mypy type checker
26
+ uv run mypy mmar_mage/
27
+
28
+ # --- Utilities ---
29
+
30
+ clean: ## Remove build artifacts and caches
31
+ rm -rf __pycache__ .pytest_cache .ruff_cache .mypy_cache dist build *.egg-info .venv
32
+ find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
33
+
34
+ all: lint typecheck test ## Run lint + typecheck + tests
35
+
36
+ # --- Examples ---
37
+
38
+ example-fast: ## Run fast mode example
39
+ uv run python examples/fast_mode/run.py
40
+
41
+ example-deep: ## Run deep mode (local) example
42
+ uv run python examples/deep_mode_local/run.py
43
+
44
+ example-web: ## Run deep mode + web research example
45
+ uv run python examples/deep_mode_web_research/run.py
46
+
47
+ example-memory: ## Run deep mode + memory research example
48
+ uv run python examples/deep_mode_memory_research/run.py
49
+
50
+ example-full: ## Run full deep mode example
51
+ uv run python examples/deep_mode_full/run.py
52
+
53
+ examples: example-fast example-deep example-web example-memory example-full ## Run all examples
54
+
55
+ help: ## Show this help
56
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'