mmar-mage 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mmar_mage-0.0.1/.env.example +36 -0
- mmar_mage-0.0.1/.github/workflows/ci.yml +43 -0
- mmar_mage-0.0.1/.github/workflows/release.yml +76 -0
- mmar_mage-0.0.1/.gitignore +46 -0
- mmar_mage-0.0.1/CLAUDE.md +389 -0
- mmar_mage-0.0.1/LICENSE +21 -0
- mmar_mage-0.0.1/Makefile +56 -0
- mmar_mage-0.0.1/PKG-INFO +546 -0
- mmar_mage-0.0.1/README.md +509 -0
- mmar_mage-0.0.1/TODO.md +659 -0
- mmar_mage-0.0.1/configs/care_default.toml +41 -0
- mmar_mage-0.0.1/configs/care_local.toml +40 -0
- mmar_mage-0.0.1/configs/care_research.toml +62 -0
- mmar_mage-0.0.1/configs/deep_local.toml +10 -0
- mmar_mage-0.0.1/configs/deep_local_airi.toml +11 -0
- mmar_mage-0.0.1/configs/deep_openrouter.toml +12 -0
- mmar_mage-0.0.1/configs/deep_research.toml +12 -0
- mmar_mage-0.0.1/configs/fast.toml +7 -0
- mmar_mage-0.0.1/docs/AGENT_SKILLS.md +160 -0
- mmar_mage-0.0.1/docs/ARCHITECTURE.md +160 -0
- mmar_mage-0.0.1/docs/CARE_INTEGRATION.md +265 -0
- mmar_mage-0.0.1/docs/IMPROVEMENT_PLAN.md +805 -0
- mmar_mage-0.0.1/docs/RELEASE_NOTES.md +172 -0
- mmar_mage-0.0.1/docs/RESEARCH_CONTEXT.md +869 -0
- mmar_mage-0.0.1/evaluation/README.md +234 -0
- mmar_mage-0.0.1/evaluation/__init__.py +59 -0
- mmar_mage-0.0.1/evaluation/baseline_generator.py +308 -0
- mmar_mage-0.0.1/evaluation/category_chain_generator.py +411 -0
- mmar_mage-0.0.1/evaluation/chain_executor.py +516 -0
- mmar_mage-0.0.1/evaluation/chain_generator.py +205 -0
- mmar_mage-0.0.1/evaluation/config.py +149 -0
- mmar_mage-0.0.1/evaluation/eval_config.toml +52 -0
- mmar_mage-0.0.1/evaluation/runner.py +406 -0
- mmar_mage-0.0.1/evaluation/tasks.py +300 -0
- mmar_mage-0.0.1/examples/README.md +69 -0
- mmar_mage-0.0.1/examples/agent_features/config.toml +20 -0
- mmar_mage-0.0.1/examples/agent_features/run.py +94 -0
- mmar_mage-0.0.1/examples/deep_mode_full/config.toml +14 -0
- mmar_mage-0.0.1/examples/deep_mode_full/run.py +57 -0
- mmar_mage-0.0.1/examples/deep_mode_local/config.toml +7 -0
- mmar_mage-0.0.1/examples/deep_mode_local/run.py +47 -0
- mmar_mage-0.0.1/examples/deep_mode_memory_research/config.toml +11 -0
- mmar_mage-0.0.1/examples/deep_mode_memory_research/run.py +51 -0
- mmar_mage-0.0.1/examples/deep_mode_web_research/config.toml +10 -0
- mmar_mage-0.0.1/examples/deep_mode_web_research/run.py +53 -0
- mmar_mage-0.0.1/examples/fast_mode/config.toml +7 -0
- mmar_mage-0.0.1/examples/fast_mode/run.py +47 -0
- mmar_mage-0.0.1/mmar_mage/__init__.py +249 -0
- mmar_mage-0.0.1/mmar_mage/agents/__init__.py +57 -0
- mmar_mage-0.0.1/mmar_mage/agents/capability_lookup_agent.py +525 -0
- mmar_mage-0.0.1/mmar_mage/agents/chain_editor.py +106 -0
- mmar_mage-0.0.1/mmar_mage/agents/chain_verifier.py +259 -0
- mmar_mage-0.0.1/mmar_mage/agents/dag_builder.py +71 -0
- mmar_mage-0.0.1/mmar_mage/agents/domain_analyzer.py +66 -0
- mmar_mage-0.0.1/mmar_mage/agents/feedback_recall_agent.py +244 -0
- mmar_mage-0.0.1/mmar_mage/agents/mcts_constructor.py +574 -0
- mmar_mage-0.0.1/mmar_mage/agents/memory_research_agent.py +290 -0
- mmar_mage-0.0.1/mmar_mage/agents/needs_analysis.py +54 -0
- mmar_mage-0.0.1/mmar_mage/agents/self_refiner.py +256 -0
- mmar_mage-0.0.1/mmar_mage/agents/skill_synthesizer.py +137 -0
- mmar_mage-0.0.1/mmar_mage/agents/step_critic.py +162 -0
- mmar_mage-0.0.1/mmar_mage/agents/step_describer.py +260 -0
- mmar_mage-0.0.1/mmar_mage/agents/step_planner.py +258 -0
- mmar_mage-0.0.1/mmar_mage/agents/template_assembler_agent.py +122 -0
- mmar_mage-0.0.1/mmar_mage/agents/tool_implementation_generator.py +148 -0
- mmar_mage-0.0.1/mmar_mage/agents/tool_spec_generator.py +174 -0
- mmar_mage-0.0.1/mmar_mage/agents/topology_selector.py +105 -0
- mmar_mage-0.0.1/mmar_mage/agents/tot_planner.py +276 -0
- mmar_mage-0.0.1/mmar_mage/agents/web_research_agent.py +323 -0
- mmar_mage-0.0.1/mmar_mage/benchmark/__init__.py +30 -0
- mmar_mage-0.0.1/mmar_mage/benchmark/reporter.py +47 -0
- mmar_mage-0.0.1/mmar_mage/benchmark/runner.py +89 -0
- mmar_mage-0.0.1/mmar_mage/benchmark/schemas.py +116 -0
- mmar_mage-0.0.1/mmar_mage/benchmark/synthesizer.py +117 -0
- mmar_mage-0.0.1/mmar_mage/bootstrap.py +161 -0
- mmar_mage-0.0.1/mmar_mage/carl_export.py +859 -0
- mmar_mage-0.0.1/mmar_mage/chain_edit.py +124 -0
- mmar_mage-0.0.1/mmar_mage/chain_repair.py +88 -0
- mmar_mage-0.0.1/mmar_mage/cli.py +538 -0
- mmar_mage-0.0.1/mmar_mage/code_generator.py +271 -0
- mmar_mage-0.0.1/mmar_mage/cost.py +498 -0
- mmar_mage-0.0.1/mmar_mage/evolve/__init__.py +39 -0
- mmar_mage-0.0.1/mmar_mage/evolve/benchmark_synthesizer.py +306 -0
- mmar_mage-0.0.1/mmar_mage/evolve/engine.py +452 -0
- mmar_mage-0.0.1/mmar_mage/evolve/grader.py +284 -0
- mmar_mage-0.0.1/mmar_mage/evolve/operators.py +409 -0
- mmar_mage-0.0.1/mmar_mage/evolve/schemas.py +131 -0
- mmar_mage-0.0.1/mmar_mage/exceptions.py +80 -0
- mmar_mage-0.0.1/mmar_mage/generator.py +2557 -0
- mmar_mage-0.0.1/mmar_mage/llm.py +502 -0
- mmar_mage-0.0.1/mmar_mage/memory.py +1292 -0
- mmar_mage-0.0.1/mmar_mage/profiles.py +203 -0
- mmar_mage-0.0.1/mmar_mage/prompt_loader.py +92 -0
- mmar_mage-0.0.1/mmar_mage/prompts.py +669 -0
- mmar_mage-0.0.1/mmar_mage/prompts_data/prompts.yaml +1362 -0
- mmar_mage-0.0.1/mmar_mage/schemas.py +1738 -0
- mmar_mage-0.0.1/mmar_mage/security/__init__.py +29 -0
- mmar_mage-0.0.1/mmar_mage/security/sandbox.py +367 -0
- mmar_mage-0.0.1/mmar_mage/skills/__init__.py +46 -0
- mmar_mage-0.0.1/mmar_mage/skills/discovery.py +443 -0
- mmar_mage-0.0.1/mmar_mage/skills/manifest.py +149 -0
- mmar_mage-0.0.1/mmar_mage/skills/registry.py +267 -0
- mmar_mage-0.0.1/mmar_mage/templates/__init__.py +12 -0
- mmar_mage-0.0.1/mmar_mage/templates/skeletons.py +101 -0
- mmar_mage-0.0.1/mmar_mage/topology/__init__.py +32 -0
- mmar_mage-0.0.1/mmar_mage/topology/library.py +180 -0
- mmar_mage-0.0.1/mmar_mage/topology/schemas.py +55 -0
- mmar_mage-0.0.1/pyproject.toml +85 -0
- mmar_mage-0.0.1/tests/__init__.py +0 -0
- mmar_mage-0.0.1/tests/conftest.py +271 -0
- mmar_mage-0.0.1/tests/test_allowed_step_types.py +101 -0
- mmar_mage-0.0.1/tests/test_anti_patterns.py +51 -0
- mmar_mage-0.0.1/tests/test_benchmark_profiles.py +109 -0
- mmar_mage-0.0.1/tests/test_benchmark_runner.py +99 -0
- mmar_mage-0.0.1/tests/test_benchmark_schemas.py +45 -0
- mmar_mage-0.0.1/tests/test_benchmark_stage7.py +72 -0
- mmar_mage-0.0.1/tests/test_benchmark_synthesizer.py +66 -0
- mmar_mage-0.0.1/tests/test_cancel_retry.py +353 -0
- mmar_mage-0.0.1/tests/test_capability_lookup.py +97 -0
- mmar_mage-0.0.1/tests/test_capability_p1.py +314 -0
- mmar_mage-0.0.1/tests/test_capability_unified.py +609 -0
- mmar_mage-0.0.1/tests/test_care_metadata.py +363 -0
- mmar_mage-0.0.1/tests/test_care_presets.py +209 -0
- mmar_mage-0.0.1/tests/test_care_replay_bundle.py +331 -0
- mmar_mage-0.0.1/tests/test_carl_export.py +575 -0
- mmar_mage-0.0.1/tests/test_carl_export_field_maps.py +208 -0
- mmar_mage-0.0.1/tests/test_carl_load_policy.py +109 -0
- mmar_mage-0.0.1/tests/test_carl_loadable_smoke.py +116 -0
- mmar_mage-0.0.1/tests/test_carl_round_trip.py +149 -0
- mmar_mage-0.0.1/tests/test_category_chain_generator.py +401 -0
- mmar_mage-0.0.1/tests/test_chain_edit.py +359 -0
- mmar_mage-0.0.1/tests/test_chain_executor.py +468 -0
- mmar_mage-0.0.1/tests/test_chain_verifier_deps.py +72 -0
- mmar_mage-0.0.1/tests/test_checkpoint_promotion.py +59 -0
- mmar_mage-0.0.1/tests/test_cli.py +93 -0
- mmar_mage-0.0.1/tests/test_cli_validate.py +353 -0
- mmar_mage-0.0.1/tests/test_code_generator.py +264 -0
- mmar_mage-0.0.1/tests/test_config.py +219 -0
- mmar_mage-0.0.1/tests/test_cost.py +246 -0
- mmar_mage-0.0.1/tests/test_deep_mode.py +232 -0
- mmar_mage-0.0.1/tests/test_demo_bootstrap.py +224 -0
- mmar_mage-0.0.1/tests/test_describer_examples.py +85 -0
- mmar_mage-0.0.1/tests/test_digest_capping.py +124 -0
- mmar_mage-0.0.1/tests/test_ecosystem_writes.py +427 -0
- mmar_mage-0.0.1/tests/test_ecosystem_writes_p2.py +613 -0
- mmar_mage-0.0.1/tests/test_evaluation.py +1176 -0
- mmar_mage-0.0.1/tests/test_evolve.py +376 -0
- mmar_mage-0.0.1/tests/test_evolve_operators.py +188 -0
- mmar_mage-0.0.1/tests/test_execution_feedback.py +119 -0
- mmar_mage-0.0.1/tests/test_fallback_templates.py +129 -0
- mmar_mage-0.0.1/tests/test_fast_mode.py +109 -0
- mmar_mage-0.0.1/tests/test_feedback.py +151 -0
- mmar_mage-0.0.1/tests/test_feedback_applicability_gate.py +95 -0
- mmar_mage-0.0.1/tests/test_flag_orthogonality.py +133 -0
- mmar_mage-0.0.1/tests/test_generator.py +162 -0
- mmar_mage-0.0.1/tests/test_heterogeneous_steps.py +199 -0
- mmar_mage-0.0.1/tests/test_integration_p2.py +438 -0
- mmar_mage-0.0.1/tests/test_intermediate_artifact_save.py +293 -0
- mmar_mage-0.0.1/tests/test_iterative_step_describing.py +76 -0
- mmar_mage-0.0.1/tests/test_llm_only_mode.py +272 -0
- mmar_mage-0.0.1/tests/test_memory.py +115 -0
- mmar_mage-0.0.1/tests/test_memory_applicability_gate.py +197 -0
- mmar_mage-0.0.1/tests/test_memory_benchmarks.py +191 -0
- mmar_mage-0.0.1/tests/test_memory_research.py +280 -0
- mmar_mage-0.0.1/tests/test_memory_search_mode.py +138 -0
- mmar_mage-0.0.1/tests/test_memory_typed_entities.py +260 -0
- mmar_mage-0.0.1/tests/test_multi_provider.py +334 -0
- mmar_mage-0.0.1/tests/test_orchestration_primitives.py +143 -0
- mmar_mage-0.0.1/tests/test_parallel_topology_sampling.py +104 -0
- mmar_mage-0.0.1/tests/test_pareto_selection.py +110 -0
- mmar_mage-0.0.1/tests/test_per_stage_entrypoints.py +471 -0
- mmar_mage-0.0.1/tests/test_plan_scoring.py +102 -0
- mmar_mage-0.0.1/tests/test_preflight_cost.py +305 -0
- mmar_mage-0.0.1/tests/test_profile_demos.py +116 -0
- mmar_mage-0.0.1/tests/test_prompt_loader.py +292 -0
- mmar_mage-0.0.1/tests/test_quality_enhancements.py +1194 -0
- mmar_mage-0.0.1/tests/test_reflective_mutation.py +213 -0
- mmar_mage-0.0.1/tests/test_replan_policy.py +131 -0
- mmar_mage-0.0.1/tests/test_replay_from.py +328 -0
- mmar_mage-0.0.1/tests/test_sandbox.py +129 -0
- mmar_mage-0.0.1/tests/test_save_chain_roundtrip.py +215 -0
- mmar_mage-0.0.1/tests/test_schemas.py +321 -0
- mmar_mage-0.0.1/tests/test_simplicity_bias.py +113 -0
- mmar_mage-0.0.1/tests/test_skill_execution_modes.py +128 -0
- mmar_mage-0.0.1/tests/test_skill_synthesis.py +122 -0
- mmar_mage-0.0.1/tests/test_skills.py +403 -0
- mmar_mage-0.0.1/tests/test_step_config_required.py +120 -0
- mmar_mage-0.0.1/tests/test_step_metrics.py +133 -0
- mmar_mage-0.0.1/tests/test_step_type_round_trips.py +212 -0
- mmar_mage-0.0.1/tests/test_streaming_progress.py +437 -0
- mmar_mage-0.0.1/tests/test_suggested_naming.py +366 -0
- mmar_mage-0.0.1/tests/test_suggested_naming_golden.py +114 -0
- mmar_mage-0.0.1/tests/test_template_assembler.py +126 -0
- mmar_mage-0.0.1/tests/test_template_skeletons.py +47 -0
- mmar_mage-0.0.1/tests/test_tool_implementation.py +95 -0
- mmar_mage-0.0.1/tests/test_tool_step_generation.py +104 -0
- mmar_mage-0.0.1/tests/test_tool_synthesis.py +156 -0
- mmar_mage-0.0.1/tests/test_topology.py +292 -0
- mmar_mage-0.0.1/tests/test_web_research.py +289 -0
- mmar_mage-0.0.1/tests/test_web_source_grounding.py +123 -0
- mmar_mage-0.0.1/uv.lock +1340 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# LLM provider
|
|
2
|
+
MAGE_API_KEY=sk-...
|
|
3
|
+
OPENAI_API_KEY=sk-... # fallback
|
|
4
|
+
MAGE_BASE_URL= # optional: OpenRouter / vLLM / Ollama endpoint
|
|
5
|
+
MAGE_MODEL=gpt-4o
|
|
6
|
+
|
|
7
|
+
# Provider selection: openai | openrouter | local | custom
|
|
8
|
+
MAGE_PROVIDER=openai
|
|
9
|
+
|
|
10
|
+
# OpenRouter
|
|
11
|
+
MAGE_OPENROUTER_SITE_URL=https://github.com/Glazkoff/carl-mage
|
|
12
|
+
MAGE_OPENROUTER_APP_NAME=carl-mage
|
|
13
|
+
|
|
14
|
+
# Local / self-hosted OpenAI-compatible (e.g. vLLM, AIRI, Ollama)
|
|
15
|
+
# MAGE_BASE_URL=https://inference.airi.net:46783/v1
|
|
16
|
+
# MAGE_API_KEY=<your-local-server-api-key>
|
|
17
|
+
# MAGE_SSL_VERIFY=false # set false if server uses self-signed cert
|
|
18
|
+
# MAGE_HTTP_TIMEOUT=120.0
|
|
19
|
+
|
|
20
|
+
# Model — use "__auto__" to detect first available model from the server
|
|
21
|
+
# MAGE_MODEL=__auto__
|
|
22
|
+
|
|
23
|
+
# Memory (gigaevo-memory)
|
|
24
|
+
MAGE_ENABLE_MEMORY=true
|
|
25
|
+
MAGE_MEMORY_BASE_URL=http://localhost:8002
|
|
26
|
+
|
|
27
|
+
# Deep Research
|
|
28
|
+
MAGE_ENABLE_MEMORY_RESEARCH=true
|
|
29
|
+
MAGE_ENABLE_WEB_RESEARCH=false
|
|
30
|
+
MAGE_WEB_SEARCH_API_KEY= # Tavily / SerpAPI / Brave API key
|
|
31
|
+
MAGE_WEB_SEARCH_PROVIDER=tavily # tavily | serpapi | brave
|
|
32
|
+
|
|
33
|
+
# Generation quality
|
|
34
|
+
MAGE_MEMORY_RELEVANCE_THRESHOLD=0.4
|
|
35
|
+
MAGE_MEMORY_RECALL_TOP_K=5
|
|
36
|
+
MAGE_COLD_START_CANDIDATES=3
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
lint-and-test:
|
|
15
|
+
name: Lint + tests (py${{ matrix.python }})
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
python: ["3.12", "3.13"]
|
|
21
|
+
steps:
|
|
22
|
+
- name: Checkout repository
|
|
23
|
+
uses: actions/checkout@v5
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v6
|
|
27
|
+
with:
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Set up Python ${{ matrix.python }}
|
|
31
|
+
run: uv python install ${{ matrix.python }}
|
|
32
|
+
|
|
33
|
+
- name: Sync dev dependencies
|
|
34
|
+
run: uv sync --group dev
|
|
35
|
+
|
|
36
|
+
- name: Run ruff
|
|
37
|
+
run: uv run ruff check mmar_mage/ tests/ examples/
|
|
38
|
+
|
|
39
|
+
- name: Run mypy
|
|
40
|
+
run: uv run mypy mmar_mage/
|
|
41
|
+
|
|
42
|
+
- name: Run pytest
|
|
43
|
+
run: uv run pytest tests/ -q --maxfail=1
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
# Publish `mmar-mage` to PyPI whenever the version in pyproject.toml is not yet
|
|
4
|
+
# on PyPI. Runs on every push to main and gates on the live PyPI index, so a
|
|
5
|
+
# bumped-but-never-published version (e.g. a prior failed run) still ships.
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches: [main]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
check-version:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
outputs:
|
|
14
|
+
should_publish: ${{ steps.check.outputs.should_publish }}
|
|
15
|
+
version: ${{ steps.check.outputs.version }}
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v5
|
|
18
|
+
|
|
19
|
+
- name: Check whether version is already on PyPI
|
|
20
|
+
id: check
|
|
21
|
+
run: |
|
|
22
|
+
version=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "(.*)"/\1/')
|
|
23
|
+
echo "pyproject version: $version"
|
|
24
|
+
echo "version=$version" >> "$GITHUB_OUTPUT"
|
|
25
|
+
|
|
26
|
+
status=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/mmar-mage/$version/json")
|
|
27
|
+
echo "PyPI lookup HTTP status: $status"
|
|
28
|
+
case "$status" in
|
|
29
|
+
200)
|
|
30
|
+
echo "Version $version already on PyPI — nothing to publish."
|
|
31
|
+
echo "should_publish=false" >> "$GITHUB_OUTPUT"
|
|
32
|
+
;;
|
|
33
|
+
404)
|
|
34
|
+
echo "Version $version not on PyPI — will publish."
|
|
35
|
+
echo "should_publish=true" >> "$GITHUB_OUTPUT"
|
|
36
|
+
;;
|
|
37
|
+
*)
|
|
38
|
+
echo "Unexpected status $status from PyPI; refusing to guess." >&2
|
|
39
|
+
exit 1
|
|
40
|
+
;;
|
|
41
|
+
esac
|
|
42
|
+
|
|
43
|
+
release:
|
|
44
|
+
needs: check-version
|
|
45
|
+
if: needs.check-version.outputs.should_publish == 'true'
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
permissions:
|
|
48
|
+
contents: write # push the v<version> tag
|
|
49
|
+
steps:
|
|
50
|
+
- uses: actions/checkout@v5
|
|
51
|
+
|
|
52
|
+
- name: Set up Python
|
|
53
|
+
uses: actions/setup-python@v6
|
|
54
|
+
with:
|
|
55
|
+
python-version: "3.12"
|
|
56
|
+
|
|
57
|
+
- name: Install build tooling
|
|
58
|
+
run: python -m pip install --upgrade build
|
|
59
|
+
|
|
60
|
+
- name: Build sdist + wheel
|
|
61
|
+
run: python -m build
|
|
62
|
+
|
|
63
|
+
- name: Publish to PyPI
|
|
64
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
65
|
+
with:
|
|
66
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
67
|
+
|
|
68
|
+
- name: Tag the release
|
|
69
|
+
run: |
|
|
70
|
+
version="${{ needs.check-version.outputs.version }}"
|
|
71
|
+
if git ls-remote --exit-code --tags origin "refs/tags/v$version" >/dev/null 2>&1; then
|
|
72
|
+
echo "Tag v$version already exists — skipping."
|
|
73
|
+
else
|
|
74
|
+
git tag "v$version"
|
|
75
|
+
git push origin "v$version"
|
|
76
|
+
fi
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
*.egg
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# Claude Code (local config, runtime locks, worktrees)
|
|
23
|
+
.claude/
|
|
24
|
+
|
|
25
|
+
# Testing
|
|
26
|
+
.pytest_cache/
|
|
27
|
+
.coverage
|
|
28
|
+
htmlcov/
|
|
29
|
+
|
|
30
|
+
# Ruff
|
|
31
|
+
.ruff_cache/
|
|
32
|
+
|
|
33
|
+
# mypy
|
|
34
|
+
.mypy_cache/
|
|
35
|
+
|
|
36
|
+
# OS
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
39
|
+
|
|
40
|
+
# Examples output
|
|
41
|
+
examples/**/result.json
|
|
42
|
+
|
|
43
|
+
# Environment
|
|
44
|
+
.env
|
|
45
|
+
.env.local
|
|
46
|
+
experiments/baselines/_staged_data/
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# CLAUDE.md — Project Context for Claude Code
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
**MAGE** (Mesh Architecture Generation Engine) converts natural-language queries into structured
|
|
6
|
+
[CARL](https://github.com/Glazkoff/carl) reasoning chains (JSON). Part of the MMAR ecosystem
|
|
7
|
+
alongside `mmar-carl` (chain execution) and `gigaevo-memory` (memory service).
|
|
8
|
+
|
|
9
|
+
- **Package**: `mmar-mage` v0.0.1
|
|
10
|
+
- **Python**: ≥ 3.12
|
|
11
|
+
- **License**: MIT
|
|
12
|
+
- **Author**: glazkov (glazkov@airi.net)
|
|
13
|
+
|
|
14
|
+
## Quick Commands
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Setup
|
|
18
|
+
uv sync --group dev # install all deps (creates .venv automatically)
|
|
19
|
+
|
|
20
|
+
# Quality
|
|
21
|
+
uv run pytest tests/ -v # ~320 tests
|
|
22
|
+
uv run ruff check mmar_mage/ tests/ examples/ # lint
|
|
23
|
+
uv run ruff format mmar_mage/ tests/ # auto-format
|
|
24
|
+
uv run mypy mmar_mage/ # type-check
|
|
25
|
+
|
|
26
|
+
# Or via Makefile
|
|
27
|
+
make install # uv sync --group dev
|
|
28
|
+
make test # pytest
|
|
29
|
+
make lint # ruff check
|
|
30
|
+
make all # lint + typecheck + tests
|
|
31
|
+
make examples # run all example scripts
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Architecture
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
User query (str)
|
|
38
|
+
│
|
|
39
|
+
▼
|
|
40
|
+
┌────────────────────────────────────────────────┐
|
|
41
|
+
│ MAGEGenerator │
|
|
42
|
+
│ │
|
|
43
|
+
│ mode=fast ──► single LLM call (with retries) │
|
|
44
|
+
│ mode=deep ──► agentic pipeline: │
|
|
45
|
+
│ 0a. MemoryResearchAgent → digest (opt) │
|
|
46
|
+
│ 0b. WebResearchAgent → digest (opt) │
|
|
47
|
+
│ 0c. CapabilityLookupAgent → tools (opt) │
|
|
48
|
+
│ 0d. TemplateAssemblerAgent → skel (opt) │
|
|
49
|
+
│ 0e. FeedbackRecallAgent → digest (opt) │
|
|
50
|
+
│ 0f. SkillDiscoveryAgent → URIs (opt) │
|
|
51
|
+
│ 1. DomainAnalyzer → DomainAnalysis │
|
|
52
|
+
│ 2. StepPlanner → StepPlan │
|
|
53
|
+
│ 3. DAGBuilder | ToTPlanner → DAGStructure │
|
|
54
|
+
│ 4. StepDescriber | MCTSConstructor │
|
|
55
|
+
│ 4.5 StepCriticAgent → revise (opt) │
|
|
56
|
+
│ 5. Serialization → CARLChainSchema │
|
|
57
|
+
│ + default ReplanPolicy attach (opt) │
|
|
58
|
+
│ 5.5 ChainVerifier → fix (opt) │
|
|
59
|
+
│ 5.6 SelfRefiner → refine (opt) │
|
|
60
|
+
│ │
|
|
61
|
+
│ + validate_carl_json() — MAGE-internal types │
|
|
62
|
+
│ + to_carl_compat() — eval/aggregator → llm, │
|
|
63
|
+
│ flat fields → nested step_config │
|
|
64
|
+
│ + validate_carl_json(strict_carl=True) │
|
|
65
|
+
│ + ReasoningChain.from_json() (if installed) │
|
|
66
|
+
│ + MemoryManager.save_chain() (optional) │
|
|
67
|
+
└────────────────────────────────────────────────┘
|
|
68
|
+
|
|
69
|
+
agent-features integration (carl-experiments @ agent-features):
|
|
70
|
+
• Step types: llm/tool/mcp/memory/transform/conditional/structured_output
|
|
71
|
+
+ agent_skill, evaluation, agent_handoff, parallel_sampling, supervisor,
|
|
72
|
+
debate, human_input, tool_discovery, mcp_resource
|
|
73
|
+
• Skill discovery via SkillDiscoveryAgent + built-in SkillRegistry (PDF,
|
|
74
|
+
DOCX, PPTX, XLSX, mcp-builder, ...) + optional web fallback
|
|
75
|
+
• Cost estimation via mmar_mage.cost.estimate_chain_cost (dry-run)
|
|
76
|
+
• Evolve loop has budget guard (max_cost_usd / max_tokens_per_chain)
|
|
77
|
+
• RE-PLAN: per-step checkpoint hints + chain-level default policy synth
|
|
78
|
+
│
|
|
79
|
+
▼
|
|
80
|
+
MAGEResult { chain_json, chain_dict, memory_key, mode, metadata }
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Edit mode (NL-driven chain editing)
|
|
84
|
+
|
|
85
|
+
Besides *generating*, MAGE can **edit an existing chain**:
|
|
86
|
+
`MAGEGenerator.edit(instruction, *, entity_id=None, chain=None, save=False)` →
|
|
87
|
+
`MAGEEditResult`. It resolves the target (`entity_id` → `MemoryManager.load_chain`;
|
|
88
|
+
else `search_similar`, disambiguating on ties → `needs_disambiguation` +
|
|
89
|
+
`candidates`), asks `ChainEditPlanner` (`CHAIN_EDIT_PROMPT`) for a **minimal** edit
|
|
90
|
+
plan, applies it via `mmar_mage.chain_edit.apply_edit_plan` — which reuses
|
|
91
|
+
`evolve.operators._apply_edit` for the 5 structural ops and adds 3 lightweight ones
|
|
92
|
+
(`edit_field` / `set_dependencies` / `set_chain_field`) — re-validates with
|
|
93
|
+
`validate_or_repair`, and (with `save=True`) versions it via
|
|
94
|
+
`MemoryManager.save_chain(entity_id=…)`. CLI: `mage edit`. Convenience:
|
|
95
|
+
`edit_chain(...)`. Note: `chain_edit` lazy-imports `evolve.operators` inside
|
|
96
|
+
`apply_edit` to avoid the `evolve → engine → generator → chain_edit` import cycle
|
|
97
|
+
(same trick as `chain_repair`'s lazy `from .generator import validate_carl_json`).
|
|
98
|
+
|
|
99
|
+
### Key Design Principles
|
|
100
|
+
|
|
101
|
+
1. **Every deep-mode stage is independently toggleable** via `MAGEConfig` booleans
|
|
102
|
+
(`enable_domain_analysis`, `enable_step_planning`, `enable_dag_optimization`,
|
|
103
|
+
`enable_step_descriptions`, `enable_memory_research`, `enable_web_research`).
|
|
104
|
+
Disabled stages use deterministic fallbacks.
|
|
105
|
+
|
|
106
|
+
2. **Memory research and web research are COMPLETELY SEPARATE features**.
|
|
107
|
+
They are independent stages (0a and 0b), can be enabled/disabled individually,
|
|
108
|
+
and feed their digests into StepPlanner as optional context.
|
|
109
|
+
|
|
110
|
+
3. **Structured output everywhere** — all LLM calls use `response_format=json_object`,
|
|
111
|
+
validated against Pydantic models before propagation.
|
|
112
|
+
|
|
113
|
+
4. **Graceful degradation** — memory and web research failures are non-fatal (`try/except`
|
|
114
|
+
with logging, generation continues without the research results).
|
|
115
|
+
|
|
116
|
+
5. **Cold-start multi-sampling** — when `memory_research` is enabled but finds no hits
|
|
117
|
+
(`was_cold_start=True`), generates `cold_start_candidates` plan variants and uses
|
|
118
|
+
`PLAN_SCORER_PROMPT` to pick the best.
|
|
119
|
+
|
|
120
|
+
## File Structure
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
mmar_mage/
|
|
124
|
+
__init__.py # public API: MAGEGenerator, MAGEConfig, MAGEResult, etc.
|
|
125
|
+
generator.py # MAGEGenerator orchestrator (fast + deep modes)
|
|
126
|
+
schemas.py # Pydantic models (MAGEConfig, MAGEResult, DomainAnalysis, etc.)
|
|
127
|
+
llm.py # LLMClient — async wrapper around AsyncOpenAI
|
|
128
|
+
prompts.py # ALL LLM prompt templates (system messages)
|
|
129
|
+
memory.py # MemoryManager — gigaevo-memory wrapper (incl. save/recall_benchmark)
|
|
130
|
+
cli.py # Typer CLI: `mage generate`, `mage edit`, `mage evolve`, `mage version`
|
|
131
|
+
exceptions.py # MAGEError, MAGEValidationError, MAGELLMError, MAGEMemoryError
|
|
132
|
+
carl_export.py # MAGE-format → CARL-loadable JSON (eval/aggregator → llm, nested step_config)
|
|
133
|
+
cost.py # estimate_chain_cost / fits_in_budget (dry-run cost)
|
|
134
|
+
agents/ # see Agents section
|
|
135
|
+
skills/
|
|
136
|
+
__init__.py
|
|
137
|
+
registry.py # SkillRegistry + built-in catalog (PDF, DOCX, PPTX, XLSX, ...)
|
|
138
|
+
discovery.py # SkillDiscoveryAgent (gate → rerank → web fallback)
|
|
139
|
+
evolve/
|
|
140
|
+
__init__.py
|
|
141
|
+
schemas.py # EvolveConfig, EvolveResult, Individual, ChainScore
|
|
142
|
+
benchmark_synthesizer.py # Stage 1: synthesize benchmark per query
|
|
143
|
+
grader.py # Stage 3: structural rubric + LLM-judge (+ real-execution)
|
|
144
|
+
operators.py # Mutation/crossover/LLM-repair operators on chain_dict
|
|
145
|
+
engine.py # ChainEvolver — main evolutionary loop
|
|
146
|
+
|
|
147
|
+
configs/ # Preset TOML configurations
|
|
148
|
+
fast.toml # Fast mode, no research
|
|
149
|
+
deep_local.toml # Deep mode, no research, no memory
|
|
150
|
+
deep_research.toml # Deep mode + memory research + web research
|
|
151
|
+
deep_openrouter.toml # Deep mode via OpenRouter
|
|
152
|
+
deep_local_airi.toml # Deep mode via AIRI inference server
|
|
153
|
+
|
|
154
|
+
examples/ # Runnable example scripts
|
|
155
|
+
README.md
|
|
156
|
+
fast_mode/ # run.py + config.toml → result.json
|
|
157
|
+
deep_mode_local/
|
|
158
|
+
deep_mode_web_research/
|
|
159
|
+
deep_mode_memory_research/
|
|
160
|
+
deep_mode_full/
|
|
161
|
+
|
|
162
|
+
tests/
|
|
163
|
+
conftest.py # Shared fixtures, mock LLM responses, sample data
|
|
164
|
+
test_schemas.py # MAGEConfig, MAGEResult, Pydantic model tests
|
|
165
|
+
test_fast_mode.py # Fast mode generation tests
|
|
166
|
+
test_deep_mode.py # Deep mode pipeline tests
|
|
167
|
+
test_generator.py # Generator validation, edge cases
|
|
168
|
+
test_memory.py # MemoryManager tests
|
|
169
|
+
test_cli.py # CLI command tests
|
|
170
|
+
test_config.py # Config loading (TOML, env, factories)
|
|
171
|
+
test_memory_research.py # MemoryResearchAgent tests
|
|
172
|
+
test_web_research.py # WebResearchAgent tests
|
|
173
|
+
|
|
174
|
+
docs/ARCHITECTURE.md # Detailed architecture documentation
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Code Style & Conventions
|
|
178
|
+
|
|
179
|
+
- **Formatter/Linter**: ruff, line-length=120, ignore E501
|
|
180
|
+
- **Ruff rules**: E, F, I, N, W, UP, B
|
|
181
|
+
- **Type checking**: mypy, python 3.12, ignore_missing_imports=true
|
|
182
|
+
- **Async**: all LLM calls are async (`AsyncOpenAI`), pytest uses `asyncio_mode = "auto"`
|
|
183
|
+
- **Docstrings**: NumPy-style with Parameters/Returns/Raises sections
|
|
184
|
+
- **CARL step_type**: MUST be lowercase (`"llm"`, not `"LLM"`) — this is enforced in
|
|
185
|
+
`PlannedStep`, `DAGStep`, and `CARLStepSchema` defaults
|
|
186
|
+
- **Imports**: use `from __future__ import annotations` everywhere
|
|
187
|
+
- **Dependencies**: Pydantic v2 (`model_validate`, `model_dump`), not v1 API
|
|
188
|
+
|
|
189
|
+
## Testing
|
|
190
|
+
|
|
191
|
+
- **~320 tests**, **87% coverage**, all passing
|
|
192
|
+
- **Mock target**: `mmar_mage.llm.AsyncOpenAI` — always mock at this path
|
|
193
|
+
- **Pattern**: `conftest.py` defines `mock_openai_fast` and `mock_openai_deep` fixtures
|
|
194
|
+
that return `AsyncMock` for `chat.completions.create` with side_effect lists
|
|
195
|
+
- **All LLM and HTTP calls must be mocked** — no real API calls in tests
|
|
196
|
+
- **Coverage requirement**: ≥ 80%
|
|
197
|
+
|
|
198
|
+
### Test fixture structure
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
# conftest.py provides:
|
|
202
|
+
# - SAMPLE_CARL_CHAIN — valid 3-step CARL chain dict
|
|
203
|
+
# - SAMPLE_DOMAIN_ANALYSIS — finance domain analysis
|
|
204
|
+
# - SAMPLE_STEP_PLAN — 3-step plan
|
|
205
|
+
# - SAMPLE_DAG — funnel pattern DAG
|
|
206
|
+
# - SAMPLE_DESCRIBED_STEPS — full CARL step descriptions
|
|
207
|
+
# - config() — fast mode test config (memory disabled)
|
|
208
|
+
# - deep_config() — deep mode test config (memory disabled)
|
|
209
|
+
# - mock_openai_fast — patches AsyncOpenAI for fast mode
|
|
210
|
+
# - mock_openai_deep — patches AsyncOpenAI with 4-response side_effect for deep mode
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Running tests
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
uv run pytest tests/ -v # all tests
|
|
217
|
+
uv run pytest tests/test_fast_mode.py -v # specific file
|
|
218
|
+
uv run pytest -k "test_deep" -v # by keyword
|
|
219
|
+
uv run pytest --cov=mmar_mage --cov-report=term-missing # with coverage
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Configuration System
|
|
223
|
+
|
|
224
|
+
`MAGEConfig` (Pydantic BaseModel) supports multiple loading methods:
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
# From TOML
|
|
228
|
+
config = MAGEConfig.from_toml("configs/deep_local.toml")
|
|
229
|
+
|
|
230
|
+
# From environment variables (MAGE_* prefix)
|
|
231
|
+
config = MAGEConfig.from_env()
|
|
232
|
+
|
|
233
|
+
# Factory: OpenRouter
|
|
234
|
+
config = MAGEConfig.for_openrouter(api_key="sk-or-...", model="anthropic/claude-3.5-sonnet")
|
|
235
|
+
|
|
236
|
+
# Factory: Local server (vLLM, Ollama, etc.)
|
|
237
|
+
config = MAGEConfig.for_local(base_url="http://localhost:8000/v1", model="__auto__")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Key config fields
|
|
241
|
+
|
|
242
|
+
| Field | Type | Default | Description |
|
|
243
|
+
|-------|------|---------|-------------|
|
|
244
|
+
| `mode` | `"fast"` \| `"deep"` | `"deep"` | Generation mode |
|
|
245
|
+
| `model` | str | `"gpt-4o"` | Model ID or `"__auto__"` for detection |
|
|
246
|
+
| `provider` | `"openai"` \| `"openrouter"` \| `"local"` \| `"custom"` | `"openai"` | LLM provider |
|
|
247
|
+
| `base_url` | str \| None | None | Custom API endpoint |
|
|
248
|
+
| `enable_memory_research` | bool | False | Stage 0a: memory recall |
|
|
249
|
+
| `enable_web_research` | bool | False | Stage 0b: web search |
|
|
250
|
+
| `web_search_provider` | str | `"tavily"` | `tavily` \| `serpapi` \| `brave` |
|
|
251
|
+
| `web_search_api_key` | str \| None | None | API key for web search provider |
|
|
252
|
+
| `cold_start_candidates` | int | 1 | Plan candidates for cold-start scoring |
|
|
253
|
+
| `stage_llm_overrides` | dict | {} | Per-stage model/temperature overrides |
|
|
254
|
+
|
|
255
|
+
## LLM Client
|
|
256
|
+
|
|
257
|
+
`LLMClient` (in `llm.py`) wraps `AsyncOpenAI` with:
|
|
258
|
+
|
|
259
|
+
- **Per-stage overrides**: `get_stage_config(stage)` merges `stage_llm_overrides`
|
|
260
|
+
- **Auto-detection**: `resolve_model()` calls `/v1/models` when model is `"__auto__"`
|
|
261
|
+
- **Retries**: configurable via `max_retries`
|
|
262
|
+
- **Provider support**: OpenAI, OpenRouter (extra headers), local (vLLM/Ollama), custom
|
|
263
|
+
|
|
264
|
+
Key methods:
|
|
265
|
+
```python
|
|
266
|
+
await client.generate(system_prompt, user_prompt, json_mode=True, stage="...") → str
|
|
267
|
+
await client.generate_json(system_prompt, user_prompt, stage="...") → dict
|
|
268
|
+
await client.generate_structured(system_prompt, user_prompt, SomeModel, stage="...") → BaseModel
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## Prompts (in `prompts.py`)
|
|
272
|
+
|
|
273
|
+
All prompts are constants. Each includes few-shot examples and format specifications:
|
|
274
|
+
|
|
275
|
+
| Constant | Used by | Purpose |
|
|
276
|
+
|----------|---------|---------|
|
|
277
|
+
| `FAST_MODE_SYSTEM_PROMPT` | `generator._generate_fast` | Single-shot chain generation (3 examples) |
|
|
278
|
+
| `DOMAIN_ANALYZER_PROMPT` | `DomainAnalyzer` | Domain/complexity analysis (3 examples) |
|
|
279
|
+
| `STEP_PLANNER_PROMPT` | `StepPlanner` | Step sequence planning (2 examples) |
|
|
280
|
+
| `DAG_BUILDER_PROMPT` | `DAGBuilder` | Dependency graph construction (1 diamond example) |
|
|
281
|
+
| `STEP_DESCRIBER_PROMPT` | `StepDescriber` | CARL step detail generation |
|
|
282
|
+
| `MEMORY_SUB_QUERY_GENERATOR_PROMPT` | `MemoryResearchAgent` | Sub-query generation for memory |
|
|
283
|
+
| `MEMORY_PURIFIER_PROMPT` | `MemoryResearchAgent` | Memory digest with anti-hallucination |
|
|
284
|
+
| `WEB_SUB_QUERY_GENERATOR_PROMPT` | `WebResearchAgent` | Sub-query generation for web search |
|
|
285
|
+
| `WEB_DIGEST_PROMPT` | `WebResearchAgent` | Web digest with anti-hallucination |
|
|
286
|
+
| `PLAN_SCORER_PROMPT` | `generator._score_plans` | Cold-start plan scoring (weighted criteria) |
|
|
287
|
+
|
|
288
|
+
## CARL JSON Format
|
|
289
|
+
|
|
290
|
+
Generated chains must conform to:
|
|
291
|
+
|
|
292
|
+
```json
|
|
293
|
+
{
|
|
294
|
+
"name": "...",
|
|
295
|
+
"max_workers": 2,
|
|
296
|
+
"timeout": 300.0,
|
|
297
|
+
"search_config": {
|
|
298
|
+
"strategy": "substring",
|
|
299
|
+
"substring_config": {
|
|
300
|
+
"case_sensitive": false,
|
|
301
|
+
"min_word_length": 3,
|
|
302
|
+
"max_matches_per_query": 5
|
|
303
|
+
}
|
|
304
|
+
},
|
|
305
|
+
"steps": [
|
|
306
|
+
{
|
|
307
|
+
"step_type": "llm",
|
|
308
|
+
"number": 1,
|
|
309
|
+
"title": "...",
|
|
310
|
+
"aim": "...",
|
|
311
|
+
"reasoning_questions": "...",
|
|
312
|
+
"step_context_queries": ["...", "..."],
|
|
313
|
+
"stage_action": "...",
|
|
314
|
+
"example_reasoning": "...",
|
|
315
|
+
"dependencies": [],
|
|
316
|
+
"retry_max": 3,
|
|
317
|
+
"llm_config": null
|
|
318
|
+
}
|
|
319
|
+
]
|
|
320
|
+
}
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
**Validation** (in `generator.py`):
|
|
324
|
+
1. `validate_carl_json()` — structure, step numbers, DAG acyclicity
|
|
325
|
+
2. `ReasoningChain.from_json()` — full CARL library validation (if `mmar-carl` installed)
|
|
326
|
+
|
|
327
|
+
## Dependencies
|
|
328
|
+
|
|
329
|
+
### Runtime
|
|
330
|
+
- `pydantic>=2.0.0` — schemas and validation
|
|
331
|
+
- `openai>=1.0.0` — AsyncOpenAI client
|
|
332
|
+
- `httpx>=0.27.0` — HTTP client (SSL, timeouts)
|
|
333
|
+
- `typer>=0.15.0` — CLI framework
|
|
334
|
+
- `rich>=13.0.0` — CLI output formatting
|
|
335
|
+
|
|
336
|
+
### Optional
|
|
337
|
+
- `gigaevo-memory>=0.1.0` — memory service integration
|
|
338
|
+
- `mmar-carl>=0.1.0` — chain validation
|
|
339
|
+
|
|
340
|
+
### Dev
|
|
341
|
+
- `ruff>=0.4` — linter/formatter
|
|
342
|
+
- `mypy>=1.10` — type checker
|
|
343
|
+
- `pytest>=8.2` — test runner
|
|
344
|
+
- `pytest-asyncio>=1.0.0` — async test support
|
|
345
|
+
- `pytest-cov>=5.0.0` — coverage
|
|
346
|
+
- `pytest-mock>=3.14.0` — mocker fixture
|
|
347
|
+
|
|
348
|
+
## Git Workflow
|
|
349
|
+
|
|
350
|
+
- **Branches**: `main` → `dev` → `initial-version` (active)
|
|
351
|
+
- **PR #1**: `initial-version` → `dev` (open, not merged)
|
|
352
|
+
- Do NOT merge PRs without explicit request
|
|
353
|
+
- Commit messages: conventional commits (`feat:`, `fix:`, `build:`, `test:`, `docs:`)
|
|
354
|
+
|
|
355
|
+
## Common Pitfalls
|
|
356
|
+
|
|
357
|
+
1. **step_type must be lowercase** — `"llm"` not `"LLM"`. The CARL library rejects uppercase.
|
|
358
|
+
2. **Mock target path** — always `mmar_mage.llm.AsyncOpenAI`, not `openai.AsyncOpenAI`.
|
|
359
|
+
3. **Memory is optional** — all memory code must handle `gigaevo-memory` not being installed.
|
|
360
|
+
4. **Web research needs `web_search_api_key` in config** — environment variables alone
|
|
361
|
+
are not auto-read by `MAGEConfig.from_toml()` for web search keys.
|
|
362
|
+
5. **Config TOML section** — all fields live under `[mage]`.
|
|
363
|
+
6. **Async tests** — `asyncio_mode = "auto"` in pyproject.toml, no need for `@pytest.mark.asyncio`.
|
|
364
|
+
7. **`from __future__ import annotations`** — required in every module for `X | None` syntax.
|
|
365
|
+
8. **Two step-type sets** —
|
|
366
|
+
- `VALID_STEP_TYPES`: what MAGE-internal logic emits, includes `eval` and `aggregator`.
|
|
367
|
+
- `CARL_VALID_STEP_TYPES`: what `mmar_carl.ReasoningChain.from_json()` actually accepts.
|
|
368
|
+
`eval`/`aggregator` are MAGE-only labels translated to `llm` during CARL-compat export
|
|
369
|
+
in `mmar_mage/carl_export.py`. Never put `eval`/`aggregator` into the *final* serialized
|
|
370
|
+
chain JSON — only into intermediate MAGE structures.
|
|
371
|
+
9. **Non-LLM step config is nested under `step_config`** in the final CARL JSON, not flat.
|
|
372
|
+
`CARLStepSchema` keeps flat fields (`tool_name`, `mcp_server_name`, etc.) for ergonomics
|
|
373
|
+
during generation; `to_carl_compat()` re-nests them before serialization.
|
|
374
|
+
|
|
375
|
+
10. **agent-features step types require nested config** just like the stable types.
|
|
376
|
+
Every type from {agent_skill, evaluation, parallel_sampling, agent_handoff,
|
|
377
|
+
supervisor, debate, human_input, tool_discovery, mcp_resource} has a dedicated
|
|
378
|
+
mapping table in `mmar_mage/carl_export.py`. To add a new type: update both
|
|
379
|
+
`CARL_VALID_STEP_TYPES` and the per-type field map.
|
|
380
|
+
|
|
381
|
+
11. **Skill discovery is OFF by default.** Set `enable_skill_discovery=True` in
|
|
382
|
+
`MAGEConfig` to surface packaged AgentSkills via the built-in registry.
|
|
383
|
+
`enable_skill_web_discovery=True` adds the web-search fallback — needs
|
|
384
|
+
`web_search_api_key` set.
|
|
385
|
+
|
|
386
|
+
12. **RE-PLAN auto-policy needs at least one `checkpoint=True` step** in the
|
|
387
|
+
generated chain. If the StepDescriber doesn't mark any (e.g. the chain is
|
|
388
|
+
short / trivial), `_build_default_replan_policy` returns `None` and no
|
|
389
|
+
policy is attached — that's intentional.
|
mmar_mage-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AIRI Institute
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mmar_mage-0.0.1/Makefile
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
.PHONY: install sync test test-cov lint format typecheck clean all example-fast example-deep example-web example-memory example-full examples help
|
|
2
|
+
|
|
3
|
+
# --- Setup ---
|
|
4
|
+
|
|
5
|
+
install: ## Install project + dev deps via uv
|
|
6
|
+
uv sync --group dev
|
|
7
|
+
|
|
8
|
+
sync: ## Sync all deps (including optional) via uv
|
|
9
|
+
uv sync --group dev --all-extras
|
|
10
|
+
|
|
11
|
+
# --- Quality ---
|
|
12
|
+
|
|
13
|
+
test: ## Run tests
|
|
14
|
+
uv run pytest tests/ -v --tb=short
|
|
15
|
+
|
|
16
|
+
test-cov: ## Run tests with coverage report
|
|
17
|
+
uv run pytest tests/ -v --cov=mmar_mage --cov-report=term-missing --tb=short
|
|
18
|
+
|
|
19
|
+
lint: ## Run ruff linter
|
|
20
|
+
uv run ruff check mmar_mage/ tests/
|
|
21
|
+
|
|
22
|
+
format: ## Auto-format code with ruff
|
|
23
|
+
uv run ruff format mmar_mage/ tests/
|
|
24
|
+
|
|
25
|
+
typecheck: ## Run mypy type checker
|
|
26
|
+
uv run mypy mmar_mage/
|
|
27
|
+
|
|
28
|
+
# --- Utilities ---
|
|
29
|
+
|
|
30
|
+
clean: ## Remove build artifacts and caches
|
|
31
|
+
rm -rf __pycache__ .pytest_cache .ruff_cache .mypy_cache dist build *.egg-info .venv
|
|
32
|
+
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
|
33
|
+
|
|
34
|
+
all: lint typecheck test ## Run lint + typecheck + tests
|
|
35
|
+
|
|
36
|
+
# --- Examples ---
|
|
37
|
+
|
|
38
|
+
example-fast: ## Run fast mode example
|
|
39
|
+
uv run python examples/fast_mode/run.py
|
|
40
|
+
|
|
41
|
+
example-deep: ## Run deep mode (local) example
|
|
42
|
+
uv run python examples/deep_mode_local/run.py
|
|
43
|
+
|
|
44
|
+
example-web: ## Run deep mode + web research example
|
|
45
|
+
uv run python examples/deep_mode_web_research/run.py
|
|
46
|
+
|
|
47
|
+
example-memory: ## Run deep mode + memory research example
|
|
48
|
+
uv run python examples/deep_mode_memory_research/run.py
|
|
49
|
+
|
|
50
|
+
example-full: ## Run full deep mode example
|
|
51
|
+
uv run python examples/deep_mode_full/run.py
|
|
52
|
+
|
|
53
|
+
examples: example-fast example-deep example-web example-memory example-full ## Run all examples
|
|
54
|
+
|
|
55
|
+
help: ## Show this help
|
|
56
|
+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'
|