maestro-se 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maestro_se-0.1.0/.github/workflows/ci.yml +37 -0
- maestro_se-0.1.0/.github/workflows/release.yml +70 -0
- maestro_se-0.1.0/.gitignore +46 -0
- maestro_se-0.1.0/Dockerfile +42 -0
- maestro_se-0.1.0/Makefile +47 -0
- maestro_se-0.1.0/PKG-INFO +12 -0
- maestro_se-0.1.0/README.md +268 -0
- maestro_se-0.1.0/RELEASING.md +149 -0
- maestro_se-0.1.0/benchmarks/swebench_task/pyproject.toml +11 -0
- maestro_se-0.1.0/benchmarks/swebench_task/src/dateutils/__init__.py +42 -0
- maestro_se-0.1.0/benchmarks/swebench_task/swebench_instance.json +14 -0
- maestro_se-0.1.0/benchmarks/swebench_task/tests/test_parse_date.py +34 -0
- maestro_se-0.1.0/configs/harness_policy.yaml +40 -0
- maestro_se-0.1.0/docker-compose.yml +10 -0
- maestro_se-0.1.0/docs/architecture.md +120 -0
- maestro_se-0.1.0/docs/architecture.png +0 -0
- maestro_se-0.1.0/docs/banner.png +0 -0
- maestro_se-0.1.0/docs/dev-plan.md +788 -0
- maestro_se-0.1.0/docs/icon-simple.svg +22 -0
- maestro_se-0.1.0/docs/icon.svg +74 -0
- maestro_se-0.1.0/docs/rp-zh.md +614 -0
- maestro_se-0.1.0/docs/rp.md +615 -0
- maestro_se-0.1.0/examples/README.md +38 -0
- maestro_se-0.1.0/libs/maestro-agents/pyproject.toml +24 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/__init__.py +8 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/animation.py +79 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/backend.py +33 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/base.py +168 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/contract_validator.py +94 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/deps.py +17 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/frontend.py +33 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/permissions.py +24 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/planning.py +40 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/py.typed +0 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/review.py +31 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/test_agent.py +39 -0
- maestro_se-0.1.0/libs/maestro-agents/src/maestro_agents/tools.py +266 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/conftest.py +16 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/test_agent_base.py +63 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/test_backend_agent.py +23 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/test_contract_validator.py +49 -0
- maestro_se-0.1.0/libs/maestro-agents/tests/test_planning_agent.py +23 -0
- maestro_se-0.1.0/libs/maestro-harness/pyproject.toml +17 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/acl.py +39 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/budget.py +59 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/harness.py +125 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/policy.py +46 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/py.typed +0 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/rollback.py +96 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/sandbox.py +58 -0
- maestro_se-0.1.0/libs/maestro-harness/src/maestro_harness/validator.py +67 -0
- maestro_se-0.1.0/libs/maestro-harness/tests/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-harness/tests/conftest.py +55 -0
- maestro_se-0.1.0/libs/maestro-harness/tests/test_harness.py +254 -0
- maestro_se-0.1.0/libs/maestro-loop/pyproject.toml +24 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/__init__.py +8 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/controller.py +584 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/py.typed +0 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/retry.py +36 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/scheduler.py +98 -0
- maestro_se-0.1.0/libs/maestro-loop/src/maestro_loop/states.py +48 -0
- maestro_se-0.1.0/libs/maestro-loop/tests/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-loop/tests/conftest.py +43 -0
- maestro_se-0.1.0/libs/maestro-loop/tests/test_controller.py +103 -0
- maestro_se-0.1.0/libs/maestro-loop/tests/test_scheduler.py +56 -0
- maestro_se-0.1.0/libs/maestro-loop/tests/test_state_machine.py +29 -0
- maestro_se-0.1.0/libs/maestro-ssot/pyproject.toml +16 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/access.py +94 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/contracts.py +76 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/execution_log.py +47 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/hub.py +217 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/memory.py +27 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/models.py +90 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/persistence.py +415 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/py.typed +0 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/requirements.py +119 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/snapshot.py +122 -0
- maestro_se-0.1.0/libs/maestro-ssot/src/maestro_ssot/versioning.py +28 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/__init__.py +0 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/conftest.py +22 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/test_models.py +134 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/test_persistence.py +222 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/test_requirements.py +260 -0
- maestro_se-0.1.0/libs/maestro-ssot/tests/test_versioning.py +76 -0
- maestro_se-0.1.0/pyproject.toml +75 -0
- maestro_se-0.1.0/scripts/dev-server.py +97 -0
- maestro_se-0.1.0/scripts/install.sh +347 -0
- maestro_se-0.1.0/scripts/run_swebench_task.py +195 -0
- maestro_se-0.1.0/scripts/stats_analysis.py +207 -0
- maestro_se-0.1.0/src/maestro/__init__.py +0 -0
- maestro_se-0.1.0/src/maestro/agent_factory.py +44 -0
- maestro_se-0.1.0/src/maestro/baselines/__init__.py +22 -0
- maestro_se-0.1.0/src/maestro/baselines/message_passing.py +204 -0
- maestro_se-0.1.0/src/maestro/baselines/no_harness.py +61 -0
- maestro_se-0.1.0/src/maestro/baselines/no_loop.py +161 -0
- maestro_se-0.1.0/src/maestro/baselines/single_agent.py +110 -0
- maestro_se-0.1.0/src/maestro/benchmark_runner.py +278 -0
- maestro_se-0.1.0/src/maestro/benchmarks/__init__.py +11 -0
- maestro_se-0.1.0/src/maestro/benchmarks/evaluator.py +258 -0
- maestro_se-0.1.0/src/maestro/benchmarks/py.typed +0 -0
- maestro_se-0.1.0/src/maestro/benchmarks/task_base.py +30 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/__init__.py +41 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_01_jwt_auth.py +84 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_02_todo_api.py +85 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_03_chat_ws.py +84 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_04_user_crud.py +82 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_05_file_upload.py +80 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_06_payment_gateway.py +81 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_07_notifications.py +81 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_08_search_engine.py +81 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_09_microservices_order.py +85 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_cb_10_collaborative_editor.py +80 -0
- maestro_se-0.1.0/src/maestro/benchmarks/tasks/task_swebench_email.py +96 -0
- maestro_se-0.1.0/src/maestro/cli.py +148 -0
- maestro_se-0.1.0/src/maestro/commands/__init__.py +0 -0
- maestro_se-0.1.0/src/maestro/commands/benchmark_cmd.py +77 -0
- maestro_se-0.1.0/src/maestro/commands/config_cmd.py +127 -0
- maestro_se-0.1.0/src/maestro/commands/eval.py +200 -0
- maestro_se-0.1.0/src/maestro/commands/run.py +308 -0
- maestro_se-0.1.0/src/maestro/commands/status.py +10 -0
- maestro_se-0.1.0/src/maestro/config.py +306 -0
- maestro_se-0.1.0/src/maestro/default_policy.yaml +40 -0
- maestro_se-0.1.0/src/maestro/demo.py +136 -0
- maestro_se-0.1.0/src/maestro/llm.py +141 -0
- maestro_se-0.1.0/src/maestro/output.py +173 -0
- maestro_se-0.1.0/src/maestro/project.py +130 -0
- maestro_se-0.1.0/src/maestro/repl.py +167 -0
- maestro_se-0.1.0/src/maestro/resilience.py +107 -0
- maestro_se-0.1.0/src/maestro/session.py +115 -0
- maestro_se-0.1.0/src/maestro/streaming.py +171 -0
- maestro_se-0.1.0/tests/__init__.py +0 -0
- maestro_se-0.1.0/tests/test_cli.py +74 -0
- maestro_se-0.1.0/tests/test_integration.py +229 -0
- maestro_se-0.1.0/tests/test_llm_integration.py +220 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: astral-sh/setup-uv@v5
|
|
15
|
+
- run: uv sync
|
|
16
|
+
- run: uv run ruff check .
|
|
17
|
+
- run: uv run ruff format --check .
|
|
18
|
+
|
|
19
|
+
typecheck:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
- uses: astral-sh/setup-uv@v5
|
|
24
|
+
- run: uv sync
|
|
25
|
+
- run: uv run mypy libs/ src/maestro/
|
|
26
|
+
|
|
27
|
+
test:
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- uses: astral-sh/setup-uv@v5
|
|
32
|
+
- run: uv sync
|
|
33
|
+
- run: uv run pytest libs/maestro-ssot/tests/ --cov --cov-report=xml -v
|
|
34
|
+
- run: uv run pytest libs/maestro-harness/tests/ --cov --cov-append --cov-report=xml -v
|
|
35
|
+
- run: uv run pytest libs/maestro-agents/tests/ --cov --cov-append --cov-report=xml -v
|
|
36
|
+
- run: uv run pytest libs/maestro-loop/tests/ --cov --cov-append --cov-report=xml -v
|
|
37
|
+
- run: uv run pytest tests/ --cov --cov-append --cov-report=xml -v
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write # for creating GitHub Release
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: astral-sh/setup-uv@v5
|
|
18
|
+
with:
|
|
19
|
+
version: "latest"
|
|
20
|
+
|
|
21
|
+
- name: Build all packages
|
|
22
|
+
run: uv build --all-packages
|
|
23
|
+
|
|
24
|
+
- name: Upload distributions
|
|
25
|
+
uses: actions/upload-artifact@v4
|
|
26
|
+
with:
|
|
27
|
+
name: dist
|
|
28
|
+
path: dist/
|
|
29
|
+
|
|
30
|
+
publish-pypi:
|
|
31
|
+
needs: build
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
environment:
|
|
34
|
+
name: pypi
|
|
35
|
+
url: https://pypi.org/p/maestro-se
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v4
|
|
38
|
+
|
|
39
|
+
- uses: astral-sh/setup-uv@v5
|
|
40
|
+
with:
|
|
41
|
+
version: "latest"
|
|
42
|
+
|
|
43
|
+
- name: Download distributions
|
|
44
|
+
uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
|
|
49
|
+
- name: Publish to PyPI
|
|
50
|
+
env:
|
|
51
|
+
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
|
52
|
+
run: uv publish dist/*
|
|
53
|
+
|
|
54
|
+
github-release:
|
|
55
|
+
needs: build
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
steps:
|
|
58
|
+
- uses: actions/checkout@v4
|
|
59
|
+
|
|
60
|
+
- name: Download distributions
|
|
61
|
+
uses: actions/download-artifact@v4
|
|
62
|
+
with:
|
|
63
|
+
name: dist
|
|
64
|
+
path: dist/
|
|
65
|
+
|
|
66
|
+
- name: Create GitHub Release
|
|
67
|
+
uses: softprops/action-gh-release@v2
|
|
68
|
+
with:
|
|
69
|
+
files: dist/*
|
|
70
|
+
generate_release_notes: true
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
|
|
13
|
+
# Testing
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
htmlcov/
|
|
16
|
+
.coverage
|
|
17
|
+
coverage.xml
|
|
18
|
+
|
|
19
|
+
# Type checking
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.pyright/
|
|
22
|
+
|
|
23
|
+
# Ruff
|
|
24
|
+
.ruff_cache/
|
|
25
|
+
|
|
26
|
+
# IDE
|
|
27
|
+
.idea/
|
|
28
|
+
*.swp
|
|
29
|
+
*.swo
|
|
30
|
+
*~
|
|
31
|
+
|
|
32
|
+
# OS
|
|
33
|
+
.DS_Store
|
|
34
|
+
Thumbs.db
|
|
35
|
+
|
|
36
|
+
# uv
|
|
37
|
+
uv.lock
|
|
38
|
+
|
|
39
|
+
# Project
|
|
40
|
+
*.db
|
|
41
|
+
*.sqlite3
|
|
42
|
+
.maestro/
|
|
43
|
+
src/api/
|
|
44
|
+
|
|
45
|
+
# env
|
|
46
|
+
.env
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# MAESTRO-SSOT — Reproducible Experiment Environment
|
|
2
|
+
# Usage:
|
|
3
|
+
# docker build -t maestro-ssot .
|
|
4
|
+
# docker run -it --rm -v $(pwd)/output:/output maestro-ssot benchmark --output-dir /output
|
|
5
|
+
|
|
6
|
+
FROM python:3.12-slim
|
|
7
|
+
|
|
8
|
+
# Install system deps
|
|
9
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
10
|
+
git \
|
|
11
|
+
curl \
|
|
12
|
+
build-essential \
|
|
13
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
14
|
+
|
|
15
|
+
# Install uv
|
|
16
|
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
|
17
|
+
|
|
18
|
+
WORKDIR /app
|
|
19
|
+
|
|
20
|
+
# Copy project files
|
|
21
|
+
COPY pyproject.toml uv.lock ./
|
|
22
|
+
COPY libs/ ./libs/
|
|
23
|
+
COPY src/ ./src/
|
|
24
|
+
COPY tests/ ./tests/
|
|
25
|
+
COPY benchmarks/ ./benchmarks/
|
|
26
|
+
COPY scripts/ ./scripts/
|
|
27
|
+
COPY configs/ ./configs/
|
|
28
|
+
COPY docs/ ./docs/
|
|
29
|
+
COPY README.md ./
|
|
30
|
+
|
|
31
|
+
# Install dependencies
|
|
32
|
+
RUN uv sync --frozen --no-dev
|
|
33
|
+
|
|
34
|
+
# Create default global config
|
|
35
|
+
RUN mkdir -p /root/.maestro && \
|
|
36
|
+
echo '[llm]\nprovider = "anthropic"\nmodel = "claude-sonnet-4-20250514"\nthinking = true\n' > /root/.maestro/config.toml
|
|
37
|
+
|
|
38
|
+
ENV PATH="/app/.venv/bin:$PATH"
|
|
39
|
+
|
|
40
|
+
# Default: show help
|
|
41
|
+
ENTRYPOINT ["maestro"]
|
|
42
|
+
CMD ["--help"]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
.PHONY: help install test lint format typecheck demo dev status clean
|
|
2
|
+
|
|
3
|
+
help: ## Show this help
|
|
4
|
+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'
|
|
5
|
+
|
|
6
|
+
install: ## Install all dependencies
|
|
7
|
+
uv sync
|
|
8
|
+
|
|
9
|
+
test: ## Run all tests
|
|
10
|
+
uv run pytest libs/maestro-ssot/tests/ -q
|
|
11
|
+
uv run pytest libs/maestro-harness/tests/ -q
|
|
12
|
+
uv run pytest libs/maestro-agents/tests/ -q
|
|
13
|
+
uv run pytest libs/maestro-loop/tests/ -q
|
|
14
|
+
uv run pytest tests/ -q
|
|
15
|
+
|
|
16
|
+
lint: ## Run ruff linter
|
|
17
|
+
uv run ruff check .
|
|
18
|
+
|
|
19
|
+
format: ## Run ruff formatter
|
|
20
|
+
uv run ruff format .
|
|
21
|
+
|
|
22
|
+
typecheck: ## Run mypy type checker
|
|
23
|
+
uv run mypy libs/ src/maestro/
|
|
24
|
+
|
|
25
|
+
check: lint format typecheck test ## Run all checks (lint + format + typecheck + test)
|
|
26
|
+
|
|
27
|
+
demo: ## Run maestro demo
|
|
28
|
+
uv run maestro demo
|
|
29
|
+
|
|
30
|
+
status: ## Run maestro status (requires project)
|
|
31
|
+
uv run maestro status
|
|
32
|
+
|
|
33
|
+
dev: ## Hot-reload dev server (default: demo)
|
|
34
|
+
python scripts/dev-server.py demo
|
|
35
|
+
|
|
36
|
+
dev-run: ## Hot-reload dev server with run --demo
|
|
37
|
+
python scripts/dev-server.py run --demo "Build a JWT auth system"
|
|
38
|
+
|
|
39
|
+
dev-repl: ## Hot-reload dev server with REPL
|
|
40
|
+
python scripts/dev-server.py
|
|
41
|
+
|
|
42
|
+
clean: ## Clean generated files
|
|
43
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
|
44
|
+
find . -type d -name .pytest_cache -exec rm -rf {} +
|
|
45
|
+
find . -type d -name .mypy_cache -exec rm -rf {} +
|
|
46
|
+
find . -type d -name .ruff_cache -exec rm -rf {} +
|
|
47
|
+
find . -type f -name "*.pyc" -delete
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: maestro-se
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Managed Autonomy in Multi-Agent Software Engineering via Single Source of Truth
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: maestro-agents
|
|
7
|
+
Requires-Dist: maestro-harness
|
|
8
|
+
Requires-Dist: maestro-loop
|
|
9
|
+
Requires-Dist: maestro-ssot
|
|
10
|
+
Requires-Dist: prompt-toolkit>=3.0
|
|
11
|
+
Requires-Dist: tomli-w>=1.2.0
|
|
12
|
+
Requires-Dist: typer>=0.25
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="docs/banner.png" width="640" alt="MAESTRO-SSOT">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<strong>Managed Autonomy in Multi-Agent Software Engineering<br>via Single Source of Truth</strong>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<a href="https://github.com/HYPERVAPOR/maestro-ssot/actions/workflows/ci.yml">
|
|
11
|
+
<img src="https://img.shields.io/github/actions/workflow/status/HYPERVAPOR/maestro-ssot/ci.yml?branch=main&label=CI" alt="CI">
|
|
12
|
+
</a>
|
|
13
|
+
<img src="https://img.shields.io/badge/python-3.12+-blue.svg" alt="Python 3.12+">
|
|
14
|
+
<img src="https://img.shields.io/badge/code%20size-3.9k%20LOC-informational" alt="Code Size">
|
|
15
|
+
<img src="https://img.shields.io/badge/tests-147%20passing-brightgreen" alt="Tests">
|
|
16
|
+
<img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License">
|
|
17
|
+
</p>
|
|
18
|
+
|
|
19
|
+
<p align="center">
|
|
20
|
+
A multi-agent framework where specialized LLM agents collaborate around a <strong>shared structured state</strong> instead of passing messages — coordinated by an autonomous control loop and guarded by a restrictive execution harness.
|
|
21
|
+
</p>
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Why MAESTRO?
|
|
26
|
+
|
|
27
|
+
Current multi-agent coding systems (AutoGen, ChatDev) treat collaboration as **message passing**: agents work in isolation, then reconcile post-hoc. This causes integration failures, redundant communication, and opaque execution.
|
|
28
|
+
|
|
29
|
+
Real engineering teams use **shared artifacts** — issue trackers, API specs, design docs. MAESTRO brings this model to AI agents:
|
|
30
|
+
|
|
31
|
+
- **SSOT Hub** — a SQLite-backed shared state for requirements, contracts, execution logs, and agent memory
|
|
32
|
+
- **Agent Harness** — file ACLs, command filtering, token/step budgets, and dangerous pattern detection
|
|
33
|
+
- **File & Shell Tools** — agents read/write actual files and execute shell commands through the sandbox
|
|
34
|
+
- **Auto-Loop** — an LLM-driven controller that plans, assigns, executes, validates, and commits autonomously
|
|
35
|
+
- **Real-time Observability** — streaming thinking content and tool calls as they happen
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
<img src="docs/architecture.png" width="640" alt="System Architecture">
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
### Install (one line)
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
curl -sSL https://raw.githubusercontent.com/HYPERVAPOR/maestro-ssot/main/scripts/install.sh | bash
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Or manually with `uv`:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
uv tool install git+https://github.com/HYPERVAPOR/maestro-ssot.git
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Run a task (any directory)
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Demo mode (deterministic, no LLM calls)
|
|
59
|
+
maestro run --demo "Implement JWT user authentication"
|
|
60
|
+
|
|
61
|
+
# Real LLM execution (requires API key)
|
|
62
|
+
maestro run "Implement JWT user authentication"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
MAESTRO auto-creates a `.maestro/` project directory on first use (Claude Code-style).
|
|
66
|
+
|
|
67
|
+
### Interactive REPL
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
maestro
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
╭──────────────────────────────────────────────────────────────────╮
|
|
75
|
+
│ MAESTRO-SSOT v0.1.0 │
|
|
76
|
+
│ Project: my-project (/home/user/my-project) │
|
|
77
|
+
│ Model: claude-sonnet-4-20250514 │
|
|
78
|
+
│ DB: .maestro/ssot.db (3 requirements) │
|
|
79
|
+
╰──────────────────────────────────────────────────────────────────╯
|
|
80
|
+
|
|
81
|
+
maestro> task Add user profile page with avatar upload
|
|
82
|
+
maestro> status
|
|
83
|
+
maestro> contracts
|
|
84
|
+
maestro> help
|
|
85
|
+
maestro> quit
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Evaluate & Benchmark
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Single-task comparison: MAESTRO vs baselines vs ablations
|
|
92
|
+
maestro eval "Implement a Todo REST API"
|
|
93
|
+
|
|
94
|
+
# Run only specific methods
|
|
95
|
+
maestro eval "Implement a Todo REST API" --method maestro-ssot --method single-agent
|
|
96
|
+
|
|
97
|
+
# Full Contract-Bench suite (10 tasks × 5 methods)
|
|
98
|
+
maestro benchmark --output-dir ./results
|
|
99
|
+
|
|
100
|
+
# Dry run to see what would be executed
|
|
101
|
+
maestro benchmark --dry-run
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### View project status
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
maestro status
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Displays a Rich-formatted view of the requirement tree, contract registry, and execution log.
|
|
111
|
+
|
|
112
|
+
### Initialize explicitly (optional)
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
maestro init . --name "my-project"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Most commands work without explicit `init` — the project is created lazily on first use.
|
|
119
|
+
|
|
120
|
+
## Docker
|
|
121
|
+
|
|
122
|
+
Build and run in a reproducible container:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Build
|
|
126
|
+
docker build -t maestro-ssot .
|
|
127
|
+
|
|
128
|
+
# Run benchmark
|
|
129
|
+
docker run -it --rm \
|
|
130
|
+
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
|
|
131
|
+
-v $(pwd)/output:/output \
|
|
132
|
+
maestro-ssot benchmark --output-dir /output
|
|
133
|
+
|
|
134
|
+
# Or use docker-compose
|
|
135
|
+
docker-compose run maestro
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Architecture
|
|
139
|
+
|
|
140
|
+
| Package | Role | Dependencies |
|
|
141
|
+
| ----------------- | ------------------------------------------------------------------------------------ | --------------------------------- |
|
|
142
|
+
| `maestro-ssot` | Shared state hub — requirements, contracts, logs, memory, versioning, access control | None |
|
|
143
|
+
| `maestro-harness` | Safety layer — file ACL, command filtering, budgets, sandbox, validator | None |
|
|
144
|
+
| `maestro-agents` | Role agents — Planner, Backend, Frontend, Test, Review | `maestro-ssot`, `maestro-harness` |
|
|
145
|
+
| `maestro-loop` | Autonomous controller — LLM-driven controller, scheduler, retry budget | All above |
|
|
146
|
+
| `maestro` (root) | CLI + TUI — Typer commands, Rich output, interactive REPL | All above |
|
|
147
|
+
|
|
148
|
+
### Key Design Principles
|
|
149
|
+
|
|
150
|
+
1. **Shared state over messages** — Agents read/write a structured SSOT, not chat logs
|
|
151
|
+
2. **Guarded execution** — Every agent action passes through ACL → validator → budget → sandbox
|
|
152
|
+
3. **Observable by default** — All mutations are logged; snapshots enable rollback
|
|
153
|
+
4. **Testable without LLMs** — PydanticAI TestModel enables deterministic CI testing
|
|
154
|
+
5. **Resilient by design** — Timeouts, retries, and graceful degradation on API failures
|
|
155
|
+
|
|
156
|
+
## Configuration
|
|
157
|
+
|
|
158
|
+
### Global: `~/.maestro/config.toml`
|
|
159
|
+
|
|
160
|
+
```toml
|
|
161
|
+
[llm]
|
|
162
|
+
provider = "anthropic"
|
|
163
|
+
model = "claude-sonnet-4-20250514"
|
|
164
|
+
thinking = true # Stream agent thinking content
|
|
165
|
+
|
|
166
|
+
[display]
|
|
167
|
+
verbose = false
|
|
168
|
+
log_level = "INFO"
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Project: `.maestro/config.toml`
|
|
172
|
+
|
|
173
|
+
```toml
|
|
174
|
+
[project]
|
|
175
|
+
name = "my-project"
|
|
176
|
+
|
|
177
|
+
[harness]
|
|
178
|
+
policy = ".maestro/policy.yaml"
|
|
179
|
+
|
|
180
|
+
[[agents]]
|
|
181
|
+
role = "planner"
|
|
182
|
+
|
|
183
|
+
[[agents]]
|
|
184
|
+
role = "backend"
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Harness Policy: `.maestro/policy.yaml`
|
|
188
|
+
|
|
189
|
+
```yaml
|
|
190
|
+
agents:
|
|
191
|
+
backend-1:
|
|
192
|
+
filesystem:
|
|
193
|
+
allow: ["src/**", "tests/**", "libs/**"]
|
|
194
|
+
deny: ["**/.env", "**/secrets*"]
|
|
195
|
+
commands:
|
|
196
|
+
allow: ["python*", "pytest*", "ruff*", "mypy*"]
|
|
197
|
+
budget:
|
|
198
|
+
max_tokens_per_task: 60000
|
|
199
|
+
max_steps_per_epoch: 20
|
|
200
|
+
max_wall_time_minutes: 10
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Contract-Bench
|
|
204
|
+
|
|
205
|
+
MAESTRO includes a built-in benchmark suite for evaluating multi-agent SE systems:
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
# List available tasks
|
|
209
|
+
python -c "from maestro.benchmarks.tasks import ALL_TASKS; print([t.TASK_ID for t in ALL_TASKS])"
|
|
210
|
+
|
|
211
|
+
# Evaluate a single task programmatically (run the following lines in a Python session, not in the shell)
|
|
212
|
+
from maestro.benchmarks.tasks import TASK_01_JWT_AUTH
|
|
213
|
+
result = TASK_01_JWT_AUTH.evaluate("./my-project")
|
|
214
|
+
print(result.score, result.passed)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
| Task | Description | Difficulty | Modules |
|
|
218
|
+
|------|-------------|------------|---------|
|
|
219
|
+
| CB-01 | JWT Auth + Login Page | Easy | backend, frontend |
|
|
220
|
+
| CB-02 | Todo API + SQLite + React | Medium | backend, frontend, database |
|
|
221
|
+
| CB-03 | Chat WebSocket + Redis | Hard | backend, frontend, messaging |
|
|
222
|
+
| CB-04 | User CRUD API | Easy | backend |
|
|
223
|
+
| CB-05 | File Upload/Download | Easy | backend, frontend |
|
|
224
|
+
| CB-06 | Payment Gateway | Medium | backend, frontend |
|
|
225
|
+
| CB-07 | Real-time Notifications (SSE) | Medium | backend, frontend |
|
|
226
|
+
| CB-08 | Full-text Search Engine | Medium | backend, frontend |
|
|
227
|
+
| CB-09 | Microservices Order System | Hard | backend, database, messaging |
|
|
228
|
+
| CB-10 | Collaborative Editor (OT/CRDT) | Hard | backend, frontend |
|
|
229
|
+
|
|
230
|
+
## Development
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Install with dev dependencies
|
|
234
|
+
uv sync
|
|
235
|
+
|
|
236
|
+
# Run all checks
|
|
237
|
+
uv run ruff check . # lint
|
|
238
|
+
uv run ruff format --check . # format
|
|
239
|
+
uv run mypy libs/ src/maestro/ # type-check
|
|
240
|
+
|
|
241
|
+
# Run tests (per-package to avoid conftest collision)
|
|
242
|
+
uv run pytest libs/maestro-ssot/tests/
|
|
243
|
+
uv run pytest libs/maestro-harness/tests/
|
|
244
|
+
uv run pytest libs/maestro-agents/tests/
uv run pytest libs/maestro-loop/tests/
|
|
245
|
+
uv run pytest tests/
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Project Status
|
|
249
|
+
|
|
250
|
+
| Phase | Description | Status |
|
|
251
|
+
| --------- | ------------------------------------------------------------------ | ------- |
|
|
252
|
+
| Phase 1 | SSOT core, Harness, Agents, CLI, TUI, REPL | Done |
|
|
253
|
+
| Phase 1.5 | CLI productization, Rich output, project management | Done |
|
|
254
|
+
| Phase 2 | LLM-driven Controller, streaming output, Contract-Bench v1 | Done |
|
|
255
|
+
| Phase 2.5 | Resilience layer (timeouts, retries), TUI animations | Done |
|
|
256
|
+
| Phase 3 | 10-task benchmark, baselines, ablation studies, data pipeline | Done |
|
|
257
|
+
| Phase 3.6 | Agent file/shell tools, SWE-bench style task validation | Done |
|
|
258
|
+
| Phase 4 | Paper submission (ICSE/FSE/ASE) | In Progress |
|
|
259
|
+
|
|
260
|
+
## Research
|
|
261
|
+
|
|
262
|
+
This project is the reference implementation for our research on multi-agent software engineering. See the [research proposal](docs/rp.md) for the full technical motivation.
|
|
263
|
+
|
|
264
|
+
**Core hypothesis**: Explicit shared-state mediation (SSOT) combined with runtime guardrails (Harness) enables multi-agent SE systems to achieve higher integration success rates and lower communication overhead than message-passing alternatives.
|
|
265
|
+
|
|
266
|
+
## License
|
|
267
|
+
|
|
268
|
+
MIT
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Releasing MAESTRO-SSOT
|
|
2
|
+
|
|
3
|
+
## Prerequisites
|
|
4
|
+
|
|
5
|
+
1. **PyPI account** with `maestro-se`, `maestro-ssot`, `maestro-harness`, `maestro-loop`, `maestro-agents` package names registered (or reserved).
|
|
6
|
+
2. **GitHub Repository** with Trusted Publishing configured:
|
|
7
|
+
- Go to [PyPI → Account → Publishing](https://pypi.org/manage/account/publishing/)
|
|
8
|
+
- Add a new pending publisher:
|
|
9
|
+
- **PyPI Project Name**: `maestro-se`
|
|
10
|
+
- **Owner**: `HYPERVAPOR`
|
|
11
|
+
- **Repository name**: `maestro-ssot`
|
|
12
|
+
- **Workflow name**: `release.yml`
|
|
13
|
+
- **Environment name**: `pypi`
|
|
14
|
+
- Repeat for each workspace package: `maestro-ssot`, `maestro-harness`, `maestro-loop`, `maestro-agents`
|
|
15
|
+
|
|
16
|
+
> Alternatively, you can use a PyPI API token stored as a GitHub secret (`PYPI_API_TOKEN`) and modify `.github/workflows/release.yml` to use `uv publish --token ${{ secrets.PYPI_API_TOKEN }}` instead of `--trusted-publishing always`.
|
|
17
|
+
|
|
18
|
+
## Release Steps
|
|
19
|
+
|
|
20
|
+
### 1. Update the version
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Edit version in all pyproject.toml files
|
|
24
|
+
# Root package
|
|
25
|
+
sed -i 's/version = "0.1.0"/version = "0.2.0"/' pyproject.toml
|
|
26
|
+
# Lib packages
|
|
27
|
+
for f in libs/*/pyproject.toml; do
|
|
28
|
+
sed -i 's/version = "0.1.0"/version = "0.2.0"/' "$f"
|
|
29
|
+
done
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Or use a script:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
./scripts/bump-version.sh 0.2.0
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 2. Update CHANGELOG (optional)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Add release notes to CHANGELOG.md
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 3. Commit and tag
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
git add -A
|
|
48
|
+
git commit -m "release: v0.2.0"
|
|
49
|
+
git tag v0.2.0
|
|
50
|
+
git push origin main
|
|
51
|
+
git push origin v0.2.0
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### 4. CI handles the rest
|
|
55
|
+
|
|
56
|
+
The `release.yml` workflow will automatically:
|
|
57
|
+
1. Build wheels and source distributions for all workspace packages
|
|
58
|
+
2. Publish them to PyPI via trusted publishing
|
|
59
|
+
3. Create a GitHub Release with auto-generated notes and attached artifacts
|
|
60
|
+
|
|
61
|
+
### 5. Verify
|
|
62
|
+
|
|
63
|
+
Wait for the workflow to complete (~2 minutes), then verify:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Check PyPI
|
|
67
|
+
curl -s https://pypi.org/pypi/maestro-se/json | python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])"
|
|
68
|
+
|
|
69
|
+
# Test install
|
|
70
|
+
uv tool install maestro-se
|
|
71
|
+
maestro --version
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Local Build & Test (without publishing)
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Build all packages
|
|
78
|
+
uv build --all-packages
|
|
79
|
+
|
|
80
|
+
# Inspect the built artifacts
|
|
81
|
+
ls -la dist/
|
|
82
|
+
|
|
83
|
+
# Test install from local build
|
|
84
|
+
uv tool install --reinstall dist/maestro_se-*.whl
|
|
85
|
+
maestro --version
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## One-Line Install
|
|
89
|
+
|
|
90
|
+
After a release is published, users can install MAESTRO with:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Via install.sh (auto-detects uv/pipx/pip)
|
|
94
|
+
curl -sSL https://raw.githubusercontent.com/HYPERVAPOR/maestro-ssot/main/scripts/install.sh | bash
|
|
95
|
+
|
|
96
|
+
# Or directly with uv
|
|
97
|
+
uv tool install maestro-se
|
|
98
|
+
|
|
99
|
+
# Or with pipx
|
|
100
|
+
pipx install maestro-se
|
|
101
|
+
|
|
102
|
+
# Or with pip
|
|
103
|
+
pip install --user maestro-se
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Uninstall
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
curl -sSL https://raw.githubusercontent.com/HYPERVAPOR/maestro-ssot/main/scripts/install.sh | MAESTRO_UNINSTALL=1 bash
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Troubleshooting
|
|
113
|
+
|
|
114
|
+
### Trusted Publishing fails
|
|
115
|
+
|
|
116
|
+
If the `publish-pypi` job fails with an authentication error:
|
|
117
|
+
1. Verify the PyPI pending publisher configuration matches exactly (owner, repo, workflow name, environment)
|
|
118
|
+
2. Check that the `permissions: id-token: write` is set in the workflow
|
|
119
|
+
3. As a fallback, create a PyPI API token and add it as a GitHub secret named `PYPI_API_TOKEN`, then update the workflow:
|
|
120
|
+
```yaml
|
|
121
|
+
- name: Publish to PyPI
|
|
122
|
+
run: uv publish --token ${{ secrets.PYPI_API_TOKEN }} dist/*
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Package name conflicts on PyPI
|
|
126
|
+
|
|
127
|
+
The root package is published as `maestro-se` because `maestro` is already occupied on PyPI (by a vision-language model tool). The CLI command remains `maestro` regardless of the PyPI package name.
|
|
128
|
+
|
|
129
|
+
If any other package names are taken, you can:
|
|
130
|
+
1. Use a different namespace
|
|
131
|
+
2. Request a name transfer from the current owner
|
|
132
|
+
3. Use a private index (e.g., GitHub Packages, private PyPI)
|
|
133
|
+
|
|
134
|
+
### Install.sh fails
|
|
135
|
+
|
|
136
|
+
Test locally before pushing:
|
|
137
|
+
```bash
|
|
138
|
+
bash scripts/install.sh
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
With a specific version:
|
|
142
|
+
```bash
|
|
143
|
+
MAESTRO_VERSION=0.2.0 bash scripts/install.sh
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
With uninstall:
|
|
147
|
+
```bash
|
|
148
|
+
MAESTRO_UNINSTALL=1 bash scripts/install.sh
|
|
149
|
+
```
|