opalacoder 0.1.0__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opalacoder-0.1.3/.claude/settings.json +20 -0
- opalacoder-0.1.3/.gitignore +128 -0
- opalacoder-0.1.0/AGENT.md → opalacoder-0.1.3/AGENTS.md +4 -0
- opalacoder-0.1.3/ARCH_SUMMARY.md +201 -0
- opalacoder-0.1.3/CLAUDE.md +35 -0
- opalacoder-0.1.3/FLOW_MAP.md +230 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/GEMINI.md +4 -0
- opalacoder-0.1.3/PKG-INFO +222 -0
- opalacoder-0.1.3/README.md +197 -0
- opalacoder-0.1.3/agents.yaml +95 -0
- opalacoder-0.1.3/docs/ALGS.md +257 -0
- opalacoder-0.1.3/docs/figs/logotipo.png +0 -0
- opalacoder-0.1.3/docs/paper_memplan.md +257 -0
- opalacoder-0.1.3/docs/skills-plugin-system.md +218 -0
- opalacoder-0.1.3/docs/specs1.md +16 -0
- opalacoder-0.1.3/docs/specs2.md +1 -0
- opalacoder-0.1.3/docs/specs3.md +0 -0
- opalacoder-0.1.3/experiments/direct_gemini_test.py +27 -0
- opalacoder-0.1.3/experiments/run_micalc_test.py +145 -0
- opalacoder-0.1.3/experiments/test_complexity.py +1 -0
- opalacoder-0.1.3/experiments/test_gemini.py +20 -0
- opalacoder-0.1.3/experiments/test_gemini_sequences.py +52 -0
- opalacoder-0.1.3/experiments/test_gemma4.py +94 -0
- opalacoder-0.1.3/experiments/test_llm.py +58 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/agents.py +94 -60
- opalacoder-0.1.3/opalacoder/archival.py +93 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/cli.py +151 -33
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/cli_commands.py +6 -3
- opalacoder-0.1.3/opalacoder/code_index.py +681 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/config.py +16 -2
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/embeddings.py +3 -1
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/i18n.py +22 -0
- opalacoder-0.1.3/opalacoder/memgpt.py +463 -0
- opalacoder-0.1.3/opalacoder/orchestrator.py +165 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/planner.py +6 -1
- opalacoder-0.1.3/opalacoder/plugins/__init__.py +0 -0
- opalacoder-0.1.3/opalacoder/plugins/default_reviewer.py +76 -0
- opalacoder-0.1.3/opalacoder/plugins/html_css_js_tools.py +501 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/project.py +35 -5
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/skills.py +129 -2
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/structured.py +49 -58
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/terminal.py +62 -0
- opalacoder-0.1.3/opalacoder/tools.py +856 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/vcs.py +32 -37
- opalacoder-0.1.3/opalacoder/workflow_orchestrator.py +1414 -0
- opalacoder-0.1.3/opalacoder/workflow_tools.py +268 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/pyproject.toml +6 -1
- opalacoder-0.1.3/pytest.ini +9 -0
- opalacoder-0.1.3/requirements.txt +11 -0
- opalacoder-0.1.3/skills/debug.md +36 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/skills/html_css_js.md +3 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_agent_config.py +13 -16
- opalacoder-0.1.3/tests/test_code_index_integration.py +460 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_intent_classifier.py +15 -7
- opalacoder-0.1.3/tests/test_planner_oracle_live.py +406 -0
- opalacoder-0.1.3/tests/test_planner_output.py +189 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_refinement_loop.py +32 -91
- opalacoder-0.1.3/tests/test_search_bugs.py +235 -0
- opalacoder-0.1.3/tests/test_verification_strategies.py +440 -0
- opalacoder-0.1.3/tests/test_workflow_pipeline.py +1045 -0
- opalacoder-0.1.0/.gitignore +0 -8
- opalacoder-0.1.0/ALGS.md +0 -47
- opalacoder-0.1.0/ARCH_SUMMARY.md +0 -76
- opalacoder-0.1.0/CLAUDE.md +0 -1
- opalacoder-0.1.0/PKG-INFO +0 -230
- opalacoder-0.1.0/README.md +0 -210
- opalacoder-0.1.0/agents.yaml +0 -78
- opalacoder-0.1.0/opalacoder/orchestrator.py +0 -381
- opalacoder-0.1.0/opalacoder/tools.py +0 -351
- opalacoder-0.1.0/rename_project.py +0 -57
- opalacoder-0.1.0/requirements.txt +0 -6
- opalacoder-0.1.0/test_complexity.py +0 -13
- opalacoder-0.1.0/tests/test_executor_context_extraction.py +0 -119
- opalacoder-0.1.0/tests/test_new_features.py +0 -262
- opalacoder-0.1.0/tests/test_orchestrator_prompt.py +0 -112
- opalacoder-0.1.0/tests/test_pipeline_integration.py +0 -225
- opalacoder-0.1.0/tests/test_planner_flow.py +0 -146
- opalacoder-0.1.0/tests/test_project_dir_isolation.py +0 -248
- {opalacoder-0.1.0 → opalacoder-0.1.3}/.github/workflows/publish.yml +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3/debug}/scratch_tail.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3/docs}/analysis_results.md +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/main.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/__init__.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/api_keys.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/opalacoder/session.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/skills/generaldeveloper.md +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/skills/opalacoder.md +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/skills/python_subprocess.md +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/skills/react_vite.md +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_complexity_evaluator.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_context_guard.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_double_inference.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_i18n_coverage.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_project_store.py +0 -0
- {opalacoder-0.1.0 → opalacoder-0.1.3}/tests/test_skills_loading.py +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(grep -A 30 \"checkAndEnforceHardFail\\\\|improve.*loop\\\\|composite.*tool\" /tmp/smallcode/bin/smallcode.js | head -60)",
|
|
5
|
+
"Read(//tmp/smallcode/bin/**)",
|
|
6
|
+
"Bash(python -c ' *)",
|
|
7
|
+
"Read(//home/gilzamir/**)",
|
|
8
|
+
"Bash(sed -n '/DEBUG PLAN JSON/,/\\\\/DEBUG PLAN JSON/p' /home/gilzamir/log.log)",
|
|
9
|
+
"Read(//home/gilzamir/micalc/**)",
|
|
10
|
+
"Bash(git stash *)",
|
|
11
|
+
"Bash(python -m pytest tests/ -q --ignore=tests/test_planner_output.py --ignore=tests/test_agent_config.py -k \"not llm and asyncio\")",
|
|
12
|
+
"Bash(python -m pytest tests/ -q --ignore=tests/test_planner_output.py --ignore=tests/test_agent_config.py -k \"not llm\")",
|
|
13
|
+
"Bash(python -m pytest tests/test_workflow_pipeline.py tests/test_verification_strategies.py -q -k \"asyncio or \\(not llm\\)\")",
|
|
14
|
+
"Bash(python -m pytest tests/ -x -q)"
|
|
15
|
+
],
|
|
16
|
+
"additionalDirectories": [
|
|
17
|
+
"/home/gilzamir"
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# --- Project-specific ---
|
|
2
|
+
*.env
|
|
3
|
+
.env
|
|
4
|
+
*.env*
|
|
5
|
+
.venv
|
|
6
|
+
*.venv
|
|
7
|
+
node_modules/
|
|
8
|
+
plan.md
|
|
9
|
+
|
|
10
|
+
# --- Project runtime data ---
|
|
11
|
+
.opalacoder/
|
|
12
|
+
|
|
13
|
+
# --- Python bytecode & caches ---
|
|
14
|
+
__pycache__/
|
|
15
|
+
*.py[cod]
|
|
16
|
+
*$py.class
|
|
17
|
+
*.pyo
|
|
18
|
+
|
|
19
|
+
# --- C extensions ---
|
|
20
|
+
*.so
|
|
21
|
+
|
|
22
|
+
# --- Distribution / packaging ---
|
|
23
|
+
.Python
|
|
24
|
+
build/
|
|
25
|
+
develop-eggs/
|
|
26
|
+
dist/
|
|
27
|
+
downloads/
|
|
28
|
+
eggs/
|
|
29
|
+
.eggs/
|
|
30
|
+
lib/
|
|
31
|
+
lib64/
|
|
32
|
+
parts/
|
|
33
|
+
sdist/
|
|
34
|
+
var/
|
|
35
|
+
wheels/
|
|
36
|
+
share/python-wheels/
|
|
37
|
+
*.egg-info/
|
|
38
|
+
.installed.cfg
|
|
39
|
+
*.egg
|
|
40
|
+
MANIFEST
|
|
41
|
+
|
|
42
|
+
# --- PyInstaller ---
|
|
43
|
+
*.manifest
|
|
44
|
+
*.spec
|
|
45
|
+
|
|
46
|
+
# --- Installer logs ---
|
|
47
|
+
pip-log.txt
|
|
48
|
+
pip-delete-this-directory.txt
|
|
49
|
+
|
|
50
|
+
# --- Testing & coverage ---
|
|
51
|
+
htmlcov/
|
|
52
|
+
.tox/
|
|
53
|
+
.nox/
|
|
54
|
+
.coverage
|
|
55
|
+
.coverage.*
|
|
56
|
+
.cache
|
|
57
|
+
nosetests.xml
|
|
58
|
+
coverage.xml
|
|
59
|
+
*.cover
|
|
60
|
+
*.py,cover
|
|
61
|
+
.hypothesis/
|
|
62
|
+
.pytest_cache/
|
|
63
|
+
cover/
|
|
64
|
+
|
|
65
|
+
# --- Translations ---
|
|
66
|
+
*.mo
|
|
67
|
+
*.pot
|
|
68
|
+
|
|
69
|
+
# --- Databases ---
|
|
70
|
+
*.sqlite
|
|
71
|
+
*.sqlite3
|
|
72
|
+
*.sqlite3-journal
|
|
73
|
+
*.sqlite-shm
|
|
74
|
+
*.sqlite-wal
|
|
75
|
+
|
|
76
|
+
# --- Logging ---
|
|
77
|
+
*.log
|
|
78
|
+
|
|
79
|
+
# --- Virtual environments ---
|
|
80
|
+
env/
|
|
81
|
+
venv/
|
|
82
|
+
ENV/
|
|
83
|
+
env.bak/
|
|
84
|
+
venv.bak/
|
|
85
|
+
|
|
86
|
+
# --- Jupyter ---
|
|
87
|
+
.ipynb_checkpoints
|
|
88
|
+
|
|
89
|
+
# --- IPython ---
|
|
90
|
+
profile_default/
|
|
91
|
+
ipython_config.py
|
|
92
|
+
|
|
93
|
+
# --- PEP 582 ---
|
|
94
|
+
__pypackages__/
|
|
95
|
+
|
|
96
|
+
# --- Type checkers ---
|
|
97
|
+
.mypy_cache/
|
|
98
|
+
.dmypy.json
|
|
99
|
+
dmypy.json
|
|
100
|
+
.pyre/
|
|
101
|
+
.pytype/
|
|
102
|
+
|
|
103
|
+
# --- Build tools ---
|
|
104
|
+
.pybuilder/
|
|
105
|
+
target/
|
|
106
|
+
cython_debug/
|
|
107
|
+
docs/_build/
|
|
108
|
+
/site
|
|
109
|
+
|
|
110
|
+
# --- Misc Python tools ---
|
|
111
|
+
.scrapy
|
|
112
|
+
instance/
|
|
113
|
+
.webassets-cache
|
|
114
|
+
celerybeat-schedule
|
|
115
|
+
celerybeat.pid
|
|
116
|
+
*.sage.py
|
|
117
|
+
.spyderproject
|
|
118
|
+
.spyproject
|
|
119
|
+
.ropeproject
|
|
120
|
+
local_settings.py
|
|
121
|
+
|
|
122
|
+
# --- IDEs / Editors ---
|
|
123
|
+
.vscode/
|
|
124
|
+
.idea/
|
|
125
|
+
*.swp
|
|
126
|
+
*.swo
|
|
127
|
+
*~
|
|
128
|
+
*.db
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# OpalaCoder — Architecture Summary
|
|
2
|
+
|
|
3
|
+
## Focus
|
|
4
|
+
|
|
5
|
+
OpalaCoder is designed to work with **small language models** (e.g. Ministral-3 14B, Gemma 4 via Ollama) as its primary target. The core design prioritizes correctness and usability under constrained context windows and limited reasoning capacity. Larger models are supported as an alternative for complex tasks via the `alternative` key in `agents.yaml`.
|
|
6
|
+
|
|
7
|
+
## Core Idea: Project-Centric Context Management
|
|
8
|
+
|
|
9
|
+
The central abstraction is the **project**, not the session. Every interaction happens within a named project that has a fixed filesystem path. This design exists for one reason: **context management**.
|
|
10
|
+
|
|
11
|
+
Small models degrade quickly when context is large, mixed, or unbounded. By anchoring all activity to a project, OpalaCoder can:
|
|
12
|
+
|
|
13
|
+
- Inject a stable, minimal project header (`[PROJECT: name | PATH: /path]`) into every prompt instead of growing conversation state
|
|
14
|
+
- Load only the skills relevant to that project type (selected at creation, not dynamically per-request)
|
|
15
|
+
- Scope all file reads, writes, and commands to the project directory, eliminating ambiguity
|
|
16
|
+
- Persist history, plan state, and skill configuration per project in SQLite, keeping each LLM call focused
|
|
17
|
+
|
|
18
|
+
## Architecture
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
CLI (cli.py)
|
|
22
|
+
├── startup_menu() — load or create project
|
|
23
|
+
├── _create_project() — name + path + description → LLM selects skills
|
|
24
|
+
└── repl_loop() — main REPL; loads project-scoped skills once
|
|
25
|
+
├── REPLState — session container; holds chat_agent, intent_classifier,
|
|
26
|
+
│ complexity_evaluator (created once, reused per turn)
|
|
27
|
+
├── run_pipeline() — triggers orchestration for planning tasks
|
|
28
|
+
│ └── get_orchestrator(strategy) — reads strategy from agents.yaml,
|
|
29
|
+
│ └── OrchestratorRegistry looks up registered class, instantiates it
|
|
30
|
+
│ └── WorkflowOrchestratorStrategy (default: strategy: workflow)
|
|
31
|
+
│ ├── _plan_and_refine() phase 1+2: panorama + user refinement
|
|
32
|
+
│ └── _orchestration_loop() phase 3: Plan→Execute→Verify
|
|
33
|
+
│ ├── _oracle(PlanOutput) — task decomposition
|
|
34
|
+
│ ├── _run_worker(task) — per-command LLMAgentBlock
|
|
35
|
+
│ └── _oracle(VerifyOutput) — acceptance check
|
|
36
|
+
└── chat_agent — conversational assistant with memory
|
|
37
|
+
|
|
38
|
+
Orchestrator Registry (orchestrator.py)
|
|
39
|
+
├── register_orchestrator(name) — class decorator; maps name → strategy class
|
|
40
|
+
├── get_orchestrator(name, model)— instantiates registered strategy; raises ValueError
|
|
41
|
+
│ for unknown names (fails loudly on misconfiguration)
|
|
42
|
+
├── CHECKPOINT_SUBPATH — shared constant ".opalacoder/session_state.json"
|
|
43
|
+
└── BaseOrchestratorStrategy — abstract base; subclasses implement run()
|
|
44
|
+
|
|
45
|
+
WorkflowOrchestratorStrategy (workflow_orchestrator.py)
|
|
46
|
+
├── Task schema (Pydantic)
|
|
47
|
+
│ ├── id — short unique identifier (t1, t2, ...)
|
|
48
|
+
│ ├── goal — one-sentence objective: what + why
|
|
49
|
+
│ ├── commands — ordered list of atomic worker steps
|
|
50
|
+
│ ├── related_files — files worker must read before acting
|
|
51
|
+
│ ├── context — operational detail: class names, IDs, APIs, contracts
|
|
52
|
+
│ │ (field_validator coerces dict/list → JSON string for
|
|
53
|
+
│ │ models that generate object-typed context)
|
|
54
|
+
│ └── depends_on — explicit ordering between tasks
|
|
55
|
+
├── _validate_task() — semantic guardrail: rejects empty goal/commands/context
|
|
56
|
+
│ and CSS/JS tasks without related_files
|
|
57
|
+
├── _oracle() — litellm call with JSON mode + structural + semantic reflection
|
|
58
|
+
│ DEBUG prints raw content/reasoning per attempt to stderr
|
|
59
|
+
├── _run_worker() — iterates task.commands; each command = one LLMAgentBlock call
|
|
60
|
+
│ with context_block preamble (TASK GOAL / RELATED FILES /
|
|
61
|
+
│ CONTEXT / COMMAND) injected into every worker prompt
|
|
62
|
+
│ Worker uses termination_tools=["send_message"] so the loop
|
|
63
|
+
│ stops immediately when send_message is called
|
|
64
|
+
└── _planner_system() — system prompt with full Task schema + field rules + examples
|
|
65
|
+
includes hard rule: redeclaration fix = delete the line,
|
|
66
|
+
never change const→let
|
|
67
|
+
|
|
68
|
+
Project (project.py)
|
|
69
|
+
├── ProjectData — name, path, skills, description, history, plan state
|
|
70
|
+
└── ProjectStore — SQLite CRUD for projects and message history
|
|
71
|
+
|
|
72
|
+
Skills (skills.py)
|
|
73
|
+
├── load_project_skills() — loads only skills listed in the project
|
|
74
|
+
├── select_skills_for_project() — LLM picks skills from description at creation
|
|
75
|
+
├── find_skill_file() — resolves <name>.md across skill search dirs
|
|
76
|
+
└── get_relevant_skills_llm() — semantic router (uses project skills only)
|
|
77
|
+
|
|
78
|
+
Tools (tools.py) — file read/write, run_command, search_code, ask_human
|
|
79
|
+
├── _PROJECT_PATH global — all tools resolve paths relative to the project dir
|
|
80
|
+
├── write_file — writes file + runs _auto_lint(); returns lint errors
|
|
81
|
+
│ to worker so reflection loop can self-correct
|
|
82
|
+
└── _auto_lint() — py_compile for .py, node --check for .js/.ts/.jsx/.tsx
|
|
83
|
+
|
|
84
|
+
Workflow Tools (workflow_tools.py)
|
|
85
|
+
├── find_symbol — index-backed, all languages, falls back to grep
|
|
86
|
+
├── find_callers — reverse call graph tool exposed to workers
|
|
87
|
+
├── edit_file — atomic find-replace + auto-lint + reindex
|
|
88
|
+
│ `line` param resolves ambiguous old_str by proximity
|
|
89
|
+
│ new_str="" deletes the matched line
|
|
90
|
+
└── read_file — token-aware: full content or AST overview + anchors
|
|
91
|
+
|
|
92
|
+
Code Index (code_index.py) — multi-language symbol index, SQLite-backed
|
|
93
|
+
├── CODE_INDEX singleton — one instance shared by all tools and orchestrators
|
|
94
|
+
├── set_project(root) — opens/creates .opalacoder/code_index.sqlite
|
|
95
|
+
├── build() — full incremental scan (skips unchanged files by mtime)
|
|
96
|
+
├── rebuild_file(path) — called automatically by write_file and edit_file
|
|
97
|
+
├── search(query) — exact → prefix → substring match across all languages
|
|
98
|
+
├── find_callers(name) — reverse call graph: who calls this symbol?
|
|
99
|
+
├── symbols_in_file(rel) — used by get_file_overview for any language
|
|
100
|
+
└── project_snapshot() — symbol-enriched file listing fed to oracle prompts
|
|
101
|
+
|
|
102
|
+
VCS (vcs.py) — shadow git strategies
|
|
103
|
+
├── _run_shadow_git(cmd, project_path) — always receives explicit project_path;
|
|
104
|
+
│ never uses get_project_path() global
|
|
105
|
+
├── AutoGitStrategy — setup() + manual_commit() before execution (default)
|
|
106
|
+
├── HybridGitStrategy — same + exposes git tools to workers
|
|
107
|
+
├── AgentDrivenGitStrategy — workers have full git control
|
|
108
|
+
└── NoGitStrategy — VCS disabled
|
|
109
|
+
Auto-checkpoint flow:
|
|
110
|
+
set_project_context() called first → get_project_path() correct →
|
|
111
|
+
_vcs.setup() → _vcs.manual_commit("auto-checkpoint before plan execution") →
|
|
112
|
+
/undo calls undo_last() → reset --hard HEAD~1 on shadow git
|
|
113
|
+
|
|
114
|
+
Skill search order:
|
|
115
|
+
1. {project_path}/skills/ (project-local, highest priority)
|
|
116
|
+
2. {repo_root}/skills/ (OpalaCoder built-in skills)
|
|
117
|
+
3. ~/.opalacoder/skills/ (user global skills)
|
|
118
|
+
|
|
119
|
+
Benchmark (scripts/)
|
|
120
|
+
├── collect_jsbench.py — collects JS bug-fix instances from GitHub
|
|
121
|
+
│ uses timeline API (not search) to find linked PRs
|
|
122
|
+
│ filters: has jest/vitest, pure .js files, ≤3 files changed
|
|
123
|
+
└── eval_jsbench.py — evaluates OpalaCoder on collected instances
|
|
124
|
+
clone at base_commit → npm test (before) →
|
|
125
|
+
run OpalaCoder → npm test (after) → pass/fail
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Adding a New Orchestrator Strategy
|
|
129
|
+
|
|
130
|
+
1. Create a class that extends `BaseOrchestratorStrategy` and implements `async run()`.
|
|
131
|
+
2. Decorate it with `@register_orchestrator("my_strategy_name")`.
|
|
132
|
+
3. Set `strategy: my_strategy_name` under `agents.orchestrator` in `agents.yaml`.
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
@register_orchestrator("my_strategy")
|
|
136
|
+
class MyOrchestratorStrategy(BaseOrchestratorStrategy):
|
|
137
|
+
async def run(self, user_request: str, history: str, **kwargs) -> str:
|
|
138
|
+
...
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Key Decisions
|
|
142
|
+
|
|
143
|
+
| Decision | Reason |
|
|
144
|
+
|---|---|
|
|
145
|
+
| Project replaces session as primary abstraction | Stable context anchor; avoids unbounded session drift |
|
|
146
|
+
| Skills fixed at project creation | Prevents irrelevant skill injection; reduces prompt size for small models |
|
|
147
|
+
| Orchestrator registry (`register_orchestrator`) | Allows multiple strategies selectable via `agents.yaml`; fails loudly on unknown names |
|
|
148
|
+
| `strategy: workflow` in `agents.yaml` | WorkflowOrchestratorStrategy is the current default |
|
|
149
|
+
| Classifiers instantiated once in `REPLState` | Avoids creating a new `LLMAgentBlock` on every user turn |
|
|
150
|
+
| All tools use `_PROJECT_PATH` as `cwd` | Eliminates path ambiguity; agent does not need to reason about absolute paths |
|
|
151
|
+
| SQLite persistence | Lightweight, zero-dependency, suitable for local-first tooling |
|
|
152
|
+
| Code index in SQLite | Survives session restarts; incremental rebuild by mtime avoids re-parsing unchanged files |
|
|
153
|
+
| `project_snapshot()` replaces plain file listing | Oracle receives symbol names per file → plans tasks at function granularity |
|
|
154
|
+
| Structured `Task` schema with semantic validation | Forces the planner to externalize class names, IDs, and file contracts |
|
|
155
|
+
| `_validate_task()` in oracle reflection loop | Rejects incomplete plans before they reach workers |
|
|
156
|
+
| Commands list in `Task` (not monolithic description) | Each atomic step runs as a separate `LLMAgentBlock` call; shorter prompts, tighter focus |
|
|
157
|
+
| `context_block` preamble injected into every worker prompt | Worker never needs to infer cross-file contracts |
|
|
158
|
+
| `_auto_lint()` called in both `edit_file` and `write_file` | Syntax errors surface as tool return values; worker self-corrects |
|
|
159
|
+
| `reasoning_effort: "none"` for workers and classifiers | Gemma4/Ministral on Ollama leave `tool_calls` empty when thinking is enabled |
|
|
160
|
+
| `max_iterations=None` for workers | Worker bounded by `max_tool_calls` only |
|
|
161
|
+
| `termination_tools=["send_message"]` in LLMAgentBlock | Worker loop stops immediately when send_message is called; requires agenticblocks ≥ commit 98795f3 |
|
|
162
|
+
| `field_validator` on `Task.context` | Coerces dict/list → JSON string for models (e.g. Mistral) that generate object-typed context |
|
|
163
|
+
| `edit_file` `line` param | Resolves ambiguous old_str by picking occurrence closest to given line number |
|
|
164
|
+
| `set_project_context()` before VCS checkpoint | Ensures `get_project_path()` returns the correct user project path, not OpalaCoder's own root |
|
|
165
|
+
| Intent classifier prompt in English only | LLM generalizes to PT-BR from English examples; PT-BR examples in LLM prompts cause confusion |
|
|
166
|
+
| `ministral-3:14b` as default model | Better instruction-following than mistral-nemo for structured JSON; faster than gemma4 |
|
|
167
|
+
|
|
168
|
+
## Workflow Orchestrator: Plan→Execute→Verify Loop
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
while heartbeats < max_hb:
|
|
172
|
+
1. PLAN — _oracle(PlanOutput) decomposes request into structured Tasks
|
|
173
|
+
_validate_task() runs semantic checks; reflection on failure
|
|
174
|
+
2. EXECUTE — for each task:
|
|
175
|
+
for each command in task.commands:
|
|
176
|
+
LLMAgentBlock(prompt=context_block + command,
|
|
177
|
+
termination_tools=["send_message"])
|
|
178
|
+
worker calls read_file / edit_file / write_file / run_command
|
|
179
|
+
lint errors returned as tool output → self-correction loop
|
|
180
|
+
loop exits immediately on send_message call
|
|
181
|
+
3. VERIFY — _oracle(VerifyOutput) reads actual file contents on disk
|
|
182
|
+
done=True → exit
|
|
183
|
+
done=False → corrections (structured Tasks) → back to EXECUTE
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Skill Scopes
|
|
187
|
+
|
|
188
|
+
Skills carry a `scope` frontmatter field:
|
|
189
|
+
|
|
190
|
+
- `all` — injected into both the intent classifier and the orchestrator
|
|
191
|
+
- `orchestrator` — injected only into the planner/executor
|
|
192
|
+
- `classifier` — injected only into the intent classifier
|
|
193
|
+
|
|
194
|
+
## Known Removed / Dead Code
|
|
195
|
+
|
|
196
|
+
- `AutonomousOrchestratorStrategy`, `autonomous_orchestrator.py` — superseded by `WorkflowOrchestratorStrategy`
|
|
197
|
+
- `profile_executor.py`, `profiles.py`, `profiles/` — profile-based execution replaced by skill system
|
|
198
|
+
- `make_confirmation_agent()` — duplicated logic already in `confirm_plan()` in `structured.py`
|
|
199
|
+
- `DeterministicOrchestratorStrategy` — described in prior versions but never implemented
|
|
200
|
+
- `SubplanSchema`, `DecompositionResult`, `decompose_to_subplans()` — belonged to unimplemented deterministic path
|
|
201
|
+
- `Task.description` (string) — replaced by structured `Task` schema (goal + commands + related_files + context + depends_on)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# General Rules
|
|
2
|
+
|
|
3
|
+
1. Don't make baseless assumptions; before drawing any conclusions, conduct tests and analysis to gather as much information as possible.
|
|
4
|
+
|
|
5
|
+
# Project Guide
|
|
6
|
+
|
|
7
|
+
## Language
|
|
8
|
+
|
|
9
|
+
All code, comments, and documentation must be written in **English**.
|
|
10
|
+
|
|
11
|
+
## Architecture
|
|
12
|
+
|
|
13
|
+
This project uses the **AgenticBlocks.IO** framework.
|
|
14
|
+
Before starting any task, read the library source and documentation at:
|
|
15
|
+
https://github.com/gilzamir/agenticblocks
|
|
16
|
+
|
|
17
|
+
Key things to understand from that repo:
|
|
18
|
+
- How blocks are structured and composed
|
|
19
|
+
- How agents communicate and dispatch events
|
|
20
|
+
- Naming conventions used throughout the framework
|
|
21
|
+
|
|
22
|
+
## Project Context
|
|
23
|
+
|
|
24
|
+
Read `ARCH_SUMMARY.md` before making changes. It contains the current project status,
|
|
25
|
+
known issues, and decisions already made. Do not re-litigate what is documented there.
|
|
26
|
+
|
|
27
|
+
## Build & Test Commands
|
|
28
|
+
Run the test suite in the `tests/` directory after you implement a new feature.
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
> Fill in your actual commands below — this is the highest-value section.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
python -m pytest
|
|
35
|
+
```
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# OpalaCoder — Mapa do Fluxo de Execução
|
|
2
|
+
|
|
3
|
+
> Arquivo gerado para auditoria. Referências de linha apontam para o estado atual do código.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Visão geral (entrada → saída)
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
User request
|
|
11
|
+
│
|
|
12
|
+
▼
|
|
13
|
+
[WorkflowOrchestrator.run()] workflow_orchestrator.py:697
|
|
14
|
+
│
|
|
15
|
+
├─ Setup: VCS checkpoint, code index, skill tools/reviewers
|
|
16
|
+
│
|
|
17
|
+
▼
|
|
18
|
+
[_orchestration_loop()] workflow_orchestrator.py:1157
|
|
19
|
+
│
|
|
20
|
+
├─ PRÉ-PLANNING: skill tool pre-scan ← NOVO
|
|
21
|
+
│
|
|
22
|
+
▼
|
|
23
|
+
[Planner Oracle] → PlanOutput workflow_orchestrator.py:1245
|
|
24
|
+
│
|
|
25
|
+
└─ loop: para cada task pendente
|
|
26
|
+
│
|
|
27
|
+
├─ VCS checkpoint (rollback point)
|
|
28
|
+
├─ errors_before = _run_skill_scan() ← NOVO
|
|
29
|
+
│
|
|
30
|
+
▼
|
|
31
|
+
[Worker] → worker_result workflow_orchestrator.py:1301
|
|
32
|
+
│
|
|
33
|
+
▼
|
|
34
|
+
[_review_task()] workflow_orchestrator.py:945
|
|
35
|
+
│
|
|
36
|
+
├─ CHECK 1: lint
|
|
37
|
+
├─ CHECK 2: arquivos mudaram no disco?
|
|
38
|
+
├─ CHECK 3: partial edit (old_str not found)?
|
|
39
|
+
├─ CHECK 4: H2 before/after (orchestrator) ← NOVO
|
|
40
|
+
├─ CHECK 5: skill reviewers (plugins) ← NOVO
|
|
41
|
+
└─ CHECK 6: LLM reviewer oracle
|
|
42
|
+
│
|
|
43
|
+
├─ done=True → task.status = "done"
|
|
44
|
+
└─ done=False → task.failure_count++
|
|
45
|
+
└─ se >= MAX_TASK_FAILURES → abort
|
|
46
|
+
└─ senão → adiciona correction task ao plano e retenta
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Fase 0 — Setup (run, linhas 697–795)
|
|
52
|
+
|
|
53
|
+
| Passo | O que faz | Onde |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| VCS auto-checkpoint | Salva estado do projeto em `.opalacoder/.git` antes de qualquer mudança | ~758 |
|
|
56
|
+
| Code index build | Indexa arquivos do projeto para buscas do worker | ~764 |
|
|
57
|
+
| Carregar skill tools | `load_skill_tools()` lê frontmatter `tools:` de cada skill ativa e importa as funções | ~773 |
|
|
58
|
+
| Carregar skill reviewers | `load_skill_reviewers()` lê frontmatter `reviewer:` e importa as funções | ~774 |
|
|
59
|
+
| Montar planner_sys | `_planner_system()` — prompt de sistema do planner LLM | ~787 |
|
|
60
|
+
|
|
61
|
+
**Skill ativa por padrão para projetos HTML/JS/CSS:** `html_css_js`
|
|
62
|
+
- Tool registrada: `html_css_js_tools.search_html_css_js_bugs`
|
|
63
|
+
- Reviewer registrado: `html_css_js_tools.html_css_js_reviewer`
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Fase 1 — Pré-planning (linhas 1195–1243)
|
|
68
|
+
|
|
69
|
+
### 1a. File snippets
|
|
70
|
+
Extrai conteúdo de arquivos mencionados explicitamente na request do usuário (ex: "corrige o `index.html`").
|
|
71
|
+
|
|
72
|
+
### 1b. Skill tool pre-scan ← **PONTO CRÍTICO**
|
|
73
|
+
```
|
|
74
|
+
para cada st em _skill_tools:
|
|
75
|
+
chama st._func(".") ← sync, direto na função, não via wrapper async
|
|
76
|
+
filtra linhas com [CONTRACT ERROR] / [SYNTAX ERROR] / [ERROR]
|
|
77
|
+
injeta no planning_prompt como "Skill tool pre-scan"
|
|
78
|
+
```
|
|
79
|
+
**Por que importa:** sem isso, o planner só vê o sintoma ("botão 9 não funciona") e chuta o arquivo errado (script.js). Com o pre-scan ele vê `[CONTRACT ERROR] index.html:42: botão 9 tem data-value="multiply"` e cria a task no arquivo correto.
|
|
80
|
+
|
|
81
|
+
**Só linhas de erro bloqueantes são injetadas** — WARNING/INFO causariam tasks desnecessárias.
|
|
82
|
+
|
|
83
|
+
### 1c. planning_prompt montado
|
|
84
|
+
```
|
|
85
|
+
Project files: <snapshot do diretório>
|
|
86
|
+
Relevant file contents and diagnostics:
|
|
87
|
+
<file snippets>
|
|
88
|
+
Skill tool pre-scan:
|
|
89
|
+
[search_html_css_js_bugs]
|
|
90
|
+
[CONTRACT ERROR] index.html:42: ...
|
|
91
|
+
Conversation history: ...
|
|
92
|
+
User request: ...
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 1d. Planner oracle
|
|
96
|
+
`_oracle(PlanOutput, planner_sys, planning_prompt)` → retorna lista de `Task` com:
|
|
97
|
+
- `id`, `goal`, `commands[]`, `related_files[]`, `context`, `depends_on[]`
|
|
98
|
+
|
|
99
|
+
O system prompt do planner (`_planner_system`, linha 370) contém instrução crítica:
|
|
100
|
+
> "CRITICAL — when the planning prompt contains a skill tool pre-scan with [CONTRACT ERROR]: create tasks ONLY for the files explicitly named in 'FIX REQUIRED IN <file>'. Do NOT create tasks for files the scan does not flag."
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Fase 2 — Loop de execução de tasks (linhas 1263–1380)
|
|
105
|
+
|
|
106
|
+
Para cada task pendente (respeitando `depends_on`):
|
|
107
|
+
|
|
108
|
+
### 2a. VCS checkpoint por task (linha 1292)
|
|
109
|
+
`_vcs.manual_commit("pre-task t1: ...")` — permite rollback se a task falhar.
|
|
110
|
+
|
|
111
|
+
### 2b. errors_before = _run_skill_scan() (linha 1299) ← **NOVO**
|
|
112
|
+
Captura o conjunto de erros bloqueantes **antes** do worker rodar.
|
|
113
|
+
```python
|
|
114
|
+
def _run_skill_scan() -> set[str]: # linha 920
|
|
115
|
+
# chama cada st._func(".") de _skill_tools
|
|
116
|
+
# retorna set de linhas [CONTRACT ERROR]/[SYNTAX ERROR]/[ERROR]
|
|
117
|
+
```
|
|
118
|
+
Este snapshot será comparado com o estado **depois** do worker para detectar se o erro foi de fato corrigido.
|
|
119
|
+
|
|
120
|
+
### 2c. Worker (linha 1301)
|
|
121
|
+
`_run_worker_safe(task)` → `_run_worker(task, ...)` (linha 540)
|
|
122
|
+
|
|
123
|
+
O worker é um `LLMAgentBlock` com ferramentas:
|
|
124
|
+
- `read_file`, `edit_file`, `create_file`, `list_dir`, `search_code`
|
|
125
|
+
- + skill tools do projeto (ex: `search_html_css_js_bugs`)
|
|
126
|
+
|
|
127
|
+
O worker recebe o `task.goal` + `task.commands[]` como prompt e executa.
|
|
128
|
+
Ao final, o footer do resultado anota `[Tools invoked: edit_file, read_file, ...]`.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Fase 3 — Review (`_review_task`, linhas 945–1143)
|
|
133
|
+
|
|
134
|
+
Os checks são executados **em ordem**. O primeiro que reprovar devolve `VerifyOutput(done=False)` sem executar os seguintes.
|
|
135
|
+
|
|
136
|
+
### CHECK 1 — Lint (linha 948)
|
|
137
|
+
`_run_lint(task)` verifica sintaxe JS (node --check) / Python (py_compile).
|
|
138
|
+
- Erro de sintaxe → **rejeição automática**, sem chamar LLM.
|
|
139
|
+
|
|
140
|
+
### CHECK 2 — Arquivos mudaram? (linha 969)
|
|
141
|
+
`_git_changed_files()` usa `git status --porcelain` no shadow git.
|
|
142
|
+
- Worker chamou ferramenta de escrita **mas nenhum arquivo mudou** → **rejeição automática**.
|
|
143
|
+
- Worker não chamou nenhuma ferramenta de escrita e nenhum arquivo mudou → **rejeição automática**.
|
|
144
|
+
|
|
145
|
+
### CHECK 3 — Partial edit (linha 1034)
|
|
146
|
+
Conta as ocorrências de `old_str not found` no `worker_result`. Se houver alguma (> 0), injeta uma nota no contexto do LLM reviewer (não rejeita automaticamente, apenas avisa).
|
|
147
|
+
|
|
148
|
+
### CHECK 4 — H2 before/after orchestrator (linha 1051) ← **NOVO**
|
|
149
|
+
```python
|
|
150
|
+
errors_after = _run_skill_scan()
|
|
151
|
+
unresolved = errors_before & errors_after # erros que existiam antes e ainda existem
|
|
152
|
+
new_errors = errors_after - errors_before # erros novos introduzidos pelo worker
|
|
153
|
+
|
|
154
|
+
if unresolved or new_errors:
|
|
155
|
+
→ rejeição automática, passa as linhas de erro como corrections
|
|
156
|
+
```
|
|
157
|
+
**Por que é genérico:** compara sets de strings — não sabe nada sobre o tipo de erro. Funciona para qualquer ferramenta registrada em `tools:` que emita linhas `[CONTRACT ERROR]` / `[SYNTAX ERROR]` / `[ERROR]`.
|
|
158
|
+
|
|
159
|
+
### CHECK 5 — Skill reviewers (linha 1080) ← **NOVO**
|
|
160
|
+
Para cada função em `_skill_reviewers`:
|
|
161
|
+
```python
|
|
162
|
+
if "errors_before" in sig.parameters:
|
|
163
|
+
resultado = reviewer(project_path, task_goal, related_files, errors_before=errors_before)
|
|
164
|
+
else:
|
|
165
|
+
resultado = reviewer(project_path, task_goal, related_files)
|
|
166
|
+
|
|
167
|
+
if resultado["done"] == False:
|
|
168
|
+
→ rejeição, corrections = resultado["corrections"]
|
|
169
|
+
```
|
|
170
|
+
O `html_css_js_reviewer` aceita `errors_before` e faz a mesma lógica de before/after, mas com acesso direto ao scanner (pode gerar mensagens mais detalhadas que o CHECK 4).
|
|
171
|
+
|
|
172
|
+
### CHECK 6 — LLM reviewer oracle (linha 1100)
|
|
173
|
+
Só chega aqui se todos os checks anteriores passaram.
|
|
174
|
+
Manda para o LLM:
|
|
175
|
+
- task JSON, lint summary, arquivos que mudaram, worker_result, conteúdo atual dos `related_files`
|
|
176
|
+
|
|
177
|
+
Retorna `VerifyOutput(done, summary, corrections[])`.
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## O que acontece quando review.done=False
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
task.failure_count += 1
|
|
185
|
+
|
|
186
|
+
if failure_count >= MAX_TASK_FAILURES:
|
|
187
|
+
→ abort com mensagem de erro para o usuário
|
|
188
|
+
|
|
189
|
+
else:
|
|
190
|
+
para cada Task em review.corrections:
|
|
191
|
+
adiciona ao plan.tasks com id único
|
|
192
|
+
→ próxima iteração do while loop pega a correction task como pending
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Arquivos relevantes
|
|
198
|
+
|
|
199
|
+
| Arquivo | Responsabilidade |
|
|
200
|
+
|---|---|
|
|
201
|
+
| [opalacoder/workflow_orchestrator.py](opalacoder/workflow_orchestrator.py) | Fluxo completo: planner, worker, reviewer |
|
|
202
|
+
| [opalacoder/plugins/html_css_js_tools.py](opalacoder/plugins/html_css_js_tools.py) | Scanner de bugs HTML/JS (`search_html_css_js_bugs`) e reviewer (`html_css_js_reviewer`) |
|
|
203
|
+
| [opalacoder/skills.py](opalacoder/skills.py) | Carregamento de skills, tools e reviewers por frontmatter |
|
|
204
|
+
| [skills/html_css_js.md](skills/html_css_js.md) | Frontmatter: `tools:` e `reviewer:` que ativam o scanner e o reviewer |
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## O que verificar manualmente para o bug do micalc
|
|
209
|
+
|
|
210
|
+
1. **O planner viu o erro certo?**
|
|
211
|
+
Rode: `python tests/test_planner_output.py micalc "botão 9 não funciona"`
|
|
212
|
+
Procure no output: `Skill tool pre-scan` com `[CONTRACT ERROR]` apontando para `index.html`.
|
|
213
|
+
Se não aparecer: o scanner `search_html_css_js_bugs` não detectou o bug.
|
|
214
|
+
|
|
215
|
+
2. **O scanner detecta o bug?**
|
|
216
|
+
```bash
|
|
217
|
+
cd /caminho/do/micalc
|
|
218
|
+
python -c "
|
|
219
|
+
import sys; sys.path.insert(0, '/caminho/do/OpalaCoder')
|
|
220
|
+
from opalacoder.plugins.html_css_js_tools import search_html_css_js_bugs
|
|
221
|
+
print(search_html_css_js_bugs('.'))
|
|
222
|
+
"
|
|
223
|
+
```
|
|
224
|
+
Deve aparecer `[CONTRACT ERROR]` para o botão 9.
|
|
225
|
+
|
|
226
|
+
3. **O H2 rejeita corretamente?**
|
|
227
|
+
Se o worker mudar o arquivo errado (script.js em vez de index.html), o CONTRACT ERROR do botão 9 — já capturado em `errors_before` antes do worker rodar — continua presente no projeto. O `_run_skill_scan()` após o worker retorna o mesmo erro, então `unresolved = errors_before & errors_after` fica não-vazio → rejeição automática no CHECK 4.
|
|
228
|
+
|
|
229
|
+
4. **O reviewer do plugin foi chamado?**
|
|
230
|
+
Procure no log do agente por `[skill_reviewer]` ou `[H2-REJECT]`.
|