claude-code-generator 0.4.11__tar.gz → 0.4.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {claude_code_generator-0.4.11/src/claude_code_generator.egg-info → claude_code_generator-0.4.13}/PKG-INFO +1 -1
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/pyproject.toml +1 -1
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13/src/claude_code_generator.egg-info}/PKG-INFO +1 -1
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/__init__.py +1 -1
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/ollama_budget.py +72 -49
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase1_plan.py +95 -21
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-6-test.md +13 -6
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_ollama_budget.py +77 -28
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase1.py +97 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/LICENSE +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/README.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/setup.cfg +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/claude_code_generator.egg-info/SOURCES.txt +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/claude_code_generator.egg-info/dependency_links.txt +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/claude_code_generator.egg-info/entry_points.txt +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/claude_code_generator.egg-info/requires.txt +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/claude_code_generator.egg-info/top_level.txt +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/agents.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/cli.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_bench_io.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_crash_recovery.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_detect.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_dispatch.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_resume.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/_validators.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/bench.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/bench_compare.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/bench_export.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/generate.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/init.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/optimize.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/review.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/commands/status.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/effort.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/env.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/gh/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/gh/core.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/gh/issues.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/gh/labels.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/gh/milestones.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/git_ops.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/logging_setup.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/memory.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/_client_lifecycle.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/_comments.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/_memory_writers.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/_phase5_precommit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/cycle_loop.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/cycle_prompts.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase0_complexity.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase2_review.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase3_4_implement.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase5_closure.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase6_test.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/orchestrator/phase7_commit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/preflight.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/hashes.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-cycle-specializer.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-optimize-requirements.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-0-complexity.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-1-planning.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-2-batch-review.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-2-issue-review.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-3-implementation.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-5-final-review.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-phase-7-commit.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/prompts/prompt-review.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/repo_info.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/repomap.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/requirements_structure.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/_telemetry.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/batch.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/fake_runner.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/mcp.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/message_parsing.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/options.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/protocol.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/rate_limit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/retry.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/sdk_runner.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/soft_reset.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/state_guard.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/subprocess_runner.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/types.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/runner/utils.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/state.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/state_retention.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/__init__.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/angular.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/base.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/fastapi.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/finance.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/fullstack.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/nestjs.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/src/code_generator/templates/python-cli.md +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_agents.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_bench.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_bench_compare.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_bench_export.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_bench_fixture.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_bench_regression.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_changelog.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_claude_md.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_client_lifecycle.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_comments.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_commit_message.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_crash_recovery.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_cycle_loop.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_cycle_loop_multicycle.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_cycle_ollama_model.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_cycle_prompts.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_delta_planning.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_dependencies.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_detect.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_docs_no_default_max_turns.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_docs_ollama_model_guide.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_docs_ollama_pro.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_effective_model_routing.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_effort.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_env.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_generate.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_generate_ollama.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_generate_resume.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_gh.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_gh_labels.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_gh_milestones.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_gh_repo_threading.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_gh_submodules.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_git_ops.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_init.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_logging_setup.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_max_turns_cli_flag.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_mcp.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_memory.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_memory_writers.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_message_parsing.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_no_max_turns_in_call_sites.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_no_max_turns_literal.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_non_goals_grep_guard.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_ollama_rate_limit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_optimize.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_options.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase0.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase2.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase2_batch.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase3_4.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase5.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase5_precommit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase6.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase7.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase_mcp_regression.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_phase_token_logging.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_preflight.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_preflight_ollama.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_prompt_drift.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_prompt_prefix_snapshots.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_prompt_prefix_stability.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_prompts.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_rate_limit.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_repo_info.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_repomap.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_requirements_structure.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_retry.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_review.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_runner_protocol.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_runner_protocol_annotations.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_runner_types.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_runner_utils.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_sdk_runner.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_sdk_runner_shared.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_session_mode.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_state.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_state_guard.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_state_retention.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_status.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_subprocess_runner.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_telemetry.py +0 -0
- {claude_code_generator-0.4.11 → claude_code_generator-0.4.13}/tests/test_version.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "claude-code-generator"
|
|
7
|
-
version = "0.4.11"
|
|
7
|
+
version = "0.4.13"
|
|
8
8
|
description = "Orchestrator CLI that drives Claude Code end-to-end to generate whole projects from a requirements.md file."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
"""Per-cycle safety backstop for the Ollama codepath.
|
|
2
2
|
|
|
3
|
-
The pre-0.4.11 design
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
already lives in two places and triggers on real malfunctions, not on an
|
|
7
|
-
arbitrary counter:
|
|
3
|
+
The pre-0.4.11 design aborted cycles on two arbitrary counters — 200 turns
|
|
4
|
+
and 1 h wall-clock. That's the wrong layer: fault-detection already lives
|
|
5
|
+
in two places that trigger on real malfunctions, not on counters:
|
|
8
6
|
|
|
9
7
|
* :class:`~code_generator.runner.retry.CircuitBreaker` — trips after ``N``
|
|
10
8
|
consecutive failures on a single phase call. Already wrapped around
|
|
@@ -13,29 +11,27 @@ arbitrary counter:
|
|
|
13
11
|
* :func:`~code_generator.runner.rate_limit.handle_ollama_429` —
|
|
14
12
|
wait-and-resume on 429s returned by the Ollama daemon.
|
|
15
13
|
|
|
16
|
-
|
|
14
|
+
Starting with 0.4.13 both thresholds are **soft warnings** — the module
|
|
15
|
+
never aborts the pipeline. Weak open models are slow AND chatty; letting
|
|
16
|
+
them run is the right call. The operator can always Ctrl-C a runaway.
|
|
17
17
|
|
|
18
|
-
*
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
* The wall-clock cap remains a **hard abort**, but the default is raised
|
|
23
|
-
to 4 hours. It exists purely to catch a stuck daemon or a model trapped
|
|
24
|
-
in a pathological loop the CircuitBreaker cannot see (e.g. infinite
|
|
25
|
-
``end_turn``→``continue`` cycle producing no tool calls).
|
|
18
|
+
* ``OLLAMA_SOFT_TURN_WARN`` (int, positive; default 500). Emitted once
|
|
19
|
+
per cycle when cumulative ``num_turns`` crosses the threshold.
|
|
20
|
+
* ``OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS`` (int, positive; default 14400).
|
|
21
|
+
Emitted once per cycle when elapsed wall-clock crosses the threshold.
|
|
26
22
|
|
|
27
|
-
|
|
23
|
+
Backwards-compatible shims: the legacy env names ``OLLAMA_TURN_BUDGET`` and
|
|
24
|
+
``OLLAMA_WALLCLOCK_BUDGET_SECONDS`` remain honoured and map onto the new
|
|
25
|
+
soft-warn thresholds. Scripts that previously relied on the abort now see
|
|
26
|
+
a WARNING instead; the 0.4.11 changelog entry called this out for the turn
|
|
27
|
+
budget, and 0.4.13 extends the same semantics to wall-clock.
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
Backwards-compatible shim: the old ``OLLAMA_TURN_BUDGET`` env variable is
|
|
33
|
-
still honoured and maps onto the soft-warn threshold, so operators with
|
|
34
|
-
existing scripts see no behaviour change beyond the abort becoming a
|
|
35
|
-
warning.
|
|
29
|
+
:class:`OllamaBudgetExceeded` is retained only as a concrete exception
|
|
30
|
+
type for backwards compatibility with callers that ``except`` it — it is
|
|
31
|
+
no longer raised by this module.
|
|
36
32
|
|
|
37
33
|
Nothing is persisted in ``state.json`` — the tracker is per-run and
|
|
38
|
-
discarded on
|
|
34
|
+
discarded on clean completion.
|
|
39
35
|
"""
|
|
40
36
|
|
|
41
37
|
from __future__ import annotations
|
|
@@ -57,7 +53,7 @@ _logger = logging.getLogger(__name__)
|
|
|
57
53
|
# ---------------------------------------------------------------------------
|
|
58
54
|
|
|
59
55
|
_DEFAULT_SOFT_TURN_WARN = 500
|
|
60
|
-
|
|
56
|
+
_DEFAULT_WALLCLOCK_SOFT_WARN_SECONDS = 14400 # 4 h
|
|
61
57
|
|
|
62
58
|
|
|
63
59
|
def _read_int_env(name: str, default: int) -> int:
|
|
@@ -100,17 +96,31 @@ aborts.
|
|
|
100
96
|
OLLAMA_TURN_BUDGET = OLLAMA_SOFT_TURN_WARN
|
|
101
97
|
"""Backwards-compatible alias for :data:`OLLAMA_SOFT_TURN_WARN`."""
|
|
102
98
|
|
|
103
|
-
|
|
104
|
-
"OLLAMA_WALLCLOCK_BUDGET_SECONDS"
|
|
105
|
-
)
|
|
106
|
-
|
|
99
|
+
def _resolve_wallclock_soft_warn() -> int:
|
|
100
|
+
"""Honour legacy ``OLLAMA_WALLCLOCK_BUDGET_SECONDS`` env var for backwards compat."""
|
|
101
|
+
legacy = _read_int_env("OLLAMA_WALLCLOCK_BUDGET_SECONDS", 0)
|
|
102
|
+
if legacy:
|
|
103
|
+
return legacy
|
|
104
|
+
return _read_int_env(
|
|
105
|
+
"OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS", _DEFAULT_WALLCLOCK_SOFT_WARN_SECONDS
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS = _resolve_wallclock_soft_warn()
|
|
110
|
+
"""Soft warning threshold on per-cycle wall-clock elapsed (seconds).
|
|
107
111
|
|
|
108
|
-
Defaults to 14400 (4 h)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
+
Defaults to 14400 (4 h). Override via ``OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS``
|
|
113
|
+
(new name) or the legacy ``OLLAMA_WALLCLOCK_BUDGET_SECONDS`` (preserved for
|
|
114
|
+
backwards compatibility). The value is **non-blocking**: the pipeline only
|
|
115
|
+
logs a WARNING once per cycle when elapsed first crosses this threshold.
|
|
116
|
+
It never aborts.
|
|
112
117
|
"""
|
|
113
118
|
|
|
119
|
+
# Kept as a module-level alias so existing importers (tests, scripts) keep
|
|
120
|
+
# working. The semantics are now "soft warning threshold", not "abort".
|
|
121
|
+
OLLAMA_WALLCLOCK_BUDGET_SECONDS = OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS
|
|
122
|
+
"""Backwards-compatible alias for :data:`OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS`."""
|
|
123
|
+
|
|
114
124
|
|
|
115
125
|
# ---------------------------------------------------------------------------
|
|
116
126
|
# Exception
|
|
@@ -118,11 +128,12 @@ stuck daemon or a pathological loop the per-phase
|
|
|
118
128
|
|
|
119
129
|
|
|
120
130
|
class OllamaBudgetExceeded(RuntimeError):
|
|
121
|
-
"""
|
|
131
|
+
"""Retained only for backwards compatibility with ``except`` clauses.
|
|
122
132
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
in
|
|
133
|
+
As of 0.4.13 this module never raises ``OllamaBudgetExceeded``. Both the
|
|
134
|
+
turn counter and the wall-clock are non-blocking soft WARNINGs. Real
|
|
135
|
+
per-call failures are handled by the ``CircuitBreaker`` in
|
|
136
|
+
:mod:`code_generator.runner.retry`.
|
|
126
137
|
|
|
127
138
|
Subclasses ``RuntimeError`` to match the existing safety-abort hierarchy
|
|
128
139
|
(e.g. :class:`~code_generator.runner.types.OverageAbort`).
|
|
@@ -137,9 +148,8 @@ class OllamaBudgetExceeded(RuntimeError):
|
|
|
137
148
|
class OllamaBudgetTracker:
|
|
138
149
|
"""Adaptive per-cycle safety backstop; a no-op on the Anthropic Max path.
|
|
139
150
|
|
|
140
|
-
Emits one WARNING
|
|
141
|
-
|
|
142
|
-
turn count — real failures are the responsibility of
|
|
151
|
+
Emits at most one WARNING per threshold (turn count, wall-clock). **Never
|
|
152
|
+
aborts**. Real failures are the responsibility of
|
|
143
153
|
:class:`~code_generator.runner.retry.CircuitBreaker` and the rate-limit
|
|
144
154
|
handlers in :mod:`code_generator.runner.rate_limit`.
|
|
145
155
|
"""
|
|
@@ -162,6 +172,7 @@ class OllamaBudgetTracker:
|
|
|
162
172
|
self._clock = clock or time.monotonic
|
|
163
173
|
self._start_time: float | None = None
|
|
164
174
|
self._turn_warning_emitted = False
|
|
175
|
+
self._wallclock_warning_emitted = False
|
|
165
176
|
|
|
166
177
|
def start(self) -> None:
|
|
167
178
|
"""Record the cycle start time. Idempotent; only the first call matters."""
|
|
@@ -169,11 +180,11 @@ class OllamaBudgetTracker:
|
|
|
169
180
|
self._start_time = self._clock()
|
|
170
181
|
|
|
171
182
|
def check(self, state: State, cycle: CycleState | None) -> None:
|
|
172
|
-
"""Warn on
|
|
183
|
+
"""Warn on either threshold; never raise."""
|
|
173
184
|
if not self._active:
|
|
174
185
|
return
|
|
175
186
|
self._check_turn_soft_warn(state, cycle)
|
|
176
|
-
self.
|
|
187
|
+
self._check_wallclock_soft_warn()
|
|
177
188
|
|
|
178
189
|
def _check_turn_soft_warn(self, state: State, cycle: CycleState | None) -> None:
|
|
179
190
|
"""Emit one WARNING the first time the cycle crosses the soft threshold.
|
|
@@ -196,17 +207,29 @@ class OllamaBudgetTracker:
|
|
|
196
207
|
)
|
|
197
208
|
self._turn_warning_emitted = True
|
|
198
209
|
|
|
199
|
-
def
|
|
200
|
-
|
|
210
|
+
def _check_wallclock_soft_warn(self) -> None:
|
|
211
|
+
"""Emit one WARNING the first time the cycle crosses the wall-clock threshold.
|
|
212
|
+
|
|
213
|
+
Never raises. As of 0.4.13 the wall-clock ceiling is advisory: a
|
|
214
|
+
genuinely stuck daemon will be caught by the per-phase
|
|
215
|
+
CircuitBreaker or by the operator's Ctrl-C; aborting a live session
|
|
216
|
+
on a counter wastes hours of work.
|
|
217
|
+
"""
|
|
218
|
+
if self._wallclock_warning_emitted or self._start_time is None:
|
|
201
219
|
return
|
|
202
220
|
elapsed = self._clock() - self._start_time
|
|
203
|
-
if elapsed >
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
"
|
|
208
|
-
"
|
|
221
|
+
if elapsed > OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS:
|
|
222
|
+
_logger.warning(
|
|
223
|
+
"Ollama cycle has been running for %.0fs (soft threshold: "
|
|
224
|
+
"%ds). Letting it run — real stalls are caught by the "
|
|
225
|
+
"per-phase CircuitBreaker in runner/retry.py. Raise the "
|
|
226
|
+
"threshold via OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS or the "
|
|
227
|
+
"legacy OLLAMA_WALLCLOCK_BUDGET_SECONDS env var to silence "
|
|
228
|
+
"this warning. Use Ctrl-C to stop a truly runaway cycle.",
|
|
229
|
+
elapsed,
|
|
230
|
+
OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS,
|
|
209
231
|
)
|
|
232
|
+
self._wallclock_warning_emitted = True
|
|
210
233
|
|
|
211
234
|
|
|
212
235
|
def _sum_num_turns(state: State, cycle: CycleState | None) -> int:
|
|
@@ -116,6 +116,47 @@ _PHASE1_DEFAULT_MODEL = "claude-opus-4-7"
|
|
|
116
116
|
# CLAUDE.md invariant #8; overridden via ``effective_model`` on Ollama — #219.
|
|
117
117
|
|
|
118
118
|
|
|
119
|
+
_PHASE1_MAX_ATTEMPTS = 3
|
|
120
|
+
"""Max planning attempts before surfacing ``Phase1NoIssuesError`` (0.4.12).
|
|
121
|
+
|
|
122
|
+
Weak open models on the Ollama codepath often respond with prose describing
|
|
123
|
+
the plan instead of invoking ``gh issue create`` via the Bash tool. A single
|
|
124
|
+
stricter re-prompt is usually enough to unblock them without operator
|
|
125
|
+
intervention.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
_PHASE1_RETRY_NUDGE = (
|
|
130
|
+
"Your previous attempt finished without creating any GitHub issues. "
|
|
131
|
+
"This is a CRITICAL failure. You MUST use the Bash tool to invoke "
|
|
132
|
+
"`gh issue create` for each issue right now. Do NOT output markdown "
|
|
133
|
+
"or prose describing the plan — every planned issue must become a "
|
|
134
|
+
"real GitHub issue via `gh issue create ... --milestone "
|
|
135
|
+
'"{MILESTONE}" --assignee @me --label "..."`. Once you have called '
|
|
136
|
+
"`gh issue create` for every planned issue, print a one-line summary "
|
|
137
|
+
"per issue and stop.\n\n"
|
|
138
|
+
"The original instructions follow.\n\n"
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _accumulate_usage(
|
|
143
|
+
total: _state.TokenUsage | None,
|
|
144
|
+
delta: _state.TokenUsage,
|
|
145
|
+
) -> _state.TokenUsage:
|
|
146
|
+
"""Sum two TokenUsage records field-by-field across Phase 1 attempts."""
|
|
147
|
+
from code_generator.runner.types import TokenUsage
|
|
148
|
+
|
|
149
|
+
if total is None:
|
|
150
|
+
return delta
|
|
151
|
+
return TokenUsage(
|
|
152
|
+
input=total.input + delta.input,
|
|
153
|
+
output=total.output + delta.output,
|
|
154
|
+
cache_read=total.cache_read + delta.cache_read,
|
|
155
|
+
cache_write=total.cache_write + delta.cache_write,
|
|
156
|
+
num_turns=total.num_turns + delta.num_turns,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
|
|
119
160
|
def _load_specialized_prompt(
|
|
120
161
|
project_dir: Path,
|
|
121
162
|
state: State,
|
|
@@ -249,28 +290,56 @@ async def run(
|
|
|
249
290
|
**max_turns_kwargs(max_turns),
|
|
250
291
|
)
|
|
251
292
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
293
|
+
# Up to _PHASE1_MAX_ATTEMPTS attempts: the first with the normal
|
|
294
|
+
# prompt, subsequent attempts prefixed with a stricter nudge that
|
|
295
|
+
# tells the model to use the Bash tool to call ``gh issue create``
|
|
296
|
+
# right now. Weak open models on the Ollama codepath commonly
|
|
297
|
+
# respond with prose on the first turn; the nudge recovers most
|
|
298
|
+
# of those cases without operator intervention (0.4.12).
|
|
299
|
+
issue_states: list[_state.IssueState] = []
|
|
300
|
+
total_usage = result = None # type: ignore[assignment]
|
|
301
|
+
attempt_prompt = prompt
|
|
302
|
+
effective_model_name = effective_model or _PHASE1_DEFAULT_MODEL
|
|
303
|
+
for attempt in range(1, _PHASE1_MAX_ATTEMPTS + 1):
|
|
304
|
+
result = await rate_limit.main_loop(
|
|
305
|
+
runner_module,
|
|
306
|
+
attempt_prompt,
|
|
307
|
+
options,
|
|
308
|
+
state_path=state_path,
|
|
309
|
+
logger=logger,
|
|
310
|
+
)
|
|
311
|
+
total_usage = _accumulate_usage(total_usage, result.usage)
|
|
259
312
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
313
|
+
raw_issues = gh.list_issues(
|
|
314
|
+
repo,
|
|
315
|
+
milestone=milestone_title,
|
|
316
|
+
state="all",
|
|
317
|
+
)
|
|
318
|
+
issue_states = _build_issue_states(raw_issues)
|
|
319
|
+
if issue_states:
|
|
320
|
+
break
|
|
321
|
+
|
|
322
|
+
if attempt >= _PHASE1_MAX_ATTEMPTS:
|
|
323
|
+
break
|
|
324
|
+
|
|
325
|
+
logger.warning(
|
|
326
|
+
"Phase 1 attempt %d/%d: model returned without creating any "
|
|
327
|
+
"GitHub issues. Re-prompting with a stricter nudge.",
|
|
328
|
+
attempt,
|
|
329
|
+
_PHASE1_MAX_ATTEMPTS,
|
|
330
|
+
)
|
|
331
|
+
nudge = _PHASE1_RETRY_NUDGE.replace("{MILESTONE}", milestone_title or "")
|
|
332
|
+
attempt_prompt = nudge + prompt
|
|
267
333
|
|
|
268
334
|
if not issue_states:
|
|
269
335
|
raise Phase1NoIssuesError(
|
|
270
|
-
"Phase 1 finished without creating any GitHub issues
|
|
271
|
-
"
|
|
272
|
-
"
|
|
273
|
-
"
|
|
336
|
+
f"Phase 1 finished without creating any GitHub issues after "
|
|
337
|
+
f"{_PHASE1_MAX_ATTEMPTS} attempts (model={effective_model_name!r}). "
|
|
338
|
+
"The model likely responded with prose instead of invoking "
|
|
339
|
+
"`gh issue create` via the Bash tool. Inspect "
|
|
340
|
+
".code-generator/logs/phase1.log for the tool-call trace, "
|
|
341
|
+
"try a stronger model, or simplify the cycle scope in "
|
|
342
|
+
"requirements.md and re-run."
|
|
274
343
|
)
|
|
275
344
|
|
|
276
345
|
target = cycle if cycle is not None else state
|
|
@@ -278,12 +347,17 @@ async def run(
|
|
|
278
347
|
cycle.issues = issue_states
|
|
279
348
|
else:
|
|
280
349
|
state.issues = issue_states
|
|
281
|
-
|
|
350
|
+
# Persist the usage accumulated across all attempts (not just the last). NOTE(review): result.wall_seconds below still comes from the final attempt only.
|
|
351
|
+
target.token_usage["phase1"] = total_usage if total_usage is not None else result.usage
|
|
282
352
|
if hasattr(target, "cache_telemetry"):
|
|
283
|
-
accumulate_telemetry(
|
|
353
|
+
accumulate_telemetry(
|
|
354
|
+
target.cache_telemetry,
|
|
355
|
+
target.token_usage["phase1"],
|
|
356
|
+
result.wall_seconds,
|
|
357
|
+
)
|
|
284
358
|
|
|
285
359
|
_state.save_state(state_path, state)
|
|
286
|
-
log_phase_usage(logger, 1,
|
|
360
|
+
log_phase_usage(logger, 1, target.token_usage["phase1"])
|
|
287
361
|
logger.info("Phase 1: %d issues created.", len(issue_states))
|
|
288
362
|
|
|
289
363
|
except Exception:
|
|
@@ -18,16 +18,22 @@ You are a senior engineer specialized in testing. Your task is to run the projec
|
|
|
18
18
|
- `Cargo.toml` → Rust/cargo test
|
|
19
19
|
- `go.mod` → Go test
|
|
20
20
|
|
|
21
|
-
2. **Run the full test suite
|
|
21
|
+
2. **Run the full test suite with concise output** (see Constraints):
|
|
22
22
|
```bash
|
|
23
|
-
# Adapt to the detected framework
|
|
24
|
-
pytest -
|
|
25
|
-
npm test -- --run
|
|
23
|
+
# Adapt to the detected framework.
|
|
24
|
+
pytest -q --tb=line # Python — quiet, one-line tracebacks
|
|
25
|
+
npm test -- --run --reporter=default # Vitest
|
|
26
26
|
npm test # Jest/Angular
|
|
27
|
-
cargo test --all
|
|
28
|
-
go test ./...
|
|
27
|
+
cargo test --all --quiet # Rust
|
|
28
|
+
go test ./... 2>&1 | tail -200 # Go — only the last 200 lines
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
**Do not use `-v`, `-vv`, `-s`, or any other verbose/stream flag on the full-suite run.** A
|
|
32
|
+
tool result returned to the model cannot exceed roughly 1 MB — verbose
|
|
33
|
+
pytest output on a medium-sized project blows past that limit and
|
|
34
|
+
crashes the SDK stream reader mid-cycle. Start quiet; escalate only the
|
|
35
|
+
individual failing test with `-v` after you have the list of failures.
|
|
36
|
+
|
|
31
37
|
3. **If all tests pass on the first attempt:**
|
|
32
38
|
- Also run any available linters/type checkers (`mypy`, `ruff`, `eslint`, `tsc --noEmit`)
|
|
33
39
|
- Collect test coverage if the framework supports it
|
|
@@ -98,6 +104,7 @@ You are a senior engineer specialized in testing. Your task is to run the projec
|
|
|
98
104
|
- **Do not ask the user for confirmation**: act autonomously in YOLO mode.
|
|
99
105
|
- **If you find flakiness** (a test that passes/fails non-deterministically), do not ignore it: document the flaky behavior in the bug issue.
|
|
100
106
|
- **Environment variables already available globally**: `GITHUB_TOKEN`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `OLLAMA_API_KEY`, `OLLAMA_BASE_URL`. If a test fails because "an API key is missing" among these, the cause is **not** the missing key — check the variable name, the `.env` loading, or an explicit override in the test. Do not add dummy keys as a fix. For tests that make real calls and are slow/expensive, use mocking/VCR cassettes instead of disabling them.
|
|
107
|
+
- **Tool-result size ceiling.** Bash tool-results larger than ~1 MB crash the SDK stream reader. Always prefer `-q`/`--tb=line`/`--quiet` over `-v`/`-vv`/`-s`. If a command genuinely produces more than 1 MB of output, redirect it to a file (`cmd > /tmp/out.log 2>&1 || true`) and then `Read` or `Grep` the file — never let the full output flow back through a single tool result.
|
|
101
108
|
|
|
102
109
|
---
|
|
103
110
|
|
|
@@ -1,30 +1,35 @@
|
|
|
1
|
-
"""Tests for the Ollama per-cycle adaptive safety backstop (
|
|
1
|
+
"""Tests for the Ollama per-cycle adaptive safety backstop (0.4.13).
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
pipeline is never aborted on
|
|
5
|
-
the job of :class:`~code_generator.runner.retry.CircuitBreaker`
|
|
6
|
-
rate-limit handlers.
|
|
3
|
+
Both the turn counter and the wall-clock are **non-blocking soft warnings**
|
|
4
|
+
— the pipeline is never aborted on either threshold alone. Real per-call
|
|
5
|
+
failures are the job of :class:`~code_generator.runner.retry.CircuitBreaker`
|
|
6
|
+
and the rate-limit handlers.
|
|
7
7
|
|
|
8
|
-
Backwards compatibility: ``OLLAMA_TURN_BUDGET``
|
|
9
|
-
|
|
8
|
+
Backwards compatibility: ``OLLAMA_TURN_BUDGET`` and
|
|
9
|
+
``OLLAMA_WALLCLOCK_BUDGET_SECONDS`` are preserved as aliases for
|
|
10
|
+
:data:`OLLAMA_SOFT_TURN_WARN` and :data:`OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS`
|
|
11
|
+
respectively, so existing scripts keep importing.
|
|
10
12
|
"""
|
|
11
13
|
|
|
12
14
|
from __future__ import annotations
|
|
13
15
|
|
|
14
16
|
import logging
|
|
15
|
-
|
|
16
|
-
import pytest
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
17
18
|
|
|
18
19
|
from code_generator import state as _state
|
|
19
20
|
from code_generator.orchestrator.ollama_budget import (
|
|
20
21
|
OLLAMA_SOFT_TURN_WARN,
|
|
21
22
|
OLLAMA_TURN_BUDGET,
|
|
22
23
|
OLLAMA_WALLCLOCK_BUDGET_SECONDS,
|
|
24
|
+
OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS,
|
|
23
25
|
OllamaBudgetExceeded,
|
|
24
26
|
OllamaBudgetTracker,
|
|
25
27
|
)
|
|
26
28
|
from code_generator.runner.types import TokenUsage
|
|
27
29
|
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
import pytest
|
|
32
|
+
|
|
28
33
|
# ---------------------------------------------------------------------------
|
|
29
34
|
# Constants
|
|
30
35
|
# ---------------------------------------------------------------------------
|
|
@@ -38,8 +43,12 @@ class TestThresholdConstants:
|
|
|
38
43
|
"""Legacy ``OLLAMA_TURN_BUDGET`` must alias the new soft-warn constant."""
|
|
39
44
|
assert OLLAMA_TURN_BUDGET == OLLAMA_SOFT_TURN_WARN
|
|
40
45
|
|
|
41
|
-
def
|
|
42
|
-
assert
|
|
46
|
+
def test_wallclock_soft_warn_default_is_4_hours(self) -> None:
|
|
47
|
+
assert OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS == 14400
|
|
48
|
+
|
|
49
|
+
def test_wallclock_budget_alias_matches_soft_warn(self) -> None:
|
|
50
|
+
"""Legacy ``OLLAMA_WALLCLOCK_BUDGET_SECONDS`` must alias the soft-warn constant."""
|
|
51
|
+
assert OLLAMA_WALLCLOCK_BUDGET_SECONDS == OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS
|
|
43
52
|
|
|
44
53
|
|
|
45
54
|
# ---------------------------------------------------------------------------
|
|
@@ -149,9 +158,9 @@ class TestTurnSoftWarning:
|
|
|
149
158
|
# ---------------------------------------------------------------------------
|
|
150
159
|
|
|
151
160
|
|
|
152
|
-
class
|
|
153
|
-
def
|
|
154
|
-
"""Elapsed <
|
|
161
|
+
class TestWallclockSoftWarning:
|
|
162
|
+
def test_under_threshold_does_not_warn(self, caplog: pytest.LogCaptureFixture) -> None:
|
|
163
|
+
"""Elapsed < threshold → no WARNING emitted, no raise."""
|
|
155
164
|
st, cycle = _make_state_with_usage({}, cycle_turns={"phase0": 1})
|
|
156
165
|
now = 1_000_000.0
|
|
157
166
|
tracker = OllamaBudgetTracker(
|
|
@@ -159,12 +168,18 @@ class TestWallclockBudget:
|
|
|
159
168
|
clock=lambda: now,
|
|
160
169
|
)
|
|
161
170
|
tracker.start()
|
|
162
|
-
now +=
|
|
171
|
+
now += OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS - 10
|
|
163
172
|
|
|
164
|
-
|
|
173
|
+
with caplog.at_level(logging.WARNING):
|
|
174
|
+
tracker.check(st, cycle)
|
|
175
|
+
|
|
176
|
+
wallclock_warnings = [r for r in caplog.records if "running for" in r.message.lower()]
|
|
177
|
+
assert wallclock_warnings == []
|
|
165
178
|
|
|
166
|
-
def
|
|
167
|
-
|
|
179
|
+
def test_over_threshold_warns_without_raising(
|
|
180
|
+
self, caplog: pytest.LogCaptureFixture
|
|
181
|
+
) -> None:
|
|
182
|
+
"""Crossing the threshold logs a WARNING; the pipeline continues."""
|
|
168
183
|
st, cycle = _make_state_with_usage({}, cycle_turns={"phase0": 1})
|
|
169
184
|
t = [1_000_000.0]
|
|
170
185
|
|
|
@@ -173,14 +188,32 @@ class TestWallclockBudget:
|
|
|
173
188
|
|
|
174
189
|
tracker = OllamaBudgetTracker(provider_is_ollama=True, clock=_clock)
|
|
175
190
|
tracker.start()
|
|
176
|
-
t[0] +=
|
|
191
|
+
t[0] += OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS + 1
|
|
192
|
+
|
|
193
|
+
with caplog.at_level(logging.WARNING):
|
|
194
|
+
tracker.check(st, cycle) # must not raise
|
|
195
|
+
|
|
196
|
+
wallclock_warnings = [r for r in caplog.records if "running for" in r.message.lower()]
|
|
197
|
+
assert len(wallclock_warnings) == 1
|
|
198
|
+
assert str(OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS) in wallclock_warnings[0].message
|
|
199
|
+
|
|
200
|
+
def test_warning_is_emitted_only_once_per_cycle(
|
|
201
|
+
self, caplog: pytest.LogCaptureFixture
|
|
202
|
+
) -> None:
|
|
203
|
+
"""Subsequent checks after the first wall-clock warning must stay silent."""
|
|
204
|
+
st, cycle = _make_state_with_usage({}, cycle_turns={"phase0": 1})
|
|
205
|
+
t = [1_000_000.0]
|
|
206
|
+
tracker = OllamaBudgetTracker(provider_is_ollama=True, clock=lambda: t[0])
|
|
207
|
+
tracker.start()
|
|
208
|
+
t[0] += OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS + 1
|
|
177
209
|
|
|
178
|
-
with
|
|
210
|
+
with caplog.at_level(logging.WARNING):
|
|
211
|
+
tracker.check(st, cycle)
|
|
212
|
+
tracker.check(st, cycle)
|
|
179
213
|
tracker.check(st, cycle)
|
|
180
214
|
|
|
181
|
-
|
|
182
|
-
assert
|
|
183
|
-
assert str(OLLAMA_WALLCLOCK_BUDGET_SECONDS) in msg
|
|
215
|
+
wallclock_warnings = [r for r in caplog.records if "running for" in r.message.lower()]
|
|
216
|
+
assert len(wallclock_warnings) == 1
|
|
184
217
|
|
|
185
218
|
def test_start_is_required_before_check(self) -> None:
|
|
186
219
|
"""check() without start() returns immediately on wall-clock (None start)."""
|
|
@@ -190,7 +223,7 @@ class TestWallclockBudget:
|
|
|
190
223
|
tracker.check(st, cycle) # must not raise
|
|
191
224
|
|
|
192
225
|
def test_extreme_turn_count_does_not_raise(self) -> None:
|
|
193
|
-
"""
|
|
226
|
+
"""Neither counter should raise on its own — both are soft warnings now."""
|
|
194
227
|
st, cycle = _make_state_with_usage(
|
|
195
228
|
{}, cycle_turns={"phase3_4": OLLAMA_SOFT_TURN_WARN * 100}
|
|
196
229
|
)
|
|
@@ -198,6 +231,16 @@ class TestWallclockBudget:
|
|
|
198
231
|
|
|
199
232
|
tracker.check(st, cycle) # must not raise
|
|
200
233
|
|
|
234
|
+
def test_extreme_wallclock_does_not_raise(self) -> None:
|
|
235
|
+
"""Wall-clock far past threshold must not raise either (0.4.13)."""
|
|
236
|
+
st, cycle = _make_state_with_usage({}, cycle_turns={"phase0": 1})
|
|
237
|
+
t = [1_000_000.0]
|
|
238
|
+
tracker = OllamaBudgetTracker(provider_is_ollama=True, clock=lambda: t[0])
|
|
239
|
+
tracker.start()
|
|
240
|
+
t[0] += OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS * 10 # 40 h
|
|
241
|
+
|
|
242
|
+
tracker.check(st, cycle) # must not raise
|
|
243
|
+
|
|
201
244
|
|
|
202
245
|
# ---------------------------------------------------------------------------
|
|
203
246
|
# Anthropic Max path — thresholds do not fire
|
|
@@ -218,15 +261,21 @@ class TestAnthropicMaxUntouched:
|
|
|
218
261
|
turn_warnings = [r for r in caplog.records if "consumed" in r.message.lower()]
|
|
219
262
|
assert turn_warnings == []
|
|
220
263
|
|
|
221
|
-
def
|
|
222
|
-
|
|
264
|
+
def test_anthropic_max_mode_skips_wallclock_warning(
|
|
265
|
+
self, caplog: pytest.LogCaptureFixture
|
|
266
|
+
) -> None:
|
|
267
|
+
"""provider_is_ollama=False → no WARNING, no raise."""
|
|
223
268
|
st, cycle = _make_state_with_usage({}, cycle_turns={"phase0": 1})
|
|
224
269
|
t = [1_000_000.0]
|
|
225
270
|
tracker = OllamaBudgetTracker(provider_is_ollama=False, clock=lambda: t[0])
|
|
226
271
|
tracker.start()
|
|
227
|
-
t[0] +=
|
|
272
|
+
t[0] += OLLAMA_WALLCLOCK_SOFT_WARN_SECONDS + 100
|
|
228
273
|
|
|
229
|
-
|
|
274
|
+
with caplog.at_level(logging.WARNING):
|
|
275
|
+
tracker.check(st, cycle)
|
|
276
|
+
|
|
277
|
+
wallclock_warnings = [r for r in caplog.records if "running for" in r.message.lower()]
|
|
278
|
+
assert wallclock_warnings == []
|
|
230
279
|
|
|
231
280
|
|
|
232
281
|
# ---------------------------------------------------------------------------
|