codeprobe 0.5.2__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeprobe-0.5.2 → codeprobe-0.5.3}/PKG-INFO +1 -1
- {codeprobe-0.5.2 → codeprobe-0.5.3}/pyproject.toml +1 -1
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/stats.py +25 -1
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/PKG-INFO +1 -1
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_stats.py +57 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/LICENSE +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/README.md +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/setup.cfg +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/__main__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/acceptance_compiler.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/_base.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/claude.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/codex.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/copilot.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/openai_compat.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/protocol.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/session.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/telemetry.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/ranking.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/report.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/api.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/heuristics.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/oracle_diff.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/assess_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/auth_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/doctor_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/experiment_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/init_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/interpret_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/json_display.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/mine_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/preamble_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/probe_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/ratings_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/rich_display.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/run_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/scaffold_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/validate_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/wizard.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/yaml_writer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/loader.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/redact.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/_shared.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/adaptive.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/counterfactual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/debate.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/decision_tree.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/elo.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/fingerprint.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/mutation.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/pareto.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/sprt.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/tournament.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/__main__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/checkpoint.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/events.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/executor.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/experiment.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/isolation.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/llm.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/mcp_discovery.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/preamble.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/registry.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/repo_hygiene.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/sandbox.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/scoring.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/loaders/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/loaders/suite.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/_graph.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/_lang.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/comprehension.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/comprehension_writer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator_backends.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator_tiers.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/extractor.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/multi_repo.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_families.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_oracle.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_scanner.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_validate.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sg_auth.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sg_ground_truth.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sources.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/task_types.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/writer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/evalrc.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/experiment.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/preamble.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/suite.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/task.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/github.md +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/sourcegraph.md +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/adapter.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/generator.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/writer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/ratings/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/ratings/collector.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/scaffold/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/scaffold/writer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/__init__.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/SOURCES.txt +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/dependency_links.txt +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/entry_points.txt +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/requires.txt +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/top_level.txt +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_acceptance_compiler.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_acceptance_compiler_integration.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_adapter_contracts.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_adapters.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_analysis.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_api.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_artifact_scorer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_assess.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_auth_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_changed_symbols.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_checkpoint.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_checkpoint_scoring.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_cli.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_comprehension.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_config_loader.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_contrib.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_convergence.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_criteria_loader.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ctrlc_integration.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_backends.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_core.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_integration.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_tiers.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_doctor_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_adversarial_fixes.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_composite.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_e2e.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_matrix.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_scorer.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_scoring_details.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_events.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_examples_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor_dual_isolation.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor_events.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_experiment_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_experiment_core.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ground_truth_schema.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_init_wizard.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_isolation.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_json_display.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_listeners_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_llm.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loader.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loaders.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loaders_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mcp_families_mining.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mcp_validate.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_cli.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_goals.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_presets.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_profiles.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mining.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mining_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_models.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_multi_repo_e2e.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_multi_repo_mining.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_new_families.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_openai_compat.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_diff.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_registry.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_types.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_org_scale.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_pipeline_integration.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_preamble.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_preamble_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_probe.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_probe_adapter.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ratings.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ratings_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_registry.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_regression_gate.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_release_gate.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_repo_hygiene.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_report_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_run_config_resolution.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_safe_leg_score.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scaffold.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scaffold_upgrade.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scanner_refactor.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_score_result.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring_extended.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring_v2.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sdlc_ground_truth.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_secret_redaction.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_session.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sg_auth.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sg_ground_truth.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_shell_shim.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_show_prompt.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_suite.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_suite_manifest.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_task_model.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_task_types.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_telemetry.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_validate_cmd.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_validate_dual.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_verifier.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_verify.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_weighted_checklist.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_weighted_f1.py +0 -0
- {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_writer_dual.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -583,7 +583,31 @@ def compare_configs(
|
|
|
583
583
|
elif speed_diff > 0:
|
|
584
584
|
parts.append(f"{speed_diff:.1f}s slower")
|
|
585
585
|
|
|
586
|
-
|
|
586
|
+
# Soften the verdict when the effect is negligible or the test is
|
|
587
|
+
# underpowered, so we don't confidently declare a "winner" on what may
|
|
588
|
+
# be noise. Thresholds:
|
|
589
|
+
# Cohen's d: |d| < 0.2 is "negligible" (Cohen 1988).
|
|
590
|
+
# Cliff's delta: |delta| < 0.147 is "negligible" (Romano et al. 2006).
|
|
591
|
+
# p-value > 0.05: not significant at the conventional threshold.
|
|
592
|
+
scores_tied = abs(score_diff) < 0.01
|
|
593
|
+
negligible_threshold = 0.2 if eff_method == "cohens_d" else 0.147
|
|
594
|
+
small_effect = (
|
|
595
|
+
eff_size is not None and abs(eff_size) < negligible_threshold
|
|
596
|
+
)
|
|
597
|
+
not_significant = p_val is not None and p_val > 0.05
|
|
598
|
+
|
|
599
|
+
if scores_tied:
|
|
600
|
+
verdict = "effectively tied"
|
|
601
|
+
elif small_effect and not_significant:
|
|
602
|
+
verdict = f"{winner} nominally ahead (not significant; small effect)"
|
|
603
|
+
elif small_effect:
|
|
604
|
+
verdict = f"{winner} nominally ahead (small effect size)"
|
|
605
|
+
elif not_significant:
|
|
606
|
+
verdict = f"{winner} nominally ahead (not significant at p=0.05)"
|
|
607
|
+
else:
|
|
608
|
+
verdict = f"{winner} wins"
|
|
609
|
+
|
|
610
|
+
summary = f"{a.label} vs {b.label}: {', '.join(parts)} \u2192 {verdict}"
|
|
587
611
|
|
|
588
612
|
return PairwiseComparison(
|
|
589
613
|
config_a=a.label,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -259,3 +259,60 @@ class TestComparePairwiseContinuousRouting:
|
|
|
259
259
|
b_scores=[0.0, 1.0, 0.0],
|
|
260
260
|
)
|
|
261
261
|
assert cmp.effect_size_method == "cliffs_delta"
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class TestVerdictSoftening:
|
|
265
|
+
"""Summary text softens the verdict when the effect is small or p is high."""
|
|
266
|
+
|
|
267
|
+
def _run_compare(self, a_scores, b_scores):
|
|
268
|
+
from codeprobe.analysis.stats import compare_configs
|
|
269
|
+
from codeprobe.models.experiment import ConfigResults
|
|
270
|
+
|
|
271
|
+
a_cr = ConfigResults(
|
|
272
|
+
config="a",
|
|
273
|
+
completed=[_task(f"t{i}", s) for i, s in enumerate(a_scores)],
|
|
274
|
+
)
|
|
275
|
+
b_cr = ConfigResults(
|
|
276
|
+
config="b",
|
|
277
|
+
completed=[_task(f"t{i}", s) for i, s in enumerate(b_scores)],
|
|
278
|
+
)
|
|
279
|
+
return compare_configs(
|
|
280
|
+
summarize_config(a_cr), summarize_config(b_cr),
|
|
281
|
+
a_scores=list(a_scores), b_scores=list(b_scores),
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
def test_large_effect_with_power_says_wins(self) -> None:
|
|
285
|
+
"""Consistent large gap across enough samples → unqualified winner."""
|
|
286
|
+
# N=8, unambiguous separation in every paired sample
|
|
287
|
+
a = [0.90, 0.88, 0.92, 0.85, 0.87, 0.93, 0.89, 0.91]
|
|
288
|
+
b = [0.10, 0.12, 0.15, 0.08, 0.18, 0.11, 0.14, 0.09]
|
|
289
|
+
cmp = self._run_compare(a, b)
|
|
290
|
+
assert "a wins" in cmp.summary
|
|
291
|
+
assert "nominally" not in cmp.summary
|
|
292
|
+
|
|
293
|
+
def test_small_effect_softens_verdict(self) -> None:
|
|
294
|
+
"""Noisy data with a tiny gap → softened verdict.
|
|
295
|
+
|
|
296
|
+
The gap (~0.02) clears the 0.01 tied threshold, but high within-
|
|
297
|
+
config variance keeps Cohen's d < 0.2, which should trigger the
|
|
298
|
+
"nominally ahead (small effect)" wording.
|
|
299
|
+
"""
|
|
300
|
+
a = [0.95, 0.10, 0.85, 0.20, 0.75, 0.30]
|
|
301
|
+
b = [0.93, 0.08, 0.83, 0.18, 0.72, 0.28]
|
|
302
|
+
cmp = self._run_compare(a, b)
|
|
303
|
+
assert "nominally ahead" in cmp.summary
|
|
304
|
+
# Should NOT say "wins" unqualified
|
|
305
|
+
assert " a wins" not in cmp.summary
|
|
306
|
+
assert " b wins" not in cmp.summary
|
|
307
|
+
|
|
308
|
+
def test_tied_scores_report_tied(self) -> None:
|
|
309
|
+
cmp = self._run_compare([0.5, 0.5], [0.5, 0.5])
|
|
310
|
+
assert "effectively tied" in cmp.summary
|
|
311
|
+
|
|
312
|
+
def test_real_experiment_numbers_produce_softened_verdict(self) -> None:
|
|
313
|
+
"""Regression: the kubernetes-mcp-comparison scenario (N=5, d=0.076)."""
|
|
314
|
+
baseline = [0.75, 0.40, 0.11, 0.71, 0.14]
|
|
315
|
+
with_mcp = [0.71, 0.36, 0.08, 0.71, 0.14]
|
|
316
|
+
cmp = self._run_compare(baseline, with_mcp)
|
|
317
|
+
# score_diff ~0.02, small cohen's d, high p → softened verdict
|
|
318
|
+
assert "nominally ahead" in cmp.summary
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|