@openhands/extensions 0.0.1-alpha → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/custom-codereview-guide.md +25 -0
- package/.github/pull_request_template.md +38 -0
- package/.github/release.yml +14 -0
- package/.github/workflows/check-extensions.yml +72 -0
- package/.github/workflows/npm-publish.yml +89 -0
- package/.github/workflows/pr.yml +30 -0
- package/.github/workflows/release.yml +24 -0
- package/.github/workflows/tests.yml +25 -0
- package/.github/workflows/vulnerability-scan.yml +87 -0
- package/.release-please-manifest.json +3 -0
- package/AGENTS.md +132 -0
- package/README.md +10 -0
- package/analysis_results.md +162 -0
- package/marketplaces/large-codebase.json +66 -0
- package/marketplaces/openhands-extensions.json +682 -0
- package/package.json +4 -10
- package/plugins/README.md +30 -0
- package/plugins/city-weather/.plugin/plugin.json +13 -0
- package/plugins/city-weather/README.md +145 -0
- package/plugins/city-weather/commands/now.md +56 -0
- package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
- package/plugins/cobol-modernization/README.md +201 -0
- package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
- package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
- package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
- package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
- package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
- package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
- package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
- package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
- package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
- package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
- package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
- package/plugins/issue-duplicate-checker/README.md +51 -0
- package/plugins/issue-duplicate-checker/action.yml +349 -0
- package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
- package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
- package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
- package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
- package/plugins/magic-test/.plugin/plugin.json +13 -0
- package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
- package/plugins/migration-scoring/.plugin/plugin.json +19 -0
- package/plugins/migration-scoring/README.md +244 -0
- package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
- package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
- package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
- package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
- package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
- package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
- package/plugins/onboarding/.plugin/plugin.json +20 -0
- package/plugins/onboarding/README.md +30 -0
- package/plugins/onboarding/references/criteria.md +144 -0
- package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
- package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
- package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
- package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
- package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
- package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
- package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
- package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
- package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
- package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
- package/plugins/openhands/.plugin/plugin.json +13 -0
- package/plugins/openhands/README.md +52 -0
- package/plugins/openhands/SKILL.md +61 -0
- package/plugins/openhands/commands/create.md +55 -0
- package/plugins/openhands/commands/openhands-cloud.md +8 -0
- package/plugins/openhands/scripts/run.sh +69 -0
- package/plugins/pr-review/.plugin/plugin.json +13 -0
- package/plugins/pr-review/README.md +393 -0
- package/plugins/pr-review/action.yml +298 -0
- package/plugins/pr-review/scripts/agent_script.py +1282 -0
- package/plugins/pr-review/scripts/evaluate_review.py +655 -0
- package/plugins/pr-review/scripts/prompt.py +260 -0
- package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
- package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
- package/plugins/qa-changes/.plugin/plugin.json +11 -0
- package/plugins/qa-changes/README.md +185 -0
- package/plugins/qa-changes/action.yml +181 -0
- package/plugins/qa-changes/scripts/agent_script.py +406 -0
- package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
- package/plugins/qa-changes/scripts/prompt.py +174 -0
- package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
- package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
- package/plugins/release-notes/.plugin/plugin.json +19 -0
- package/plugins/release-notes/README.md +283 -0
- package/plugins/release-notes/SKILL.md +83 -0
- package/plugins/release-notes/action.yml +117 -0
- package/plugins/release-notes/commands/release-notes.md +8 -0
- package/plugins/release-notes/scripts/agent_script.py +292 -0
- package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
- package/plugins/release-notes/scripts/prompt.py +90 -0
- package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
- package/plugins/release-notes/workflows/release-notes.yml +76 -0
- package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
- package/plugins/vulnerability-remediation/README.md +217 -0
- package/plugins/vulnerability-remediation/action.yml +187 -0
- package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
- package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
- package/pyproject.toml +12 -0
- package/release-please-config.json +16 -0
- package/scripts/sync_extensions.py +494 -0
- package/scripts/sync_openhands_sdk_skill.py +264 -0
- package/skills/README.md +159 -0
- package/skills/add-javadoc/.plugin/plugin.json +18 -0
- package/skills/add-javadoc/README.md +40 -0
- package/skills/add-javadoc/SKILL.md +35 -0
- package/skills/add-javadoc/references/example.md +32 -0
- package/skills/add-skill/.plugin/plugin.json +18 -0
- package/skills/add-skill/README.md +67 -0
- package/skills/add-skill/SKILL.md +47 -0
- package/skills/add-skill/scripts/fetch_skill.py +259 -0
- package/skills/agent-creator/.plugin/plugin.json +20 -0
- package/skills/agent-creator/README.md +104 -0
- package/skills/agent-creator/SKILL.md +190 -0
- package/skills/agent-creator/commands/agent-creator.md +8 -0
- package/skills/agent-creator/references/fallback.md +117 -0
- package/skills/agent-memory/.plugin/plugin.json +18 -0
- package/skills/agent-memory/README.md +35 -0
- package/skills/agent-memory/SKILL.md +30 -0
- package/skills/agent-memory/commands/remember.md +8 -0
- package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
- package/skills/agent-sdk-builder/README.md +40 -0
- package/skills/agent-sdk-builder/SKILL.md +37 -0
- package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
- package/skills/azure-devops/.plugin/plugin.json +18 -0
- package/skills/azure-devops/README.md +55 -0
- package/skills/azure-devops/SKILL.md +50 -0
- package/skills/bitbucket/.plugin/plugin.json +17 -0
- package/skills/bitbucket/README.md +50 -0
- package/skills/bitbucket/SKILL.md +45 -0
- package/skills/code-review/.plugin/plugin.json +19 -0
- package/skills/code-review/README.md +18 -0
- package/skills/code-review/SKILL.md +208 -0
- package/skills/code-review/commands/codereview-roasted.md +8 -0
- package/skills/code-review/commands/codereview.md +8 -0
- package/skills/code-review/references/risk-evaluation.md +41 -0
- package/skills/code-review/references/supply-chain-security.md +31 -0
- package/skills/code-simplifier/.plugin/plugin.json +21 -0
- package/skills/code-simplifier/README.md +30 -0
- package/skills/code-simplifier/SKILL.md +91 -0
- package/skills/code-simplifier/commands/simplify.md +8 -0
- package/skills/code-simplifier/references/code-quality-review.md +86 -0
- package/skills/code-simplifier/references/code-reuse-review.md +63 -0
- package/skills/code-simplifier/references/efficiency-review.md +81 -0
- package/skills/datadog/.plugin/plugin.json +19 -0
- package/skills/datadog/README.md +100 -0
- package/skills/datadog/SKILL.md +95 -0
- package/skills/deno/.plugin/plugin.json +18 -0
- package/skills/deno/README.md +5 -0
- package/skills/deno/SKILL.md +99 -0
- package/skills/deno/references/README.md +6 -0
- package/skills/discord/.plugin/plugin.json +18 -0
- package/skills/discord/README.md +31 -0
- package/skills/discord/SKILL.md +109 -0
- package/skills/discord/__init__.py +0 -0
- package/skills/discord/references/REFERENCE.md +78 -0
- package/skills/discord/scripts/__init__.py +0 -0
- package/skills/discord/scripts/_http.py +127 -0
- package/skills/discord/scripts/post_webhook.py +106 -0
- package/skills/discord/scripts/send_message.py +102 -0
- package/skills/docker/.plugin/plugin.json +17 -0
- package/skills/docker/README.md +34 -0
- package/skills/docker/SKILL.md +29 -0
- package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
- package/skills/evidence-based-citations/README.md +31 -0
- package/skills/evidence-based-citations/SKILL.md +59 -0
- package/skills/flarglebargle/.plugin/plugin.json +16 -0
- package/skills/flarglebargle/README.md +14 -0
- package/skills/flarglebargle/SKILL.md +9 -0
- package/skills/frontend-design/.plugin/plugin.json +21 -0
- package/skills/frontend-design/LICENSE.txt +177 -0
- package/skills/frontend-design/README.md +42 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/github/.plugin/plugin.json +19 -0
- package/skills/github/README.md +42 -0
- package/skills/github/SKILL.md +106 -0
- package/skills/github-pr-review/.plugin/plugin.json +18 -0
- package/skills/github-pr-review/README.md +145 -0
- package/skills/github-pr-review/SKILL.md +148 -0
- package/skills/github-pr-review/commands/github-pr-review.md +8 -0
- package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
- package/skills/github-pr-reviewer/README.md +34 -0
- package/skills/github-pr-reviewer/SKILL.md +89 -0
- package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
- package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
- package/skills/github-repo-monitor/README.md +70 -0
- package/skills/github-repo-monitor/SKILL.md +316 -0
- package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
- package/skills/github-repo-monitor/references/github-api.md +241 -0
- package/skills/github-repo-monitor/references/state-schema.md +160 -0
- package/skills/github-repo-monitor/scripts/main.py +915 -0
- package/skills/github-repo-monitor/tests/test_main.py +400 -0
- package/skills/gitlab/.plugin/plugin.json +17 -0
- package/skills/gitlab/README.md +37 -0
- package/skills/gitlab/SKILL.md +32 -0
- package/skills/incident-retrospective/.plugin/plugin.json +21 -0
- package/skills/incident-retrospective/README.md +34 -0
- package/skills/incident-retrospective/SKILL.md +98 -0
- package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
- package/skills/iterate/.plugin/plugin.json +13 -0
- package/skills/iterate/README.md +25 -0
- package/skills/iterate/SKILL.md +399 -0
- package/skills/iterate/commands/babysit.md +8 -0
- package/skills/iterate/commands/iterate.md +8 -0
- package/skills/iterate/commands/verify.md +8 -0
- package/skills/iterate/references/heuristics.md +58 -0
- package/skills/iterate/references/verification.md +96 -0
- package/skills/jupyter/.plugin/plugin.json +18 -0
- package/skills/jupyter/README.md +55 -0
- package/skills/jupyter/SKILL.md +50 -0
- package/skills/kubernetes/.plugin/plugin.json +18 -0
- package/skills/kubernetes/README.md +53 -0
- package/skills/kubernetes/SKILL.md +48 -0
- package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
- package/skills/learn-from-code-review/README.md +64 -0
- package/skills/learn-from-code-review/SKILL.md +186 -0
- package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
- package/skills/linear/.plugin/plugin.json +19 -0
- package/skills/linear/README.md +58 -0
- package/skills/linear/SKILL.md +213 -0
- package/skills/linear-triage/.plugin/plugin.json +21 -0
- package/skills/linear-triage/README.md +34 -0
- package/skills/linear-triage/SKILL.md +91 -0
- package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
- package/skills/notion/.plugin/plugin.json +17 -0
- package/skills/notion/README.md +114 -0
- package/skills/notion/SKILL.md +109 -0
- package/skills/npm/.plugin/plugin.json +17 -0
- package/skills/npm/README.md +14 -0
- package/skills/npm/SKILL.md +9 -0
- package/skills/openhands-api/.plugin/plugin.json +22 -0
- package/skills/openhands-api/README.md +48 -0
- package/skills/openhands-api/SKILL.md +399 -0
- package/skills/openhands-api/references/README.md +33 -0
- package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
- package/skills/openhands-api/references/example_prompt.md +12 -0
- package/skills/openhands-api/scripts/openhands_api.py +606 -0
- package/skills/openhands-api/scripts/openhands_api.ts +252 -0
- package/skills/openhands-automation/.plugin/plugin.json +19 -0
- package/skills/openhands-automation/README.md +89 -0
- package/skills/openhands-automation/SKILL.md +875 -0
- package/skills/openhands-automation/commands/automation:create.md +8 -0
- package/skills/openhands-automation/references/ab-testing.md +185 -0
- package/skills/openhands-automation/references/custom-automation.md +644 -0
- package/skills/openhands-sdk/.plugin/plugin.json +20 -0
- package/skills/openhands-sdk/README.md +22 -0
- package/skills/openhands-sdk/SKILL.md +229 -0
- package/skills/openhands-sdk/commands/sdk.md +8 -0
- package/skills/pdflatex/.plugin/plugin.json +18 -0
- package/skills/pdflatex/README.md +39 -0
- package/skills/pdflatex/SKILL.md +34 -0
- package/skills/prd/.plugin/plugin.json +19 -0
- package/skills/prd/README.md +28 -0
- package/skills/prd/SKILL.md +237 -0
- package/skills/prd/commands/prd.md +8 -0
- package/skills/qa-changes/README.md +18 -0
- package/skills/qa-changes/SKILL.md +229 -0
- package/skills/qa-changes/commands/qa-changes.md +8 -0
- package/skills/release-notes/README.md +24 -0
- package/skills/release-notes/SKILL.md +19 -0
- package/skills/release-notes/commands/release-notes.md +8 -0
- package/skills/research-brief/.plugin/plugin.json +20 -0
- package/skills/research-brief/README.md +34 -0
- package/skills/research-brief/SKILL.md +99 -0
- package/skills/research-brief/commands/research-brief:setup.md +8 -0
- package/skills/security/.plugin/plugin.json +18 -0
- package/skills/security/README.md +38 -0
- package/skills/security/SKILL.md +33 -0
- package/skills/skill-creator/.plugin/plugin.json +17 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/README.md +182 -0
- package/skills/skill-creator/SKILL.md +545 -0
- package/skills/skill-creator/references/output-patterns.md +82 -0
- package/skills/skill-creator/references/workflows.md +28 -0
- package/skills/skill-creator/scripts/init_skill.py +303 -0
- package/skills/skill-creator/scripts/quick_validate.py +95 -0
- package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
- package/skills/slack-channel-monitor/README.md +91 -0
- package/skills/slack-channel-monitor/SKILL.md +276 -0
- package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
- package/skills/slack-channel-monitor/references/slack-api.md +207 -0
- package/skills/slack-channel-monitor/references/state-schema.md +180 -0
- package/skills/slack-channel-monitor/scripts/main.py +962 -0
- package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
- package/skills/slack-standup-digest/README.md +34 -0
- package/skills/slack-standup-digest/SKILL.md +92 -0
- package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
- package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
- package/skills/spark-version-upgrade/README.md +54 -0
- package/skills/spark-version-upgrade/SKILL.md +233 -0
- package/skills/ssh/.plugin/plugin.json +18 -0
- package/skills/ssh/README.md +140 -0
- package/skills/ssh/SKILL.md +135 -0
- package/skills/swift-linux/.plugin/plugin.json +17 -0
- package/skills/swift-linux/README.md +86 -0
- package/skills/swift-linux/SKILL.md +81 -0
- package/skills/theme-factory/.plugin/plugin.json +19 -0
- package/skills/theme-factory/LICENSE.txt +202 -0
- package/skills/theme-factory/README.md +58 -0
- package/skills/theme-factory/SKILL.md +59 -0
- package/skills/theme-factory/theme-showcase.pdf +0 -0
- package/skills/theme-factory/themes/arctic-frost.md +19 -0
- package/skills/theme-factory/themes/botanical-garden.md +19 -0
- package/skills/theme-factory/themes/desert-rose.md +19 -0
- package/skills/theme-factory/themes/forest-canopy.md +19 -0
- package/skills/theme-factory/themes/golden-hour.md +19 -0
- package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
- package/skills/theme-factory/themes/modern-minimalist.md +19 -0
- package/skills/theme-factory/themes/ocean-depths.md +19 -0
- package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
- package/skills/theme-factory/themes/tech-innovation.md +19 -0
- package/skills/uv/.plugin/plugin.json +18 -0
- package/skills/uv/README.md +5 -0
- package/skills/uv/SKILL.md +95 -0
- package/skills/uv/references/README.md +5 -0
- package/skills/vercel/.plugin/plugin.json +18 -0
- package/skills/vercel/README.md +108 -0
- package/skills/vercel/SKILL.md +103 -0
- package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
- package/tests/test_catalogs.py +109 -0
- package/tests/test_code_review_risk_evaluation.py +94 -0
- package/tests/test_issue_duplicate_checker.py +240 -0
- package/tests/test_openhands_api_python.py +152 -0
- package/tests/test_plugin_manifest.py +83 -0
- package/tests/test_pr_review_diff_payload.py +202 -0
- package/tests/test_pr_review_feedback.py +263 -0
- package/tests/test_pr_review_prompt.py +152 -0
- package/tests/test_pr_review_review_context.py +253 -0
- package/tests/test_qa_changes.py +232 -0
- package/tests/test_qa_changes_evaluation.py +259 -0
- package/tests/test_release_notes_generator.py +990 -0
- package/tests/test_sdk_loading.py +150 -0
- package/tests/test_skill_plugin_loading.py +149 -0
- package/tests/test_skills_have_readme.py +66 -0
- package/tests/test_sync_extensions.py +292 -0
- package/tests/test_workflow_sync.py +46 -0
- package/utils/analysis/README.md +7 -0
- package/utils/analysis/laminar_signals/README.md +211 -0
- package/utils/analysis/laminar_signals/analyze.py +780 -0
- package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
- package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Tests for the qa-changes evaluation script (evaluate_qa_changes.py)."""
|
|
2
|
+
|
|
3
|
+
import importlib.util
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
import types
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
# Directory holding the qa-changes plugin scripts, resolved relative to this
# test file: <parent of this file's directory>/plugins/qa-changes/scripts.
_SCRIPTS_DIR = Path(__file__).parent.parent.joinpath(
    "plugins", "qa-changes", "scripts"
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _load_eval_module():
    """Load ``evaluate_qa_changes.py`` with a stub ``lmnr`` module installed.

    A throwaway ``lmnr`` module exposing no-op ``Laminar`` and
    ``LaminarClient`` classes is placed in ``sys.modules`` while the script is
    executed (the real package needs a project key), and whatever was
    registered under ``"lmnr"`` beforehand is restored afterwards.

    Returns:
        The executed module object, which is also left registered in
        ``sys.modules`` under ``"evaluate_qa_changes"``.

    Raises:
        ImportError: If no import spec/loader can be built for the script.
        Exception: Anything raised while executing the script is propagated;
            in that case the partially initialised module is removed from
            ``sys.modules`` first.
    """

    class _FakeLaminar:
        """No-op replacement for ``lmnr.Laminar``."""

        @staticmethod
        def initialize():
            pass

        @staticmethod
        def get_trace_id():
            return None

        @staticmethod
        def get_laminar_span_context():
            return None

        @staticmethod
        def set_trace_metadata(meta):
            pass

        @staticmethod
        def set_span_output(output):
            pass

        @staticmethod
        def flush():
            pass

        @staticmethod
        def start_as_current_span(**kwargs):
            import contextlib

            # A context manager that does nothing on enter/exit.
            return contextlib.nullcontext()

    class _FakeClient:
        """No-op replacement for ``lmnr.LaminarClient``."""

        class evaluators:
            @staticmethod
            def score(**kwargs):
                pass

        class tags:
            @staticmethod
            def tag(trace_id, tags):
                pass

    lmnr_mod = types.ModuleType("lmnr")
    lmnr_mod.Laminar = _FakeLaminar
    lmnr_mod.LaminarClient = _FakeClient

    # Remember any previously registered lmnr so it can be put back afterwards.
    saved = sys.modules.get("lmnr")
    sys.modules["lmnr"] = lmnr_mod

    try:
        path = _SCRIPTS_DIR / "evaluate_qa_changes.py"
        spec = importlib.util.spec_from_file_location("evaluate_qa_changes", path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot build an import spec for {path}")
        module = importlib.util.module_from_spec(spec)
        # Register before executing, as the original did, so the module can be
        # looked up by name while its body runs.
        sys.modules[spec.name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module behind for later lookups.
            sys.modules.pop(spec.name, None)
            raise
        return module
    finally:
        # Restore the pre-existing lmnr entry, or drop the stub if none existed.
        if saved is None:
            sys.modules.pop("lmnr", None)
        else:
            sys.modules["lmnr"] = saved
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@pytest.fixture(scope="module")
def eval_mod():
    """Module-scoped fixture yielding the loaded ``evaluate_qa_changes`` module."""
    loaded = _load_eval_module()
    return loaded
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ===================================================================
|
|
87
|
+
# extract_qa_report
|
|
88
|
+
# ===================================================================
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestExtractQaReport:
    """Checks for ``extract_qa_report`` on lists of comment payloads."""

    @staticmethod
    def _comment(login, comment_id, body, created_at):
        """Build one comment dict in the shape these tests feed to the function."""
        return {
            "user": {"login": login},
            "id": comment_id,
            "body": body,
            "created_at": created_at,
        }

    def test_extracts_agent_comments(self, eval_mod):
        """Comments 1 and 3 are expected back; the ``human-dev`` one is not."""
        payload = [
            self._comment("openhands-agent", 1, "QA report", "2024-01-01"),
            self._comment("human-dev", 2, "looks good", "2024-01-02"),
            self._comment("all-hands-bot", 3, "another report", "2024-01-03"),
        ]
        reports = eval_mod.extract_qa_report(payload)
        assert len(reports) == 2
        assert reports[0]["id"] == 1
        assert reports[0]["type"] == "qa_report"
        assert reports[1]["id"] == 3

    def test_empty_comments(self, eval_mod):
        """An empty comment list yields an empty result."""
        no_comments = []
        assert eval_mod.extract_qa_report(no_comments) == []

    def test_no_agent_comments(self, eval_mod):
        """A list containing only a ``human`` comment yields an empty result."""
        payload = [self._comment("human", 1, "test", "2024-01-01")]
        assert eval_mod.extract_qa_report(payload) == []
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ===================================================================
|
|
115
|
+
# extract_human_responses
|
|
116
|
+
# ===================================================================
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class TestExtractHumanResponses:
    """Checks for ``extract_human_responses`` on lists of comment payloads."""

    @staticmethod
    def _comment(login, comment_id, body, created_at):
        """Build one comment dict in the shape these tests feed to the function."""
        return {
            "user": {"login": login},
            "id": comment_id,
            "body": body,
            "created_at": created_at,
        }

    def test_extracts_human_comments(self, eval_mod):
        """The two ``dev-*`` comments are expected back, in input order."""
        payload = [
            self._comment("openhands-agent", 1, "QA report", "2024-01-01"),
            self._comment("dev-alice", 2, "thanks", "2024-01-02"),
            self._comment("dev-bob", 3, "agreed", "2024-01-03"),
        ]
        responses = eval_mod.extract_human_responses(payload)
        assert len(responses) == 2
        assert responses[0]["user"] == "dev-alice"
        assert responses[1]["user"] == "dev-bob"

    def test_empty_comments(self, eval_mod):
        """An empty comment list yields an empty result."""
        no_comments = []
        assert eval_mod.extract_human_responses(no_comments) == []

    def test_all_agent_comments(self, eval_mod):
        """A list containing only an ``openhands-agent`` comment yields nothing."""
        payload = [self._comment("openhands-agent", 1, "report", "2024-01-01")]
        assert eval_mod.extract_human_responses(payload) == []

    def test_custom_agent_users(self, eval_mod):
        """With ``agent_users={"my-bot"}`` only the ``human`` comment remains."""
        payload = [
            self._comment("my-bot", 1, "report", "2024-01-01"),
            self._comment("human", 2, "ok", "2024-01-02"),
        ]
        responses = eval_mod.extract_human_responses(payload, agent_users={"my-bot"})
        assert len(responses) == 1
        assert responses[0]["user"] == "human"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# ===================================================================
|
|
151
|
+
# truncate_text
|
|
152
|
+
# ===================================================================
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class TestTruncateText:
    """Checks for ``truncate_text`` around its ``max_chars`` boundary."""

    def test_short_text_unchanged(self, eval_mod):
        """A short string comes back as-is."""
        short = "hello"
        assert eval_mod.truncate_text(short) == "hello"

    def test_exact_limit(self, eval_mod):
        """A string exactly ``max_chars`` long is returned untouched."""
        at_limit = "x" * 100
        assert eval_mod.truncate_text(at_limit, max_chars=100) == at_limit

    def test_over_limit(self, eval_mod):
        """An over-long string keeps its first 100 chars and gains a notice."""
        original = "x" * 200
        shortened = eval_mod.truncate_text(original, max_chars=100)
        assert shortened.startswith("x" * 100)
        # The notice is expected to mention truncation and the original length.
        assert "truncated" in shortened
        assert "200" in shortened

    def test_default_limit_is_50k(self, eval_mod):
        """Without ``max_chars``, 50000 chars pass through and 50001 do not."""
        at_limit = "x" * 50000
        assert eval_mod.truncate_text(at_limit) == at_limit
        past_limit = "x" * 50001
        shortened = eval_mod.truncate_text(past_limit)
        assert "truncated" in shortened
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ===================================================================
|
|
178
|
+
# calculate_engagement_score
|
|
179
|
+
# ===================================================================
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class TestCalculateEngagementScore:
    """Checks for ``calculate_engagement_score(qa, human, merged)``."""

    def test_no_comments_no_merge(self, eval_mod):
        """No QA comments, no replies and no merge scores exactly zero."""
        assert eval_mod.calculate_engagement_score([], [], False) == 0.0

    def test_qa_report_posted_no_responses(self, eval_mod):
        """A single QA report on its own is expected to score 0.3."""
        qa_comments = [{"type": "qa_report", "body": "report"}]
        result = eval_mod.calculate_engagement_score(qa_comments, [], False)
        assert result == pytest.approx(0.3)

    def test_qa_report_with_responses(self, eval_mod):
        """One report plus one reply: 0.3 for the report + 1.0 * 0.2 for the reply ratio."""
        qa_comments = [{"type": "qa_report", "body": "report"}]
        replies = [{"type": "issue_comment", "body": "thanks"}]
        result = eval_mod.calculate_engagement_score(qa_comments, replies, False)
        assert result == pytest.approx(0.5)

    def test_merged_bonus(self, eval_mod):
        """A merge with no comments at all is expected to score 0.3."""
        result = eval_mod.calculate_engagement_score([], [], True)
        assert result == pytest.approx(0.3)

    def test_full_engagement(self, eval_mod):
        """Report (0.3) + reply (0.2) + merged (0.3) is expected to total 0.8."""
        qa_comments = [{"type": "qa_report", "body": "report"}]
        replies = [{"type": "issue_comment", "body": "thanks"}]
        result = eval_mod.calculate_engagement_score(qa_comments, replies, True)
        assert result == pytest.approx(0.8)

    def test_many_responses_capped(self, eval_mod):
        """Ten replies to one report: the ratio is capped at 1.0, so 0.3 + 0.2."""
        qa_comments = [{"type": "qa_report", "body": "report"}]
        replies = []
        for i in range(10):
            replies.append({"body": f"msg {i}"})
        result = eval_mod.calculate_engagement_score(qa_comments, replies, False)
        assert result == pytest.approx(0.5)

    def test_multiple_qa_comments_with_fewer_responses(self, eval_mod):
        """Four QA comments and one reply: 0.3 + (1/4) * 0.2 = 0.35."""
        qa_comments = []
        for i in range(4):
            qa_comments.append({"body": f"qa {i}"})
        replies = [{"body": "reply"}]
        result = eval_mod.calculate_engagement_score(qa_comments, replies, False)
        assert result == pytest.approx(0.35)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ===================================================================
|
|
226
|
+
# load_trace_info
|
|
227
|
+
# ===================================================================
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class TestLoadTraceInfo:
    """Checks for ``load_trace_info`` reading JSON trace files from disk."""

    def test_file_not_found_returns_empty(self, eval_mod, tmp_path):
        """A path that does not exist is expected to give back an empty dict."""
        missing = tmp_path / "nonexistent.json"
        assert eval_mod.load_trace_info(str(missing)) == {}

    def test_loads_valid_trace_file(self, eval_mod, tmp_path):
        """Fields written to the trace file are readable from the result."""
        trace_file = tmp_path / "trace.json"
        payload = json.dumps(
            {
                "trace_id": "abc-123",
                "span_context": {"trace_id": "abc", "span_id": "def"},
                "pr_number": "42",
                "repo_name": "org/repo",
                "commit_id": "deadbeef",
                "model": "claude-sonnet",
            }
        )
        trace_file.write_text(payload)

        loaded = eval_mod.load_trace_info(str(trace_file))
        assert loaded["trace_id"] == "abc-123"
        assert loaded["pr_number"] == "42"
        assert loaded["span_context"]["trace_id"] == "abc"

    def test_trace_without_span_context(self, eval_mod, tmp_path):
        """A trace file lacking ``span_context`` still loads; the key reads as None."""
        trace_file = tmp_path / "trace.json"
        payload = json.dumps({"trace_id": "abc-123"})
        trace_file.write_text(payload)

        loaded = eval_mod.load_trace_info(str(trace_file))
        assert loaded["trace_id"] == "abc-123"
        assert loaded.get("span_context") is None
|