@openhands/extensions 0.0.1-alpha → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/custom-codereview-guide.md +25 -0
- package/.github/pull_request_template.md +38 -0
- package/.github/release.yml +14 -0
- package/.github/workflows/check-extensions.yml +72 -0
- package/.github/workflows/npm-publish.yml +89 -0
- package/.github/workflows/pr.yml +30 -0
- package/.github/workflows/release.yml +24 -0
- package/.github/workflows/tests.yml +25 -0
- package/.github/workflows/vulnerability-scan.yml +87 -0
- package/.release-please-manifest.json +3 -0
- package/AGENTS.md +132 -0
- package/README.md +10 -0
- package/analysis_results.md +162 -0
- package/marketplaces/large-codebase.json +66 -0
- package/marketplaces/openhands-extensions.json +682 -0
- package/package.json +4 -10
- package/plugins/README.md +30 -0
- package/plugins/city-weather/.plugin/plugin.json +13 -0
- package/plugins/city-weather/README.md +145 -0
- package/plugins/city-weather/commands/now.md +56 -0
- package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
- package/plugins/cobol-modernization/README.md +201 -0
- package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
- package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
- package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
- package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
- package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
- package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
- package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
- package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
- package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
- package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
- package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
- package/plugins/issue-duplicate-checker/README.md +51 -0
- package/plugins/issue-duplicate-checker/action.yml +349 -0
- package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
- package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
- package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
- package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
- package/plugins/magic-test/.plugin/plugin.json +13 -0
- package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
- package/plugins/migration-scoring/.plugin/plugin.json +19 -0
- package/plugins/migration-scoring/README.md +244 -0
- package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
- package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
- package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
- package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
- package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
- package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
- package/plugins/onboarding/.plugin/plugin.json +20 -0
- package/plugins/onboarding/README.md +30 -0
- package/plugins/onboarding/references/criteria.md +144 -0
- package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
- package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
- package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
- package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
- package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
- package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
- package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
- package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
- package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
- package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
- package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
- package/plugins/openhands/.plugin/plugin.json +13 -0
- package/plugins/openhands/README.md +52 -0
- package/plugins/openhands/SKILL.md +61 -0
- package/plugins/openhands/commands/create.md +55 -0
- package/plugins/openhands/commands/openhands-cloud.md +8 -0
- package/plugins/openhands/scripts/run.sh +69 -0
- package/plugins/pr-review/.plugin/plugin.json +13 -0
- package/plugins/pr-review/README.md +393 -0
- package/plugins/pr-review/action.yml +298 -0
- package/plugins/pr-review/scripts/agent_script.py +1282 -0
- package/plugins/pr-review/scripts/evaluate_review.py +655 -0
- package/plugins/pr-review/scripts/prompt.py +260 -0
- package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
- package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
- package/plugins/qa-changes/.plugin/plugin.json +11 -0
- package/plugins/qa-changes/README.md +185 -0
- package/plugins/qa-changes/action.yml +181 -0
- package/plugins/qa-changes/scripts/agent_script.py +406 -0
- package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
- package/plugins/qa-changes/scripts/prompt.py +174 -0
- package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
- package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
- package/plugins/release-notes/.plugin/plugin.json +19 -0
- package/plugins/release-notes/README.md +283 -0
- package/plugins/release-notes/SKILL.md +83 -0
- package/plugins/release-notes/action.yml +117 -0
- package/plugins/release-notes/commands/release-notes.md +8 -0
- package/plugins/release-notes/scripts/agent_script.py +292 -0
- package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
- package/plugins/release-notes/scripts/prompt.py +90 -0
- package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
- package/plugins/release-notes/workflows/release-notes.yml +76 -0
- package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
- package/plugins/vulnerability-remediation/README.md +217 -0
- package/plugins/vulnerability-remediation/action.yml +187 -0
- package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
- package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
- package/pyproject.toml +12 -0
- package/release-please-config.json +16 -0
- package/scripts/sync_extensions.py +494 -0
- package/scripts/sync_openhands_sdk_skill.py +264 -0
- package/skills/README.md +159 -0
- package/skills/add-javadoc/.plugin/plugin.json +18 -0
- package/skills/add-javadoc/README.md +40 -0
- package/skills/add-javadoc/SKILL.md +35 -0
- package/skills/add-javadoc/references/example.md +32 -0
- package/skills/add-skill/.plugin/plugin.json +18 -0
- package/skills/add-skill/README.md +67 -0
- package/skills/add-skill/SKILL.md +47 -0
- package/skills/add-skill/scripts/fetch_skill.py +259 -0
- package/skills/agent-creator/.plugin/plugin.json +20 -0
- package/skills/agent-creator/README.md +104 -0
- package/skills/agent-creator/SKILL.md +190 -0
- package/skills/agent-creator/commands/agent-creator.md +8 -0
- package/skills/agent-creator/references/fallback.md +117 -0
- package/skills/agent-memory/.plugin/plugin.json +18 -0
- package/skills/agent-memory/README.md +35 -0
- package/skills/agent-memory/SKILL.md +30 -0
- package/skills/agent-memory/commands/remember.md +8 -0
- package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
- package/skills/agent-sdk-builder/README.md +40 -0
- package/skills/agent-sdk-builder/SKILL.md +37 -0
- package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
- package/skills/azure-devops/.plugin/plugin.json +18 -0
- package/skills/azure-devops/README.md +55 -0
- package/skills/azure-devops/SKILL.md +50 -0
- package/skills/bitbucket/.plugin/plugin.json +17 -0
- package/skills/bitbucket/README.md +50 -0
- package/skills/bitbucket/SKILL.md +45 -0
- package/skills/code-review/.plugin/plugin.json +19 -0
- package/skills/code-review/README.md +18 -0
- package/skills/code-review/SKILL.md +208 -0
- package/skills/code-review/commands/codereview-roasted.md +8 -0
- package/skills/code-review/commands/codereview.md +8 -0
- package/skills/code-review/references/risk-evaluation.md +41 -0
- package/skills/code-review/references/supply-chain-security.md +31 -0
- package/skills/code-simplifier/.plugin/plugin.json +21 -0
- package/skills/code-simplifier/README.md +30 -0
- package/skills/code-simplifier/SKILL.md +91 -0
- package/skills/code-simplifier/commands/simplify.md +8 -0
- package/skills/code-simplifier/references/code-quality-review.md +86 -0
- package/skills/code-simplifier/references/code-reuse-review.md +63 -0
- package/skills/code-simplifier/references/efficiency-review.md +81 -0
- package/skills/datadog/.plugin/plugin.json +19 -0
- package/skills/datadog/README.md +100 -0
- package/skills/datadog/SKILL.md +95 -0
- package/skills/deno/.plugin/plugin.json +18 -0
- package/skills/deno/README.md +5 -0
- package/skills/deno/SKILL.md +99 -0
- package/skills/deno/references/README.md +6 -0
- package/skills/discord/.plugin/plugin.json +18 -0
- package/skills/discord/README.md +31 -0
- package/skills/discord/SKILL.md +109 -0
- package/skills/discord/__init__.py +0 -0
- package/skills/discord/references/REFERENCE.md +78 -0
- package/skills/discord/scripts/__init__.py +0 -0
- package/skills/discord/scripts/_http.py +127 -0
- package/skills/discord/scripts/post_webhook.py +106 -0
- package/skills/discord/scripts/send_message.py +102 -0
- package/skills/docker/.plugin/plugin.json +17 -0
- package/skills/docker/README.md +34 -0
- package/skills/docker/SKILL.md +29 -0
- package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
- package/skills/evidence-based-citations/README.md +31 -0
- package/skills/evidence-based-citations/SKILL.md +59 -0
- package/skills/flarglebargle/.plugin/plugin.json +16 -0
- package/skills/flarglebargle/README.md +14 -0
- package/skills/flarglebargle/SKILL.md +9 -0
- package/skills/frontend-design/.plugin/plugin.json +21 -0
- package/skills/frontend-design/LICENSE.txt +177 -0
- package/skills/frontend-design/README.md +42 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/github/.plugin/plugin.json +19 -0
- package/skills/github/README.md +42 -0
- package/skills/github/SKILL.md +106 -0
- package/skills/github-pr-review/.plugin/plugin.json +18 -0
- package/skills/github-pr-review/README.md +145 -0
- package/skills/github-pr-review/SKILL.md +148 -0
- package/skills/github-pr-review/commands/github-pr-review.md +8 -0
- package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
- package/skills/github-pr-reviewer/README.md +34 -0
- package/skills/github-pr-reviewer/SKILL.md +89 -0
- package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
- package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
- package/skills/github-repo-monitor/README.md +70 -0
- package/skills/github-repo-monitor/SKILL.md +316 -0
- package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
- package/skills/github-repo-monitor/references/github-api.md +241 -0
- package/skills/github-repo-monitor/references/state-schema.md +160 -0
- package/skills/github-repo-monitor/scripts/main.py +915 -0
- package/skills/github-repo-monitor/tests/test_main.py +400 -0
- package/skills/gitlab/.plugin/plugin.json +17 -0
- package/skills/gitlab/README.md +37 -0
- package/skills/gitlab/SKILL.md +32 -0
- package/skills/incident-retrospective/.plugin/plugin.json +21 -0
- package/skills/incident-retrospective/README.md +34 -0
- package/skills/incident-retrospective/SKILL.md +98 -0
- package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
- package/skills/iterate/.plugin/plugin.json +13 -0
- package/skills/iterate/README.md +25 -0
- package/skills/iterate/SKILL.md +399 -0
- package/skills/iterate/commands/babysit.md +8 -0
- package/skills/iterate/commands/iterate.md +8 -0
- package/skills/iterate/commands/verify.md +8 -0
- package/skills/iterate/references/heuristics.md +58 -0
- package/skills/iterate/references/verification.md +96 -0
- package/skills/jupyter/.plugin/plugin.json +18 -0
- package/skills/jupyter/README.md +55 -0
- package/skills/jupyter/SKILL.md +50 -0
- package/skills/kubernetes/.plugin/plugin.json +18 -0
- package/skills/kubernetes/README.md +53 -0
- package/skills/kubernetes/SKILL.md +48 -0
- package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
- package/skills/learn-from-code-review/README.md +64 -0
- package/skills/learn-from-code-review/SKILL.md +186 -0
- package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
- package/skills/linear/.plugin/plugin.json +19 -0
- package/skills/linear/README.md +58 -0
- package/skills/linear/SKILL.md +213 -0
- package/skills/linear-triage/.plugin/plugin.json +21 -0
- package/skills/linear-triage/README.md +34 -0
- package/skills/linear-triage/SKILL.md +91 -0
- package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
- package/skills/notion/.plugin/plugin.json +17 -0
- package/skills/notion/README.md +114 -0
- package/skills/notion/SKILL.md +109 -0
- package/skills/npm/.plugin/plugin.json +17 -0
- package/skills/npm/README.md +14 -0
- package/skills/npm/SKILL.md +9 -0
- package/skills/openhands-api/.plugin/plugin.json +22 -0
- package/skills/openhands-api/README.md +48 -0
- package/skills/openhands-api/SKILL.md +399 -0
- package/skills/openhands-api/references/README.md +33 -0
- package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
- package/skills/openhands-api/references/example_prompt.md +12 -0
- package/skills/openhands-api/scripts/openhands_api.py +606 -0
- package/skills/openhands-api/scripts/openhands_api.ts +252 -0
- package/skills/openhands-automation/.plugin/plugin.json +19 -0
- package/skills/openhands-automation/README.md +89 -0
- package/skills/openhands-automation/SKILL.md +875 -0
- package/skills/openhands-automation/commands/automation:create.md +8 -0
- package/skills/openhands-automation/references/ab-testing.md +185 -0
- package/skills/openhands-automation/references/custom-automation.md +644 -0
- package/skills/openhands-sdk/.plugin/plugin.json +20 -0
- package/skills/openhands-sdk/README.md +22 -0
- package/skills/openhands-sdk/SKILL.md +229 -0
- package/skills/openhands-sdk/commands/sdk.md +8 -0
- package/skills/pdflatex/.plugin/plugin.json +18 -0
- package/skills/pdflatex/README.md +39 -0
- package/skills/pdflatex/SKILL.md +34 -0
- package/skills/prd/.plugin/plugin.json +19 -0
- package/skills/prd/README.md +28 -0
- package/skills/prd/SKILL.md +237 -0
- package/skills/prd/commands/prd.md +8 -0
- package/skills/qa-changes/README.md +18 -0
- package/skills/qa-changes/SKILL.md +229 -0
- package/skills/qa-changes/commands/qa-changes.md +8 -0
- package/skills/release-notes/README.md +24 -0
- package/skills/release-notes/SKILL.md +19 -0
- package/skills/release-notes/commands/release-notes.md +8 -0
- package/skills/research-brief/.plugin/plugin.json +20 -0
- package/skills/research-brief/README.md +34 -0
- package/skills/research-brief/SKILL.md +99 -0
- package/skills/research-brief/commands/research-brief:setup.md +8 -0
- package/skills/security/.plugin/plugin.json +18 -0
- package/skills/security/README.md +38 -0
- package/skills/security/SKILL.md +33 -0
- package/skills/skill-creator/.plugin/plugin.json +17 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/README.md +182 -0
- package/skills/skill-creator/SKILL.md +545 -0
- package/skills/skill-creator/references/output-patterns.md +82 -0
- package/skills/skill-creator/references/workflows.md +28 -0
- package/skills/skill-creator/scripts/init_skill.py +303 -0
- package/skills/skill-creator/scripts/quick_validate.py +95 -0
- package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
- package/skills/slack-channel-monitor/README.md +91 -0
- package/skills/slack-channel-monitor/SKILL.md +276 -0
- package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
- package/skills/slack-channel-monitor/references/slack-api.md +207 -0
- package/skills/slack-channel-monitor/references/state-schema.md +180 -0
- package/skills/slack-channel-monitor/scripts/main.py +962 -0
- package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
- package/skills/slack-standup-digest/README.md +34 -0
- package/skills/slack-standup-digest/SKILL.md +92 -0
- package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
- package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
- package/skills/spark-version-upgrade/README.md +54 -0
- package/skills/spark-version-upgrade/SKILL.md +233 -0
- package/skills/ssh/.plugin/plugin.json +18 -0
- package/skills/ssh/README.md +140 -0
- package/skills/ssh/SKILL.md +135 -0
- package/skills/swift-linux/.plugin/plugin.json +17 -0
- package/skills/swift-linux/README.md +86 -0
- package/skills/swift-linux/SKILL.md +81 -0
- package/skills/theme-factory/.plugin/plugin.json +19 -0
- package/skills/theme-factory/LICENSE.txt +202 -0
- package/skills/theme-factory/README.md +58 -0
- package/skills/theme-factory/SKILL.md +59 -0
- package/skills/theme-factory/theme-showcase.pdf +0 -0
- package/skills/theme-factory/themes/arctic-frost.md +19 -0
- package/skills/theme-factory/themes/botanical-garden.md +19 -0
- package/skills/theme-factory/themes/desert-rose.md +19 -0
- package/skills/theme-factory/themes/forest-canopy.md +19 -0
- package/skills/theme-factory/themes/golden-hour.md +19 -0
- package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
- package/skills/theme-factory/themes/modern-minimalist.md +19 -0
- package/skills/theme-factory/themes/ocean-depths.md +19 -0
- package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
- package/skills/theme-factory/themes/tech-innovation.md +19 -0
- package/skills/uv/.plugin/plugin.json +18 -0
- package/skills/uv/README.md +5 -0
- package/skills/uv/SKILL.md +95 -0
- package/skills/uv/references/README.md +5 -0
- package/skills/vercel/.plugin/plugin.json +18 -0
- package/skills/vercel/README.md +108 -0
- package/skills/vercel/SKILL.md +103 -0
- package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
- package/tests/test_catalogs.py +109 -0
- package/tests/test_code_review_risk_evaluation.py +94 -0
- package/tests/test_issue_duplicate_checker.py +240 -0
- package/tests/test_openhands_api_python.py +152 -0
- package/tests/test_plugin_manifest.py +83 -0
- package/tests/test_pr_review_diff_payload.py +202 -0
- package/tests/test_pr_review_feedback.py +263 -0
- package/tests/test_pr_review_prompt.py +152 -0
- package/tests/test_pr_review_review_context.py +253 -0
- package/tests/test_qa_changes.py +232 -0
- package/tests/test_qa_changes_evaluation.py +259 -0
- package/tests/test_release_notes_generator.py +990 -0
- package/tests/test_sdk_loading.py +150 -0
- package/tests/test_skill_plugin_loading.py +149 -0
- package/tests/test_skills_have_readme.py +66 -0
- package/tests/test_sync_extensions.py +292 -0
- package/tests/test_workflow_sync.py +46 -0
- package/utils/analysis/README.md +7 -0
- package/utils/analysis/laminar_signals/README.md +211 -0
- package/utils/analysis/laminar_signals/analyze.py +780 -0
- package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
- package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
|
@@ -0,0 +1,655 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
PR Review Evaluation Script
|
|
4
|
+
|
|
5
|
+
This script runs when a PR is merged or closed to evaluate how well
|
|
6
|
+
the review comments were addressed. It creates an evaluation trace
|
|
7
|
+
in Laminar that can be processed by a signal to determine review
|
|
8
|
+
effectiveness.
|
|
9
|
+
|
|
10
|
+
The evaluation flow:
|
|
11
|
+
1. Read the original trace ID from the artifact
|
|
12
|
+
2. Fetch PR review comments and thread discussion from GitHub
|
|
13
|
+
3. Fetch the final patch/diff
|
|
14
|
+
4. Create an evaluation span with all context
|
|
15
|
+
5. Optionally score the original trace
|
|
16
|
+
|
|
17
|
+
Environment Variables:
|
|
18
|
+
LMNR_PROJECT_API_KEY: Laminar project API key (required)
|
|
19
|
+
GITHUB_TOKEN: GitHub token for API access (required)
|
|
20
|
+
PR_NUMBER: Pull request number (required)
|
|
21
|
+
REPO_NAME: Repository name in format owner/repo (required)
|
|
22
|
+
PR_MERGED: Whether the PR was merged ('true' or 'false')
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
|
|
27
|
+
# Configure logging
|
|
28
|
+
import logging
|
|
29
|
+
import os
|
|
30
|
+
import sys
|
|
31
|
+
import urllib.error
|
|
32
|
+
import urllib.request
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
from lmnr import Laminar, LaminarClient
|
|
36
|
+
|
|
37
|
+
# Configure root logging once at import time: INFO and above, with each
# message prefixed by its level name.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
# Module-level logger used by the helpers in this script.
logger = logging.getLogger(__name__)

# HTML-comment marker looked for in comment/review bodies by
# extract_review_feedback; bodies that do not contain it are skipped there.
FEEDBACK_COMMENT_MARKER = "<!-- openhands-pr-review-feedback -->"
|
|
41
|
+
|
|
42
|
+
REVIEWS_QUERY = """
|
|
43
|
+
query($owner: String!, $repo: String!, $pr_number: Int!, $cursor: String) {
|
|
44
|
+
repository(owner: $owner, name: $repo) {
|
|
45
|
+
pullRequest(number: $pr_number) {
|
|
46
|
+
reviews(first: 100, after: $cursor) {
|
|
47
|
+
pageInfo {
|
|
48
|
+
hasNextPage
|
|
49
|
+
endCursor
|
|
50
|
+
}
|
|
51
|
+
nodes {
|
|
52
|
+
id
|
|
53
|
+
body
|
|
54
|
+
state
|
|
55
|
+
submittedAt
|
|
56
|
+
author { login }
|
|
57
|
+
reactionGroups {
|
|
58
|
+
content
|
|
59
|
+
users {
|
|
60
|
+
totalCount
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _get_required_env(name: str) -> str:
    """Return the value of the environment variable ``name``.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    env_value = os.environ.get(name)
    if env_value:
        return env_value
    raise ValueError(f"{name} environment variable is required")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _get_github_headers() -> dict[str, str]:
    """Build the HTTP headers sent with every GitHub API request.

    The bearer token is read from the required ``GITHUB_TOKEN`` environment
    variable, so a missing token surfaces as the ``ValueError`` raised by
    ``_get_required_env``.
    """
    github_token = _get_required_env("GITHUB_TOKEN")
    headers = {"Accept": "application/vnd.github.v3+json"}
    headers["Authorization"] = f"Bearer {github_token}"
    headers["X-GitHub-Api-Version"] = "2022-11-28"
    return headers
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _get_agent_usernames() -> set[str]:
    """Return the usernames whose comments count as agent-authored.

    Read from the comma-separated ``AGENT_USERNAMES`` environment variable;
    when it is unset the default is
    'openhands-agent,all-hands-bot,github-actions[bot]'. Surrounding
    whitespace is stripped and empty entries are dropped.
    """
    raw_value = os.getenv(
        "AGENT_USERNAMES",
        "openhands-agent,all-hands-bot,github-actions[bot]",
    )
    stripped_names = (piece.strip() for piece in raw_value.split(","))
    return {username for username in stripped_names if username}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _handle_github_api_error(e: urllib.error.HTTPError, context: str) -> None:
    """Log a failed GitHub API request, warning first if it was rate limited.

    A response is treated as rate limited when its status is 429, or when it
    is 403 and carries either a ``Retry-After`` header or an
    ``X-RateLimit-Remaining`` header equal to ``"0"``. The error itself is
    always logged; nothing is raised or retried here.

    Args:
        e: The HTTP error raised by ``urllib`` for the failed request.
        context: Short description of the attempted action; it is logged as
            "Failed to <context>: HTTP <status>".
    """
    # Compare against None explicitly: an empty header mapping is falsy.
    headers = e.headers
    retry_after = headers.get("Retry-After") if headers is not None else None
    remaining = (
        headers.get("X-RateLimit-Remaining") if headers is not None else None
    )

    rate_limited = e.code == 429 or (
        e.code == 403 and (retry_after is not None or remaining == "0")
    )
    if rate_limited:
        # Fall back to 60 seconds when the server gave no Retry-After hint.
        logger.warning(
            "Rate limited by GitHub API. Retry after %ss", retry_after or "60"
        )
    logger.error("Failed to %s: HTTP %s", context, e.code)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def fetch_pr_review_comments(repo: str, pr_number: str) -> list[dict]:
    """Fetch all review comments on a PR.

    This includes inline code review comments, not regular PR comments.
    The endpoint is paginated, so pages of 100 are requested until a short
    page signals the end.

    Args:
        repo: Repository in ``owner/repo`` form.
        pr_number: Pull request number.

    Returns:
        The decoded comment objects from every page fetched. If a request
        fails with an HTTP error, the error is logged and the comments
        collected so far are returned (an empty list when the first page
        fails).
    """
    base_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
    headers = _get_github_headers()
    per_page = 100
    comments: list[dict] = []
    page = 1

    while True:
        request = urllib.request.Request(
            f"{base_url}?per_page={per_page}&page={page}", headers=headers
        )
        try:
            with urllib.request.urlopen(request, timeout=60) as response:
                batch = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            _handle_github_api_error(e, "fetch review comments")
            break

        comments.extend(batch)
        # A page shorter than the requested size is the last one.
        if len(batch) < per_page:
            break
        page += 1

    return comments
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def fetch_pr_issue_comments(repo: str, pr_number: str) -> list[dict]:
    """Fetch all issue-style comments on a PR (the main thread).

    The endpoint is paginated, so pages of 100 are requested until a short
    page signals the end.

    Args:
        repo: Repository in ``owner/repo`` form.
        pr_number: Pull request number.

    Returns:
        The decoded comment objects from every page fetched. If a request
        fails with an HTTP error, the error is logged and the comments
        collected so far are returned (an empty list when the first page
        fails).
    """
    base_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
    headers = _get_github_headers()
    per_page = 100
    comments: list[dict] = []
    page = 1

    while True:
        request = urllib.request.Request(
            f"{base_url}?per_page={per_page}&page={page}", headers=headers
        )
        try:
            with urllib.request.urlopen(request, timeout=60) as response:
                batch = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            _handle_github_api_error(e, "fetch issue comments")
            break

        comments.extend(batch)
        # A page shorter than the requested size is the last one.
        if len(batch) < per_page:
            break
        page += 1

    return comments
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _call_github_graphql(query: str, variables: dict) -> dict:
    """Run a GraphQL query against GitHub and return its ``data`` payload.

    Args:
        query: GraphQL query text.
        variables: Values for the query's variables; must be JSON-serializable.

    Returns:
        The response's ``data`` object, or an empty dict when the request
        fails with an HTTP error, the response reports ``errors``, or
        ``data`` is missing/null. Failures are logged rather than raised.
    """
    auth_headers = _get_github_headers()
    body = json.dumps({"query": query, "variables": variables}).encode("utf-8")
    graphql_request = urllib.request.Request(
        "https://api.github.com/graphql",
        data=body,
        headers=auth_headers,
        method="POST",
    )
    graphql_request.add_header("Content-Type", "application/json")

    try:
        with urllib.request.urlopen(graphql_request, timeout=60) as response:
            raw_body = response.read()
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch GraphQL data")
        return {}

    result = json.loads(raw_body.decode("utf-8"))
    graphql_errors = result.get("errors")
    if graphql_errors:
        # Any reported error discards the response, even if data is present.
        logger.error("GitHub GraphQL returned errors: %s", graphql_errors)
        return {}

    return result.get("data") or {}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _normalize_review_reactions(reaction_groups: list[dict] | None) -> dict[str, int]:
|
|
162
|
+
"""Map GraphQL reaction groups to GitHub-style thumbs-up/down counters."""
|
|
163
|
+
thumbs_up = 0
|
|
164
|
+
thumbs_down = 0
|
|
165
|
+
|
|
166
|
+
for group in reaction_groups or []:
|
|
167
|
+
total_count = ((group.get("users") or {}).get("totalCount")) or 0
|
|
168
|
+
content = group.get("content")
|
|
169
|
+
if content == "THUMBS_UP":
|
|
170
|
+
thumbs_up = total_count
|
|
171
|
+
elif content == "THUMBS_DOWN":
|
|
172
|
+
thumbs_down = total_count
|
|
173
|
+
|
|
174
|
+
return {
|
|
175
|
+
"+1": thumbs_up,
|
|
176
|
+
"-1": thumbs_down,
|
|
177
|
+
"total_count": thumbs_up + thumbs_down,
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def fetch_pr_reviews(repo: str, pr_number: str) -> list[dict]:
    """Fetch all reviews on a PR, including thumbs-up/down reaction counts.

    Pages through the GraphQL ``reviews`` connection using the cursor from
    each page's ``pageInfo`` until no next page is reported.

    Args:
        repo: Repository in ``owner/repo`` form.
        pr_number: Pull request number; converted with ``int()``.

    Returns:
        One dict per review with the keys ``id``, ``user`` (a dict holding
        ``login``), ``body``, ``state``, ``submitted_at`` and ``reactions``.
        If a page cannot be fetched, the reviews collected so far are
        returned.

    Raises:
        ValueError: If ``repo`` contains no ``/`` or ``pr_number`` is not an
            integer string.
    """
    owner, repo_name = repo.split("/", 1)
    pr_number_int = int(pr_number)
    reviews: list[dict] = []
    cursor = None

    while True:
        data = _call_github_graphql(
            REVIEWS_QUERY,
            {
                "owner": owner,
                "repo": repo_name,
                "pr_number": pr_number_int,
                "cursor": cursor,
            },
        )
        # Use `or {}` rather than a .get() default: the default only applies
        # when the key is missing, so an explicit null would otherwise pass
        # through as None and break the next lookup.
        repository = data.get("repository") or {}
        pull_request = repository.get("pullRequest") or {}
        reviews_data = pull_request.get("reviews") or {}

        for review in reviews_data.get("nodes") or []:
            if not review:
                # Skip null entries in the node list.
                continue
            author = review.get("author") or {}
            reviews.append(
                {
                    "id": review.get("id"),
                    "user": {"login": author.get("login")},
                    "body": review.get("body") or "",
                    "state": review.get("state"),
                    "submitted_at": review.get("submittedAt"),
                    "reactions": _normalize_review_reactions(
                        review.get("reactionGroups")
                    ),
                }
            )

        page_info = reviews_data.get("pageInfo") or {}
        if not page_info.get("hasNextPage"):
            break
        cursor = page_info.get("endCursor")
        if not cursor:
            # No cursor to continue from; stop instead of re-requesting page one.
            break

    return reviews
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def fetch_pr_diff(repo: str, pr_number: str) -> str:
    """Download the PR's diff as text.

    Requests the pull request resource with the diff media type in the
    ``Accept`` header. Undecodable bytes are replaced rather than raising.

    Returns:
        The diff text, or an empty string if the request fails with an HTTP
        error (which is logged).
    """
    diff_headers = {
        **_get_github_headers(),
        "Accept": "application/vnd.github.v3.diff",
    }
    pr_request = urllib.request.Request(
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}",
        headers=diff_headers,
    )
    try:
        with urllib.request.urlopen(pr_request, timeout=60) as response:
            raw_diff = response.read()
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch PR diff")
        return ""
    return raw_diff.decode("utf-8", errors="replace")
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def fetch_pr_info(repo: str, pr_number: str) -> dict:
    """Retrieve the pull request's metadata as decoded JSON.

    Returns:
        The decoded pull request object, or an empty dict if the request
        fails with an HTTP error (which is logged).
    """
    pr_request = urllib.request.Request(
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}",
        headers=_get_github_headers(),
    )
    try:
        with urllib.request.urlopen(pr_request, timeout=60) as response:
            raw_body = response.read()
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch PR info")
        return {}
    return json.loads(raw_body.decode("utf-8"))
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def extract_agent_comments(
    review_comments: list[dict], issue_comments: list[dict], reviews: list[dict]
) -> list[dict]:
    """Extract comments made by the review agent.

    Agent usernames are configurable via AGENT_USERNAMES environment variable.
    An item whose ``user`` is missing or null has no login and is therefore
    never attributed to the agent.

    Args:
        review_comments: Inline code review comments.
        issue_comments: Comments from the PR's main thread.
        reviews: Submitted reviews; only those with a non-empty body are kept.

    Returns:
        Agent-authored items in the order review comments, issue comments,
        reviews. Each is a dict tagged with ``type`` (``"review_comment"``,
        ``"issue_comment"`` or ``"review"``) plus ``id``, ``body``,
        ``created_at`` and the type-specific fields.
    """
    agent_users = _get_agent_usernames()

    def _by_agent(item: dict) -> bool:
        # `or {}` covers both a missing "user" key and an explicit null,
        # which .get("user", {}) would hand back as None and crash on.
        return (item.get("user") or {}).get("login") in agent_users

    # Review comments (inline code comments)
    agent_comments = [
        {
            "type": "review_comment",
            "id": comment.get("id"),
            "body": comment.get("body", ""),
            "path": comment.get("path"),
            # Fall back to the original line when no current line is set.
            "line": comment.get("line") or comment.get("original_line"),
            "created_at": comment.get("created_at"),
        }
        for comment in review_comments
        if _by_agent(comment)
    ]

    # Issue comments (main thread)
    agent_comments.extend(
        {
            "type": "issue_comment",
            "id": comment.get("id"),
            "body": comment.get("body", ""),
            "created_at": comment.get("created_at"),
        }
        for comment in issue_comments
        if _by_agent(comment)
    )

    # Review bodies (reviews without a body are skipped)
    agent_comments.extend(
        {
            "type": "review",
            "id": review.get("id"),
            "body": review.get("body", ""),
            "state": review.get("state"),
            "created_at": review.get("submitted_at"),
        }
        for review in reviews
        if _by_agent(review) and review.get("body")
    )

    return agent_comments
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def extract_human_responses(
|
|
308
|
+
review_comments: list[dict],
|
|
309
|
+
issue_comments: list[dict],
|
|
310
|
+
agent_users: set[str] | None = None,
|
|
311
|
+
) -> list[dict]:
|
|
312
|
+
"""Extract comments/responses from humans (non-agent users).
|
|
313
|
+
|
|
314
|
+
Agent usernames are configurable via AGENT_USERNAMES environment variable.
|
|
315
|
+
"""
|
|
316
|
+
if agent_users is None:
|
|
317
|
+
agent_users = _get_agent_usernames()
|
|
318
|
+
|
|
319
|
+
human_responses = []
|
|
320
|
+
|
|
321
|
+
for comment in review_comments:
|
|
322
|
+
if comment.get("user", {}).get("login") not in agent_users:
|
|
323
|
+
human_responses.append(
|
|
324
|
+
{
|
|
325
|
+
"type": "review_comment",
|
|
326
|
+
"user": comment.get("user", {}).get("login"),
|
|
327
|
+
"body": comment.get("body", ""),
|
|
328
|
+
"in_reply_to_id": comment.get("in_reply_to_id"),
|
|
329
|
+
"created_at": comment.get("created_at"),
|
|
330
|
+
}
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
for comment in issue_comments:
|
|
334
|
+
if comment.get("user", {}).get("login") not in agent_users:
|
|
335
|
+
human_responses.append(
|
|
336
|
+
{
|
|
337
|
+
"type": "issue_comment",
|
|
338
|
+
"user": comment.get("user", {}).get("login"),
|
|
339
|
+
"body": comment.get("body", ""),
|
|
340
|
+
"created_at": comment.get("created_at"),
|
|
341
|
+
}
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
return human_responses
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def extract_review_feedback(
    issue_comments: list[dict], reviews: list[dict] | None = None
) -> list[dict]:
    """Extract thumbs-up/down feedback from review bodies or legacy comments.

    Issue comments are scanned first, then reviews. An item contributes
    feedback only when its body contains ``FEEDBACK_COMMENT_MARKER`` and its
    author login is one of the agent usernames.

    Args:
        issue_comments: Main-thread comment dicts.
        reviews: Optional review dicts; None is treated as an empty list.

    Returns:
        One dict per matching item with ``comment_id``, ``created_at``
        (falling back to ``submitted_at``), ``thumbs_up``, ``thumbs_down``
        and their sum as ``total``.
    """
    agent_users = _get_agent_usernames()
    feedback = []

    for comment in [*issue_comments, *(reviews or [])]:
        if FEEDBACK_COMMENT_MARKER not in (comment.get("body") or ""):
            continue
        # "user" can be present but null, like "body" and "reactions" here;
        # .get("user", {}) alone would return None and raise on .get("login").
        if (comment.get("user") or {}).get("login") not in agent_users:
            continue

        reactions = comment.get("reactions") or {}
        # "or 0" also covers counts that are present but null.
        thumbs_up = reactions.get("+1", 0) or 0
        thumbs_down = reactions.get("-1", 0) or 0
        feedback.append(
            {
                "comment_id": comment.get("id"),
                "created_at": comment.get("created_at")
                or comment.get("submitted_at"),
                "thumbs_up": thumbs_up,
                "thumbs_down": thumbs_down,
                "total": thumbs_up + thumbs_down,
            }
        )

    return feedback
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def truncate_text(text: str, max_chars: int = 50000) -> str:
    """Cut overly long text down to its first ``max_chars`` characters.

    The 50k default was chosen to stay well under typical API payload
    limits while keeping enough context for evaluation, so the evaluation
    trace stays manageable for Laminar processing.

    Text that already fits is returned unchanged. Otherwise the kept prefix
    is followed by a marker stating the original length, so the result is
    slightly longer than ``max_chars``.
    """
    total = len(text)
    if total > max_chars:
        head = text[:max_chars]
        return f"{head}\n\n... [truncated, {total} total chars]"
    return text
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def load_trace_info(trace_file_path: str | None = None) -> dict:
    """Load trace info from artifact file.

    Args:
        trace_file_path: Path to trace info JSON file. If None (or empty),
            the relative path ``laminar_trace_info.json`` is used.

    Returns:
        The parsed JSON content, expected to be a dictionary with trace_id,
        span_context, and other metadata. Empty dict if file not found.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    trace_info_path = (
        Path(trace_file_path) if trace_file_path else Path("laminar_trace_info.json")
    )

    # Open directly instead of checking exists() first: avoids a window
    # where the file disappears between the check and the open.
    try:
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's locale default encoding.
        with trace_info_path.open(encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        logger.warning(
            "No trace info file found - evaluation will create standalone trace"
        )
        return {}

    logger.info(f"Original trace ID: {data.get('trace_id')}")
    if data.get("span_context"):
        logger.info("Found span context - will add evaluation to original trace")
    else:
        logger.info("No span context - evaluation will create standalone trace")

    return data
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def fetch_pr_data(repo: str, pr_number: str) -> dict:
    """Gather everything the evaluation needs about one pull request.

    Args:
        repo: Repository in format owner/repo
        pr_number: PR number

    Returns:
        Dictionary with the raw fetched data (review_comments,
        issue_comments, reviews, final_diff, pr_info) and the values
        derived from it (agent_comments, human_responses, review_feedback).
    """
    logger.info("Fetching PR data from GitHub...")

    # Raw data first; dict literals evaluate in order, so the fetches run
    # in the order they are listed.
    bundle = {
        "review_comments": fetch_pr_review_comments(repo, pr_number),
        "issue_comments": fetch_pr_issue_comments(repo, pr_number),
        "reviews": fetch_pr_reviews(repo, pr_number),
        "final_diff": fetch_pr_diff(repo, pr_number),
        "pr_info": fetch_pr_info(repo, pr_number),
    }
    inline = bundle["review_comments"]
    thread = bundle["issue_comments"]
    submitted = bundle["reviews"]

    logger.info(f"Found {len(inline)} review comments")
    logger.info(f"Found {len(thread)} issue comments")
    logger.info(f"Found {len(submitted)} reviews")

    # Derived views over the raw data.
    bundle["agent_comments"] = extract_agent_comments(inline, thread, submitted)
    bundle["human_responses"] = extract_human_responses(inline, thread)
    bundle["review_feedback"] = extract_review_feedback(thread, submitted)

    logger.info(f"Agent made {len(bundle['agent_comments'])} comments")
    logger.info(f"Humans made {len(bundle['human_responses'])} responses")
    logger.info(f"Found {len(bundle['review_feedback'])} review feedback prompts")

    return bundle
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def calculate_engagement_score(
    agent_comments: list[dict],
    human_responses: list[dict],
    pr_merged: bool,
) -> float:
    """Score how much humans engaged with the agent's review.

    The score is the sum of two parts:
    - Response part (0-0.5): the ratio of human responses to agent
      comments, capped at 1, times 0.5. Zero when the agent made no comments.
    - Merge part (0.3): added when the PR was merged.

    Args:
        agent_comments: List of agent comments
        human_responses: List of human responses
        pr_merged: Whether the PR was merged

    Returns:
        Engagement score between 0.0 and 0.8
    """
    if agent_comments:
        ratio = len(human_responses) / len(agent_comments)
        capped = ratio if ratio < 1.0 else 1.0
        response_part = capped * 0.5
    else:
        # No agent comments: nothing to respond to, and no division by zero.
        response_part = 0.0

    return response_part + 0.3 if pr_merged else response_part
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def create_evaluation_span(
    pr_number: str,
    repo_name: str,
    pr_merged: bool,
    pr_data: dict,
    trace_info: dict,
) -> str | None:
    """Record the PR evaluation as a Laminar span and report its trace ID.

    The span is named ``pr_review_evaluation``. Its input is the evaluation
    context (PR details, agent comments, human responses, review feedback
    and the truncated diff) and its output is a short summary. When
    ``trace_info`` carries a ``span_context`` it is passed as the parent
    span context.

    Args:
        pr_number: PR number
        repo_name: Repository name
        pr_merged: Whether PR was merged
        pr_data: Dictionary from fetch_pr_data()
        trace_info: Dictionary from load_trace_info()

    Returns:
        Evaluation trace ID, or None if not available
    """
    Laminar.initialize()

    pr_info = pr_data["pr_info"]
    source_trace_id = trace_info.get("trace_id")
    agent_comments = pr_data["agent_comments"]
    human_responses = pr_data["human_responses"]
    review_feedback = pr_data["review_feedback"]
    diff_text = pr_data["final_diff"]

    evaluation_context = {
        "pr_number": pr_number,
        "repo_name": repo_name,
        "pr_merged": pr_merged,
        "pr_title": pr_info.get("title", ""),
        "pr_state": pr_info.get("state", ""),
        "original_trace_id": source_trace_id,
        "agent_comments": agent_comments,
        "human_responses": human_responses,
        "review_feedback": review_feedback,
        # Only the span input is truncated; the summary reports full length.
        "final_diff": truncate_text(diff_text),
        "total_review_comments": len(pr_data["review_comments"]),
        "total_issue_comments": len(pr_data["issue_comments"]),
    }

    trace_metadata = {
        "original_trace_id": source_trace_id or "none",
        "evaluation_type": "pr_review_effectiveness",
        "pr_number": pr_number,
        "repo_name": repo_name,
        "pr_merged": str(pr_merged),
    }

    summary = {
        "pr": f"{repo_name}#{pr_number}",
        "merged": pr_merged,
        "agent_comments_count": len(agent_comments),
        "human_responses_count": len(human_responses),
        "review_feedback": review_feedback,
        "diff_length": len(diff_text),
    }

    with Laminar.start_as_current_span(
        name="pr_review_evaluation",
        input=evaluation_context,
        tags=["pr-review-evaluation"],
        parent_span_context=trace_info.get("span_context"),
    ):
        Laminar.set_trace_metadata(trace_metadata)
        logger.info(f"Evaluation summary: {json.dumps(summary)}")
        Laminar.set_span_output({"summary": summary, "ready_for_signal": True})

        # Read the trace ID while the span is still current.
        eval_trace_id = Laminar.get_trace_id()

    Laminar.flush()
    if eval_trace_id:
        return str(eval_trace_id)
    return None
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def main(trace_file_path: str | None = None):
    """Run the PR review evaluation end to end.

    Reads PR_NUMBER and REPO_NAME (required) and PR_MERGED (optional, true
    only when it equals "true" case-insensitively) from the environment,
    loads the trace info, fetches the PR data, records the evaluation span,
    scores and tags the original trace when one is known, and prints a
    summary.

    Args:
        trace_file_path: Optional path to trace info JSON file.
    """
    logger.info("Starting PR review evaluation...")

    pr_number = _get_required_env("PR_NUMBER")
    repo_name = _get_required_env("REPO_NAME")
    merged_flag = os.environ.get("PR_MERGED", "false")
    pr_merged = merged_flag.lower() == "true"

    logger.info(f"Evaluating PR #{pr_number} in {repo_name}")
    logger.info(f"PR was merged: {pr_merged}")

    trace = load_trace_info(trace_file_path)
    data = fetch_pr_data(repo_name, pr_number)
    evaluation_trace = create_evaluation_span(
        pr_number, repo_name, pr_merged, data, trace
    )

    source_trace = trace.get("trace_id")
    agent_comments = data["agent_comments"]
    human_responses = data["human_responses"]
    review_feedback = data["review_feedback"]

    # Score engagement on the original trace for immediate feedback.
    # Best effort: a failure here is logged and the summary is still printed.
    if source_trace:
        try:
            laminar_client = LaminarClient()
            engagement = calculate_engagement_score(
                agent_comments, human_responses, pr_merged
            )
            score_metadata = {
                "agent_comments": len(agent_comments),
                "human_responses": len(human_responses),
                "pr_merged": pr_merged,
                "review_feedback": review_feedback,
                "score_type": "engagement",
            }

            laminar_client.evaluators.score(
                name="review_engagement",
                trace_id=source_trace,
                score=engagement,
                metadata=score_metadata,
            )
            logger.info(
                f"Added engagement score {engagement:.2f} "
                f"to original trace {source_trace}"
            )

            laminar_client.tags.tag(source_trace, ["evaluated", f"pr-{pr_number}"])
            logger.info(f"Tagged original trace {source_trace}")

        except Exception as exc:
            logger.warning(f"Failed to score original trace: {exc}")

    # Print evaluation summary
    print("\n=== PR Review Evaluation ===")
    print(f"PR: {repo_name}#{pr_number}")
    print(f"Merged: {pr_merged}")
    print(f"Agent Comments: {len(agent_comments)}")
    print(f"Human Responses: {len(human_responses)}")
    if review_feedback:
        ups = sum(entry["thumbs_up"] for entry in review_feedback)
        downs = sum(entry["thumbs_down"] for entry in review_feedback)
        print(f"Review Feedback: 👍 {ups} / 👎 {downs}")
    if source_trace:
        print(f"Original Review Trace: {source_trace}")
    if evaluation_trace:
        print(f"Evaluation Trace: {evaluation_trace}")

    logger.info("PR review evaluation completed successfully")
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
# Script entry point: parse the optional --trace-file argument and run the
# evaluation, exiting with status 1 when it fails.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Evaluate PR review effectiveness")
    cli.add_argument(
        "--trace-file",
        help="Path to trace info JSON file (default: laminar_trace_info.json)",
    )
    options = cli.parse_args()

    try:
        main(trace_file_path=options.trace_file)
    except Exception as exc:
        # Log the failure and signal it to the caller via the exit status.
        logger.error(f"Evaluation failed: {exc}")
        sys.exit(1)
|