@biggora/claude-plugins 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -4
- package/package.json +1 -1
- package/registry/registry.json +319 -244
- package/specs/coding.md +24 -0
- package/specs/pod.md +2 -0
- package/src/skills/captcha/README.md +221 -0
- package/src/skills/captcha/SKILL.md +355 -0
- package/src/skills/captcha/references/captcha-types.md +254 -0
- package/src/skills/captcha/references/services.md +172 -0
- package/src/skills/captcha/references/stealth.md +238 -0
- package/src/skills/captcha/scripts/solve_captcha.py +323 -0
- package/src/skills/captcha/scripts/solve_image_grid.py +350 -0
- package/src/skills/google-merchant-api/SKILL.md +581 -0
- package/src/skills/google-merchant-api/references/accounts.md +247 -0
- package/src/skills/google-merchant-api/references/content-api-legacy.md +216 -0
- package/src/skills/google-merchant-api/references/datasources.md +233 -0
- package/src/skills/google-merchant-api/references/inventories.md +201 -0
- package/src/skills/google-merchant-api/references/migration.md +267 -0
- package/src/skills/google-merchant-api/references/products.md +316 -0
- package/src/skills/google-merchant-api/references/promotions.md +201 -0
- package/src/skills/google-merchant-api/references/reports.md +240 -0
- package/src/skills/lv-aggregators-api/SKILL.md +113 -0
- package/src/skills/lv-aggregators-api/references/integration-guide.md +368 -0
- package/src/skills/lv-aggregators-api/references/kurpirkt.md +103 -0
- package/src/skills/lv-aggregators-api/references/salidzini.md +122 -0
- package/src/skills/tailwindcss-best-practices/SKILL.md +180 -0
- package/src/skills/tailwindcss-best-practices/references/best-practices-utility-patterns.md +87 -0
- package/src/skills/tailwindcss-best-practices/references/core-installation.md +109 -0
- package/src/skills/tailwindcss-best-practices/references/core-preflight.md +200 -0
- package/src/skills/tailwindcss-best-practices/references/core-responsive.md +163 -0
- package/src/skills/tailwindcss-best-practices/references/core-source-detection.md +114 -0
- package/src/skills/tailwindcss-best-practices/references/core-theme.md +108 -0
- package/src/skills/tailwindcss-best-practices/references/core-utility-classes.md +59 -0
- package/src/skills/tailwindcss-best-practices/references/core-variants.md +204 -0
- package/src/skills/tailwindcss-best-practices/references/effects-form-controls.md +76 -0
- package/src/skills/tailwindcss-best-practices/references/effects-mask.md +91 -0
- package/src/skills/tailwindcss-best-practices/references/effects-scroll-snap.md +59 -0
- package/src/skills/tailwindcss-best-practices/references/effects-text-shadow.md +78 -0
- package/src/skills/tailwindcss-best-practices/references/effects-transition-animation.md +80 -0
- package/src/skills/tailwindcss-best-practices/references/effects-visibility-interactivity.md +82 -0
- package/src/skills/tailwindcss-best-practices/references/features-content-detection.md +175 -0
- package/src/skills/tailwindcss-best-practices/references/features-custom-styles.md +203 -0
- package/src/skills/tailwindcss-best-practices/references/features-dark-mode.md +137 -0
- package/src/skills/tailwindcss-best-practices/references/features-functions-directives.md +241 -0
- package/src/skills/tailwindcss-best-practices/references/features-upgrade.md +160 -0
- package/src/skills/tailwindcss-best-practices/references/layout-aspect-ratio.md +39 -0
- package/src/skills/tailwindcss-best-practices/references/layout-columns.md +80 -0
- package/src/skills/tailwindcss-best-practices/references/layout-display.md +110 -0
- package/src/skills/tailwindcss-best-practices/references/layout-flexbox.md +112 -0
- package/src/skills/tailwindcss-best-practices/references/layout-grid.md +87 -0
- package/src/skills/tailwindcss-best-practices/references/layout-height.md +97 -0
- package/src/skills/tailwindcss-best-practices/references/layout-inset.md +103 -0
- package/src/skills/tailwindcss-best-practices/references/layout-logical-properties.md +92 -0
- package/src/skills/tailwindcss-best-practices/references/layout-margin.md +126 -0
- package/src/skills/tailwindcss-best-practices/references/layout-min-max-sizing.md +63 -0
- package/src/skills/tailwindcss-best-practices/references/layout-object-fit-position.md +64 -0
- package/src/skills/tailwindcss-best-practices/references/layout-overflow.md +57 -0
- package/src/skills/tailwindcss-best-practices/references/layout-padding.md +77 -0
- package/src/skills/tailwindcss-best-practices/references/layout-position.md +85 -0
- package/src/skills/tailwindcss-best-practices/references/layout-tables.md +67 -0
- package/src/skills/tailwindcss-best-practices/references/layout-width.md +102 -0
- package/src/skills/tailwindcss-best-practices/references/transform-base.md +68 -0
- package/src/skills/tailwindcss-best-practices/references/transform-rotate.md +70 -0
- package/src/skills/tailwindcss-best-practices/references/transform-scale.md +83 -0
- package/src/skills/tailwindcss-best-practices/references/transform-skew.md +62 -0
- package/src/skills/tailwindcss-best-practices/references/transform-translate.md +77 -0
- package/src/skills/tailwindcss-best-practices/references/typography-font-text.md +142 -0
- package/src/skills/tailwindcss-best-practices/references/typography-list-style.md +65 -0
- package/src/skills/tailwindcss-best-practices/references/typography-text-align.md +60 -0
- package/src/skills/tailwindcss-best-practices/references/visual-background.md +76 -0
- package/src/skills/tailwindcss-best-practices/references/visual-border.md +108 -0
- package/src/skills/tailwindcss-best-practices/references/visual-effects.md +111 -0
- package/src/skills/tailwindcss-best-practices/references/visual-svg.md +82 -0
- package/src/skills/test-mobile-app/SKILL.md +11 -6
- package/src/skills/test-mobile-app/scripts/analyze_apk.py +15 -4
- package/src/skills/test-mobile-app/scripts/check_environment.py +5 -5
- package/src/skills/test-mobile-app/scripts/run_tests.py +1 -1
- package/src/skills/test-web-ui/SKILL.md +264 -84
- package/src/skills/test-web-ui/scripts/discover.py +25 -12
- package/src/skills/test-web-ui/scripts/run_tests.py +3 -2
- package/src/skills/vite-best-practices/SKILL.md +115 -0
- package/src/skills/vite-best-practices/references/build-and-ssr.md +255 -0
- package/src/skills/vite-best-practices/references/core-config.md +231 -0
- package/src/skills/vite-best-practices/references/core-features.md +222 -0
- package/src/skills/vite-best-practices/references/core-plugin-api.md +294 -0
- package/src/skills/vite-best-practices/references/environment-api.md +108 -0
- package/src/skills/vite-best-practices/references/rolldown-migration.md +242 -0
- package/codex-cli-workspace/iteration-1/benchmark.json +0 -122
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/outputs/response.md +0 -163
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/grading.json +0 -58
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/outputs/response.md +0 -151
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -86
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -58
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/outputs/response.md +0 -130
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/grading.json +0 -64
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/outputs/response.md +0 -209
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/review.html +0 -1325
- package/gemini-cli-workspace/iteration-1/benchmark.json +0 -86
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/outputs/response.md +0 -401
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/outputs/response.md +0 -405
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -212
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -427
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/eval_metadata.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -171
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -199
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/review.html +0 -1325
- package/gemini-cli-workspace/iteration-2/benchmark.json +0 -173
- package/gemini-cli-workspace/iteration-2/benchmark.md +0 -28
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/outputs/response.md +0 -195
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/outputs/response.md +0 -377
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -127
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/eval_metadata.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -91
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -112
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-viewer.html +0 -1325
- package/screen-recording-workspace/evals.json +0 -41
- package/screen-recording-workspace/iteration-1/benchmark.json +0 -102
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/record_screen.py +0 -67
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/review.html +0 -1325
- package/src/skills/codex-cli/evals/evals.json +0 -47
- package/src/skills/gemini-cli/evals/evals.json +0 -46
- package/src/skills/tm-search/evals/evals.json +0 -23
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"skill_name": "gemini-cli",
|
|
3
|
-
"iteration": 1,
|
|
4
|
-
"configurations": [
|
|
5
|
-
{
|
|
6
|
-
"name": "with_skill",
|
|
7
|
-
"evals": [
|
|
8
|
-
{
|
|
9
|
-
"eval_name": "cicd-setup",
|
|
10
|
-
"pass_rate": 1.0,
|
|
11
|
-
"passed": 6,
|
|
12
|
-
"total": 6,
|
|
13
|
-
"tokens": 73417,
|
|
14
|
-
"duration_seconds": 92.4
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"eval_name": "mcp-server-config",
|
|
18
|
-
"pass_rate": 1.0,
|
|
19
|
-
"passed": 6,
|
|
20
|
-
"total": 6,
|
|
21
|
-
"tokens": 67196,
|
|
22
|
-
"duration_seconds": 64.1
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
"eval_name": "custom-slash-command",
|
|
26
|
-
"pass_rate": 1.0,
|
|
27
|
-
"passed": 5,
|
|
28
|
-
"total": 5,
|
|
29
|
-
"tokens": 70530,
|
|
30
|
-
"duration_seconds": 49.5
|
|
31
|
-
}
|
|
32
|
-
],
|
|
33
|
-
"aggregate": {
|
|
34
|
-
"mean_pass_rate": 1.0,
|
|
35
|
-
"mean_tokens": 70381,
|
|
36
|
-
"mean_duration_seconds": 68.7
|
|
37
|
-
}
|
|
38
|
-
},
|
|
39
|
-
{
|
|
40
|
-
"name": "without_skill",
|
|
41
|
-
"evals": [
|
|
42
|
-
{
|
|
43
|
-
"eval_name": "cicd-setup",
|
|
44
|
-
"pass_rate": 0.33,
|
|
45
|
-
"passed": 2,
|
|
46
|
-
"total": 6,
|
|
47
|
-
"tokens": 60035,
|
|
48
|
-
"duration_seconds": 77.9
|
|
49
|
-
},
|
|
50
|
-
{
|
|
51
|
-
"eval_name": "mcp-server-config",
|
|
52
|
-
"pass_rate": 0.33,
|
|
53
|
-
"passed": 2,
|
|
54
|
-
"total": 6,
|
|
55
|
-
"tokens": 59258,
|
|
56
|
-
"duration_seconds": 65.4
|
|
57
|
-
},
|
|
58
|
-
{
|
|
59
|
-
"eval_name": "custom-slash-command",
|
|
60
|
-
"pass_rate": 0.0,
|
|
61
|
-
"passed": 0,
|
|
62
|
-
"total": 5,
|
|
63
|
-
"tokens": 57648,
|
|
64
|
-
"duration_seconds": 56.7
|
|
65
|
-
}
|
|
66
|
-
],
|
|
67
|
-
"aggregate": {
|
|
68
|
-
"mean_pass_rate": 0.22,
|
|
69
|
-
"mean_tokens": 58980,
|
|
70
|
-
"mean_duration_seconds": 66.7
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
],
|
|
74
|
-
"delta": {
|
|
75
|
-
"pass_rate": "+0.78",
|
|
76
|
-
"tokens": "+11401 (19% more)",
|
|
77
|
-
"duration": "+2.0s (3% more)"
|
|
78
|
-
},
|
|
79
|
-
"analyst_observations": [
|
|
80
|
-
"The skill provides a massive accuracy improvement (100% vs 22% pass rate) with only marginal cost increase (~19% more tokens, ~3% more time).",
|
|
81
|
-
"Eval 3 (custom slash command) shows the largest gap: the baseline gets the entire mechanism wrong (uses extensions instead of .toml commands), scoring 0/5.",
|
|
82
|
-
"Eval 1 (CI/CD) baseline hallucinates non-existent flags (--api-key, --json) and even the wrong npm package name (@anthropic-ai/gemini-cli).",
|
|
83
|
-
"Eval 2 (MCP config) baseline doesn't know about includeTools/excludeTools and recommends hardcoding API keys, which is a security anti-pattern.",
|
|
84
|
-
"All assertions are discriminating - none pass for both configurations, confirming the skill provides genuine value over baseline knowledge."
|
|
85
|
-
]
|
|
86
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"prompt": "I want to set up gemini cli in my CI/CD pipeline on GitHub Actions so it can automatically review PRs. We use an API key stored in GitHub secrets. How do I set this up with structured JSON output so I can parse the review?",
|
|
5
|
-
"assertions": [
|
|
6
|
-
{
|
|
7
|
-
"id": "mentions-p-flag",
|
|
8
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
9
|
-
"type": "content_check"
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"id": "mentions-json-output",
|
|
13
|
-
"text": "Includes --output-format json flag for structured output",
|
|
14
|
-
"type": "content_check"
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"id": "mentions-api-key-env",
|
|
18
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
19
|
-
"type": "content_check"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"id": "provides-workflow-yaml",
|
|
23
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
24
|
-
"type": "content_check"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"id": "mentions-jq-parsing",
|
|
28
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
29
|
-
"type": "content_check"
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"id": "no-hallucinated-flags",
|
|
33
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
34
|
-
"type": "accuracy_check"
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"configuration": "with_skill",
|
|
5
|
-
"expectations": [
|
|
6
|
-
{
|
|
7
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
8
|
-
"passed": true,
|
|
9
|
-
"evidence": "Uses `-p` flag throughout: `gemini -p \"...\" --output-format json`"
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"text": "Includes --output-format json flag for structured output",
|
|
13
|
-
"passed": true,
|
|
14
|
-
"evidence": "Correctly uses `--output-format json` in all examples"
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
18
|
-
"passed": true,
|
|
19
|
-
"evidence": "Shows `GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}` in workflow env block"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
23
|
-
"passed": true,
|
|
24
|
-
"evidence": "Provides 3 complete workflows: basic, advanced, and minimal quick-start"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
28
|
-
"passed": true,
|
|
29
|
-
"evidence": "Shows `jq -r '.response'` and detailed stats extraction with jq"
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
33
|
-
"passed": true,
|
|
34
|
-
"evidence": "All flags used (-p, --output-format json, -m, --yolo, --debug-log) are real Gemini CLI flags"
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|
|
@@ -1,401 +0,0 @@
|
|
|
1
|
-
# Setting Up Gemini CLI for Automated PR Reviews in GitHub Actions
|
|
2
|
-
|
|
3
|
-
Here is a complete guide to configuring Gemini CLI in your CI/CD pipeline to automatically review pull requests with structured JSON output.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Prerequisites
|
|
8
|
-
|
|
9
|
-
1. A Gemini API key (get one at https://aistudio.google.com/apikey).
|
|
10
|
-
2. The API key stored as a GitHub Actions secret named `GEMINI_API_KEY`.
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
## Step 1: Basic GitHub Actions Workflow
|
|
15
|
-
|
|
16
|
-
Create `.github/workflows/gemini-pr-review.yml`:
|
|
17
|
-
|
|
18
|
-
```yaml
|
|
19
|
-
name: Gemini PR Review
|
|
20
|
-
|
|
21
|
-
on:
|
|
22
|
-
pull_request:
|
|
23
|
-
types: [opened, synchronize, reopened]
|
|
24
|
-
|
|
25
|
-
permissions:
|
|
26
|
-
pull-requests: write
|
|
27
|
-
contents: read
|
|
28
|
-
|
|
29
|
-
jobs:
|
|
30
|
-
review:
|
|
31
|
-
runs-on: ubuntu-latest
|
|
32
|
-
steps:
|
|
33
|
-
- name: Checkout code
|
|
34
|
-
uses: actions/checkout@v4
|
|
35
|
-
with:
|
|
36
|
-
fetch-depth: 0 # Full history so we can diff against base
|
|
37
|
-
|
|
38
|
-
- name: Set up Node.js
|
|
39
|
-
uses: actions/setup-node@v4
|
|
40
|
-
with:
|
|
41
|
-
node-version: '20'
|
|
42
|
-
|
|
43
|
-
- name: Install Gemini CLI
|
|
44
|
-
run: npm install -g @google/gemini-cli
|
|
45
|
-
|
|
46
|
-
- name: Get PR diff
|
|
47
|
-
id: diff
|
|
48
|
-
run: |
|
|
49
|
-
git fetch origin ${{ github.base_ref }}
|
|
50
|
-
git diff origin/${{ github.base_ref }}...HEAD > /tmp/pr-diff.txt
|
|
51
|
-
echo "diff_size=$(wc -c < /tmp/pr-diff.txt)" >> "$GITHUB_OUTPUT"
|
|
52
|
-
|
|
53
|
-
- name: Run Gemini PR Review
|
|
54
|
-
id: review
|
|
55
|
-
env:
|
|
56
|
-
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
57
|
-
run: |
|
|
58
|
-
REVIEW_JSON=$(cat /tmp/pr-diff.txt | gemini -p "You are a senior code reviewer. Review the following pull request diff for:
|
|
59
|
-
1. Bugs and logical errors
|
|
60
|
-
2. Security vulnerabilities
|
|
61
|
-
3. Performance issues
|
|
62
|
-
4. Code style and best practices
|
|
63
|
-
|
|
64
|
-
Provide your review as structured text with sections for each category.
|
|
65
|
-
If no issues are found in a category, say 'No issues found.'
|
|
66
|
-
End with an overall summary and a verdict: APPROVE, REQUEST_CHANGES, or COMMENT." \
|
|
67
|
-
--output-format json)
|
|
68
|
-
|
|
69
|
-
# Extract the response text from the JSON envelope
|
|
70
|
-
REVIEW_TEXT=$(echo "$REVIEW_JSON" | jq -r '.response')
|
|
71
|
-
|
|
72
|
-
# Extract token stats for logging
|
|
73
|
-
TOTAL_TOKENS=$(echo "$REVIEW_JSON" | jq '.stats.models | to_entries[0].value.tokens.total // 0')
|
|
74
|
-
echo "Tokens used: $TOTAL_TOKENS"
|
|
75
|
-
|
|
76
|
-
# Save review to file (handles multi-line safely)
|
|
77
|
-
echo "$REVIEW_TEXT" > /tmp/review-output.txt
|
|
78
|
-
|
|
79
|
-
# Save for the summary
|
|
80
|
-
echo "$REVIEW_TEXT" >> "$GITHUB_STEP_SUMMARY"
|
|
81
|
-
|
|
82
|
-
- name: Post review as PR comment
|
|
83
|
-
uses: actions/github-script@v7
|
|
84
|
-
with:
|
|
85
|
-
script: |
|
|
86
|
-
const fs = require('fs');
|
|
87
|
-
const review = fs.readFileSync('/tmp/review-output.txt', 'utf8');
|
|
88
|
-
await github.rest.issues.createComment({
|
|
89
|
-
owner: context.repo.owner,
|
|
90
|
-
repo: context.repo.repo,
|
|
91
|
-
issue_number: context.issue.number,
|
|
92
|
-
body: `## Gemini Code Review\n\n${review}\n\n---\n*Automated review by Gemini CLI*`
|
|
93
|
-
});
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
This workflow triggers on every PR event, runs the diff through Gemini CLI with `--output-format json`, extracts the response with `jq`, and posts it as a PR comment.
|
|
97
|
-
|
|
98
|
-
---
|
|
99
|
-
|
|
100
|
-
## Step 2: Advanced Workflow with Full JSON Parsing
|
|
101
|
-
|
|
102
|
-
If you want to parse the structured JSON output more thoroughly -- for example, to extract token usage, check for errors, or conditionally fail the build -- use this expanded version:
|
|
103
|
-
|
|
104
|
-
```yaml
|
|
105
|
-
name: Gemini PR Review (Advanced)
|
|
106
|
-
|
|
107
|
-
on:
|
|
108
|
-
pull_request:
|
|
109
|
-
types: [opened, synchronize]
|
|
110
|
-
|
|
111
|
-
permissions:
|
|
112
|
-
pull-requests: write
|
|
113
|
-
contents: read
|
|
114
|
-
|
|
115
|
-
jobs:
|
|
116
|
-
review:
|
|
117
|
-
runs-on: ubuntu-latest
|
|
118
|
-
steps:
|
|
119
|
-
- name: Checkout
|
|
120
|
-
uses: actions/checkout@v4
|
|
121
|
-
with:
|
|
122
|
-
fetch-depth: 0
|
|
123
|
-
|
|
124
|
-
- name: Setup Node.js
|
|
125
|
-
uses: actions/setup-node@v4
|
|
126
|
-
with:
|
|
127
|
-
node-version: '20'
|
|
128
|
-
|
|
129
|
-
- name: Install Gemini CLI and jq
|
|
130
|
-
run: |
|
|
131
|
-
npm install -g @google/gemini-cli
|
|
132
|
-
# jq is pre-installed on ubuntu-latest, but just in case:
|
|
133
|
-
which jq || sudo apt-get install -y jq
|
|
134
|
-
|
|
135
|
-
- name: Run structured review
|
|
136
|
-
id: gemini
|
|
137
|
-
env:
|
|
138
|
-
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
139
|
-
run: |
|
|
140
|
-
git fetch origin ${{ github.base_ref }}
|
|
141
|
-
DIFF=$(git diff origin/${{ github.base_ref }}...HEAD)
|
|
142
|
-
|
|
143
|
-
# Skip review if diff is empty
|
|
144
|
-
if [ -z "$DIFF" ]; then
|
|
145
|
-
echo "No changes to review."
|
|
146
|
-
echo "skip=true" >> "$GITHUB_OUTPUT"
|
|
147
|
-
exit 0
|
|
148
|
-
fi
|
|
149
|
-
|
|
150
|
-
# Run Gemini with JSON output
|
|
151
|
-
RESULT=$(echo "$DIFF" | gemini -p "You are a code review bot. Analyze this PR diff and respond with ONLY a valid JSON object (no markdown fences) using this exact schema:
|
|
152
|
-
{
|
|
153
|
-
\"verdict\": \"APPROVE\" | \"REQUEST_CHANGES\" | \"COMMENT\",
|
|
154
|
-
\"summary\": \"Brief overall summary\",
|
|
155
|
-
\"bugs\": [{ \"file\": \"path\", \"line\": number, \"description\": \"...\" }],
|
|
156
|
-
\"security\": [{ \"file\": \"path\", \"severity\": \"low|medium|high|critical\", \"description\": \"...\" }],
|
|
157
|
-
\"performance\": [{ \"file\": \"path\", \"description\": \"...\" }],
|
|
158
|
-
\"suggestions\": [{ \"file\": \"path\", \"description\": \"...\" }]
|
|
159
|
-
}
|
|
160
|
-
Return empty arrays if no issues found in a category." \
|
|
161
|
-
--output-format json)
|
|
162
|
-
|
|
163
|
-
# The outer JSON has a .response field containing the model's text.
|
|
164
|
-
# The model's text itself is a JSON string we need to parse.
|
|
165
|
-
REVIEW_RESPONSE=$(echo "$RESULT" | jq -r '.response')
|
|
166
|
-
|
|
167
|
-
# Parse the inner JSON (the actual review)
|
|
168
|
-
echo "$REVIEW_RESPONSE" | jq '.' > /tmp/review.json
|
|
169
|
-
|
|
170
|
-
# Extract fields for downstream steps
|
|
171
|
-
VERDICT=$(cat /tmp/review.json | jq -r '.verdict')
|
|
172
|
-
SUMMARY=$(cat /tmp/review.json | jq -r '.summary')
|
|
173
|
-
BUG_COUNT=$(cat /tmp/review.json | jq '.bugs | length')
|
|
174
|
-
SECURITY_COUNT=$(cat /tmp/review.json | jq '.security | length')
|
|
175
|
-
CRITICAL_COUNT=$(cat /tmp/review.json | jq '[.security[] | select(.severity == "critical")] | length')
|
|
176
|
-
|
|
177
|
-
echo "verdict=$VERDICT" >> "$GITHUB_OUTPUT"
|
|
178
|
-
echo "bug_count=$BUG_COUNT" >> "$GITHUB_OUTPUT"
|
|
179
|
-
echo "security_count=$SECURITY_COUNT" >> "$GITHUB_OUTPUT"
|
|
180
|
-
echo "critical_count=$CRITICAL_COUNT" >> "$GITHUB_OUTPUT"
|
|
181
|
-
echo "skip=false" >> "$GITHUB_OUTPUT"
|
|
182
|
-
|
|
183
|
-
# Log token usage from the Gemini stats envelope
|
|
184
|
-
TOKENS=$(echo "$RESULT" | jq '.stats.models | to_entries[0].value.tokens.total // 0')
|
|
185
|
-
LATENCY=$(echo "$RESULT" | jq '.stats.models | to_entries[0].value.api.totalLatencyMs // 0')
|
|
186
|
-
echo "Tokens used: $TOKENS, Latency: ${LATENCY}ms"
|
|
187
|
-
|
|
188
|
-
- name: Format and post review comment
|
|
189
|
-
if: steps.gemini.outputs.skip != 'true'
|
|
190
|
-
uses: actions/github-script@v7
|
|
191
|
-
with:
|
|
192
|
-
script: |
|
|
193
|
-
const fs = require('fs');
|
|
194
|
-
const review = JSON.parse(fs.readFileSync('/tmp/review.json', 'utf8'));
|
|
195
|
-
|
|
196
|
-
let body = `## Gemini Code Review\n\n`;
|
|
197
|
-
body += `**Verdict:** ${review.verdict}\n\n`;
|
|
198
|
-
body += `**Summary:** ${review.summary}\n\n`;
|
|
199
|
-
|
|
200
|
-
if (review.bugs.length > 0) {
|
|
201
|
-
body += `### Bugs (${review.bugs.length})\n`;
|
|
202
|
-
for (const bug of review.bugs) {
|
|
203
|
-
body += `- **${bug.file}** (line ${bug.line}): ${bug.description}\n`;
|
|
204
|
-
}
|
|
205
|
-
body += '\n';
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
if (review.security.length > 0) {
|
|
209
|
-
body += `### Security Issues (${review.security.length})\n`;
|
|
210
|
-
for (const issue of review.security) {
|
|
211
|
-
const emoji = issue.severity === 'critical' ? '🔴' :
|
|
212
|
-
issue.severity === 'high' ? '🟠' :
|
|
213
|
-
issue.severity === 'medium' ? '🟡' : '🟢';
|
|
214
|
-
body += `- ${emoji} **[${issue.severity.toUpperCase()}]** ${issue.file}: ${issue.description}\n`;
|
|
215
|
-
}
|
|
216
|
-
body += '\n';
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
if (review.performance.length > 0) {
|
|
220
|
-
body += `### Performance (${review.performance.length})\n`;
|
|
221
|
-
for (const perf of review.performance) {
|
|
222
|
-
body += `- **${perf.file}**: ${perf.description}\n`;
|
|
223
|
-
}
|
|
224
|
-
body += '\n';
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
if (review.suggestions.length > 0) {
|
|
228
|
-
body += `### Suggestions (${review.suggestions.length})\n`;
|
|
229
|
-
for (const sug of review.suggestions) {
|
|
230
|
-
body += `- **${sug.file}**: ${sug.description}\n`;
|
|
231
|
-
}
|
|
232
|
-
body += '\n';
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
body += `---\n*Automated review by Gemini CLI*`;
|
|
236
|
-
|
|
237
|
-
await github.rest.issues.createComment({
|
|
238
|
-
owner: context.repo.owner,
|
|
239
|
-
repo: context.repo.repo,
|
|
240
|
-
issue_number: context.issue.number,
|
|
241
|
-
body
|
|
242
|
-
});
|
|
243
|
-
|
|
244
|
-
- name: Fail on critical security issues
|
|
245
|
-
if: steps.gemini.outputs.critical_count != '0' && steps.gemini.outputs.skip != 'true'
|
|
246
|
-
run: |
|
|
247
|
-
echo "::error::Gemini found ${{ steps.gemini.outputs.critical_count }} critical security issue(s). Review required."
|
|
248
|
-
exit 1
|
|
249
|
-
```
|
|
250
|
-
|
|
251
|
-
---
|
|
252
|
-
|
|
253
|
-
## Understanding the JSON Output
|
|
254
|
-
|
|
255
|
-
Gemini CLI's `--output-format json` wraps the model's response in an envelope with this schema:
|
|
256
|
-
|
|
257
|
-
```json
|
|
258
|
-
{
|
|
259
|
-
"response": "The model's text response (string)",
|
|
260
|
-
"stats": {
|
|
261
|
-
"models": {
|
|
262
|
-
"gemini-2.5-pro": {
|
|
263
|
-
"api": {
|
|
264
|
-
"totalRequests": 2,
|
|
265
|
-
"totalErrors": 0,
|
|
266
|
-
"totalLatencyMs": 5053
|
|
267
|
-
},
|
|
268
|
-
"tokens": {
|
|
269
|
-
"prompt": 24939,
|
|
270
|
-
"candidates": 20,
|
|
271
|
-
"total": 25113,
|
|
272
|
-
"cached": 21263,
|
|
273
|
-
"thoughts": 154,
|
|
274
|
-
"tool": 0
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
},
|
|
278
|
-
"tools": {
|
|
279
|
-
"totalCalls": 1,
|
|
280
|
-
"totalSuccess": 1,
|
|
281
|
-
"totalFail": 0,
|
|
282
|
-
"totalDurationMs": 1881
|
|
283
|
-
},
|
|
284
|
-
"files": {
|
|
285
|
-
"totalLinesAdded": 5,
|
|
286
|
-
"totalLinesRemoved": 2
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
```
|
|
291
|
-
|
|
292
|
-
To parse this in your pipeline:
|
|
293
|
-
|
|
294
|
-
```bash
|
|
295
|
-
# Get the model's text response
|
|
296
|
-
echo "$RESULT" | jq -r '.response'
|
|
297
|
-
|
|
298
|
-
# Get total tokens used
|
|
299
|
-
echo "$RESULT" | jq '.stats.models | to_entries[0].value.tokens.total'
|
|
300
|
-
|
|
301
|
-
# Get API latency
|
|
302
|
-
echo "$RESULT" | jq '.stats.models | to_entries[0].value.api.totalLatencyMs'
|
|
303
|
-
|
|
304
|
-
# Check for API errors
|
|
305
|
-
echo "$RESULT" | jq '.stats.models | to_entries[0].value.api.totalErrors'
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
---
|
|
309
|
-
|
|
310
|
-
## Exit Codes for CI Logic
|
|
311
|
-
|
|
312
|
-
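Gemini CLI returns meaningful exit codes you can use for conditional logic (the specific codes may vary between releases, so verify them against the documentation for your installed version):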
|
|
313
|
-
|
|
314
|
-
| Exit Code | Meaning |
|
|
315
|
-
|-----------|---------|
|
|
316
|
-
| 0 | Success |
|
|
317
|
-
| 1 | General error |
|
|
318
|
-
| 2 | Authentication error (bad or missing API key) |
|
|
319
|
-
| 3 | Rate limit / quota exceeded |
|
|
320
|
-
|
|
321
|
-
Example usage:
|
|
322
|
-
|
|
323
|
-
```bash
|
|
324
|
-
gemini -p "Review this code" --output-format json > /tmp/result.json 2>/dev/null
|
|
325
|
-
EXIT_CODE=$?
|
|
326
|
-
|
|
327
|
-
if [ $EXIT_CODE -eq 2 ]; then
|
|
328
|
-
echo "::error::Gemini API key is invalid or missing. Check your GEMINI_API_KEY secret."
|
|
329
|
-
exit 1
|
|
330
|
-
elif [ $EXIT_CODE -eq 3 ]; then
|
|
331
|
-
echo "::warning::Gemini rate limit reached. Skipping review."
|
|
332
|
-
exit 0
|
|
333
|
-
elif [ $EXIT_CODE -ne 0 ]; then
|
|
334
|
-
echo "::warning::Gemini CLI failed with exit code $EXIT_CODE. Skipping review."
|
|
335
|
-
exit 0
|
|
336
|
-
fi
|
|
337
|
-
```
|
|
338
|
-
|
|
339
|
-
---
|
|
340
|
-
|
|
341
|
-
## Tips and Best Practices
|
|
342
|
-
|
|
343
|
-
1. **Use `--output-format json` (not `stream-json`)** for CI. The `json` format gives you a single complete JSON object after all processing is done. The `stream-json` format emits newline-delimited JSON events in real time, which is harder to parse in a script.
|
|
344
|
-
|
|
345
|
-
2. **Authentication**: Set `GEMINI_API_KEY` as an environment variable. Gemini CLI picks it up automatically. No `settings.json` or `.env` file is needed in CI.
|
|
346
|
-
|
|
347
|
-
3. **Model selection**: Add `-m gemini-2.5-flash` for faster, cheaper reviews or `-m gemini-2.5-pro` for deeper analysis:
|
|
348
|
-
```bash
|
|
349
|
-
gemini -m gemini-2.5-flash -p "Review this diff" --output-format json
|
|
350
|
-
```
|
|
351
|
-
|
|
352
|
-
4. **Large diffs**: Gemini models support up to 1M tokens of context. For very large PRs, consider restricting the diff to the file types that matter:
|
|
353
|
-
```bash
|
|
354
|
-
git diff origin/main...HEAD -- '*.py' '*.js' '*.ts' | gemini -p "Review" --output-format json
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
5. **File references with `@` syntax**: You can reference specific files directly in your prompt:
|
|
358
|
-
```bash
|
|
359
|
-
gemini -p "Review @./src/auth.py and @./src/middleware.py for security issues" --output-format json
|
|
360
|
-
```
|
|
361
|
-
|
|
362
|
-
6. **Do NOT use `--yolo` for reviews**. The `--yolo` flag auto-accepts all tool actions (file writes, shell commands). For a read-only review, you do not need it. Only use `--yolo` if you want Gemini to make actual code changes.
|
|
363
|
-
|
|
364
|
-
7. **Rate limits on the free tier**: 60 requests/minute, 1,000 requests/day. For high-volume repos, consider using a paid API key or adding concurrency limits to your workflow.
|
|
365
|
-
|
|
366
|
-
8. **Debug logging**: If reviews are producing unexpected output, add `--debug-log` to capture full API traces:
|
|
367
|
-
```bash
|
|
368
|
-
gemini -p "Review this code" --output-format json --debug-log ./gemini-debug.jsonl
|
|
369
|
-
```
|
|
370
|
-
Then upload `gemini-debug.jsonl` as a workflow artifact for inspection.
|
|
371
|
-
|
|
372
|
-
---
|
|
373
|
-
|
|
374
|
-
## Minimal Quick-Start Example
|
|
375
|
-
|
|
376
|
-
If you just want the simplest possible setup to get started:
|
|
377
|
-
|
|
378
|
-
```yaml
|
|
379
|
-
name: Gemini Review
|
|
380
|
-
on: [pull_request]
|
|
381
|
-
permissions:
|
|
382
|
-
pull-requests: write
|
|
383
|
-
contents: read
|
|
384
|
-
jobs:
|
|
385
|
-
review:
|
|
386
|
-
runs-on: ubuntu-latest
|
|
387
|
-
steps:
|
|
388
|
-
- uses: actions/checkout@v4
|
|
389
|
-
with:
|
|
390
|
-
fetch-depth: 0
|
|
391
|
-
- run: npm install -g @google/gemini-cli
|
|
392
|
-
- name: Review
|
|
393
|
-
env:
|
|
394
|
-
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
395
|
-
run: |
|
|
396
|
-
git fetch origin ${{ github.base_ref }}
|
|
397
|
-
REVIEW=$(git diff origin/${{ github.base_ref }}...HEAD | gemini -p "Review these changes for bugs and issues" --output-format json | jq -r '.response')
|
|
398
|
-
echo "$REVIEW" >> $GITHUB_STEP_SUMMARY
|
|
399
|
-
```
|
|
400
|
-
|
|
401
|
-
This pipes the PR diff into Gemini CLI, extracts the review text from the JSON output with `jq`, and writes it to the GitHub Actions step summary, where it is visible directly on the workflow run page.
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"configuration": "without_skill",
|
|
5
|
-
"expectations": [
|
|
6
|
-
{
|
|
7
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
8
|
-
"passed": false,
|
|
9
|
-
"evidence": "Pipes prompt via stdin (`echo \"$PROMPT\" | gemini`) instead of using -p flag. Does not demonstrate the correct headless invocation."
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"text": "Includes --output-format json flag for structured output",
|
|
13
|
-
"passed": false,
|
|
14
|
-
"evidence": "Mentions `--json` in Step 4 but says 'if supported' and 'check your version'. Does not use the correct `--output-format json` flag."
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
18
|
-
"passed": true,
|
|
19
|
-
"evidence": "Shows `GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}` in env block"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
23
|
-
"passed": true,
|
|
24
|
-
"evidence": "Provides multiple complete workflow YAML files"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
28
|
-
"passed": false,
|
|
29
|
-
"evidence": "Parses the REST API response structure (candidates[0].content.parts[0].text) rather than Gemini CLI's JSON envelope (.response). Falls back to curl-based API calls."
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
33
|
-
"passed": false,
|
|
34
|
-
"evidence": "Uses `--api-key` flag (doesn't exist), `--json` flag (doesn't exist), and references wrong npm package `@anthropic-ai/gemini-cli`"
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|