@biggora/claude-plugins 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -4
- package/package.json +1 -1
- package/registry/registry.json +319 -244
- package/specs/coding.md +24 -0
- package/specs/pod.md +2 -0
- package/src/skills/captcha/README.md +221 -0
- package/src/skills/captcha/SKILL.md +355 -0
- package/src/skills/captcha/references/captcha-types.md +254 -0
- package/src/skills/captcha/references/services.md +172 -0
- package/src/skills/captcha/references/stealth.md +238 -0
- package/src/skills/captcha/scripts/solve_captcha.py +323 -0
- package/src/skills/captcha/scripts/solve_image_grid.py +350 -0
- package/src/skills/google-merchant-api/SKILL.md +581 -0
- package/src/skills/google-merchant-api/references/accounts.md +247 -0
- package/src/skills/google-merchant-api/references/content-api-legacy.md +216 -0
- package/src/skills/google-merchant-api/references/datasources.md +233 -0
- package/src/skills/google-merchant-api/references/inventories.md +201 -0
- package/src/skills/google-merchant-api/references/migration.md +267 -0
- package/src/skills/google-merchant-api/references/products.md +316 -0
- package/src/skills/google-merchant-api/references/promotions.md +201 -0
- package/src/skills/google-merchant-api/references/reports.md +240 -0
- package/src/skills/lv-aggregators-api/SKILL.md +113 -0
- package/src/skills/lv-aggregators-api/references/integration-guide.md +368 -0
- package/src/skills/lv-aggregators-api/references/kurpirkt.md +103 -0
- package/src/skills/lv-aggregators-api/references/salidzini.md +122 -0
- package/src/skills/tailwindcss-best-practices/SKILL.md +180 -0
- package/src/skills/tailwindcss-best-practices/references/best-practices-utility-patterns.md +87 -0
- package/src/skills/tailwindcss-best-practices/references/core-installation.md +109 -0
- package/src/skills/tailwindcss-best-practices/references/core-preflight.md +200 -0
- package/src/skills/tailwindcss-best-practices/references/core-responsive.md +163 -0
- package/src/skills/tailwindcss-best-practices/references/core-source-detection.md +114 -0
- package/src/skills/tailwindcss-best-practices/references/core-theme.md +108 -0
- package/src/skills/tailwindcss-best-practices/references/core-utility-classes.md +59 -0
- package/src/skills/tailwindcss-best-practices/references/core-variants.md +204 -0
- package/src/skills/tailwindcss-best-practices/references/effects-form-controls.md +76 -0
- package/src/skills/tailwindcss-best-practices/references/effects-mask.md +91 -0
- package/src/skills/tailwindcss-best-practices/references/effects-scroll-snap.md +59 -0
- package/src/skills/tailwindcss-best-practices/references/effects-text-shadow.md +78 -0
- package/src/skills/tailwindcss-best-practices/references/effects-transition-animation.md +80 -0
- package/src/skills/tailwindcss-best-practices/references/effects-visibility-interactivity.md +82 -0
- package/src/skills/tailwindcss-best-practices/references/features-content-detection.md +175 -0
- package/src/skills/tailwindcss-best-practices/references/features-custom-styles.md +203 -0
- package/src/skills/tailwindcss-best-practices/references/features-dark-mode.md +137 -0
- package/src/skills/tailwindcss-best-practices/references/features-functions-directives.md +241 -0
- package/src/skills/tailwindcss-best-practices/references/features-upgrade.md +160 -0
- package/src/skills/tailwindcss-best-practices/references/layout-aspect-ratio.md +39 -0
- package/src/skills/tailwindcss-best-practices/references/layout-columns.md +80 -0
- package/src/skills/tailwindcss-best-practices/references/layout-display.md +110 -0
- package/src/skills/tailwindcss-best-practices/references/layout-flexbox.md +112 -0
- package/src/skills/tailwindcss-best-practices/references/layout-grid.md +87 -0
- package/src/skills/tailwindcss-best-practices/references/layout-height.md +97 -0
- package/src/skills/tailwindcss-best-practices/references/layout-inset.md +103 -0
- package/src/skills/tailwindcss-best-practices/references/layout-logical-properties.md +92 -0
- package/src/skills/tailwindcss-best-practices/references/layout-margin.md +126 -0
- package/src/skills/tailwindcss-best-practices/references/layout-min-max-sizing.md +63 -0
- package/src/skills/tailwindcss-best-practices/references/layout-object-fit-position.md +64 -0
- package/src/skills/tailwindcss-best-practices/references/layout-overflow.md +57 -0
- package/src/skills/tailwindcss-best-practices/references/layout-padding.md +77 -0
- package/src/skills/tailwindcss-best-practices/references/layout-position.md +85 -0
- package/src/skills/tailwindcss-best-practices/references/layout-tables.md +67 -0
- package/src/skills/tailwindcss-best-practices/references/layout-width.md +102 -0
- package/src/skills/tailwindcss-best-practices/references/transform-base.md +68 -0
- package/src/skills/tailwindcss-best-practices/references/transform-rotate.md +70 -0
- package/src/skills/tailwindcss-best-practices/references/transform-scale.md +83 -0
- package/src/skills/tailwindcss-best-practices/references/transform-skew.md +62 -0
- package/src/skills/tailwindcss-best-practices/references/transform-translate.md +77 -0
- package/src/skills/tailwindcss-best-practices/references/typography-font-text.md +142 -0
- package/src/skills/tailwindcss-best-practices/references/typography-list-style.md +65 -0
- package/src/skills/tailwindcss-best-practices/references/typography-text-align.md +60 -0
- package/src/skills/tailwindcss-best-practices/references/visual-background.md +76 -0
- package/src/skills/tailwindcss-best-practices/references/visual-border.md +108 -0
- package/src/skills/tailwindcss-best-practices/references/visual-effects.md +111 -0
- package/src/skills/tailwindcss-best-practices/references/visual-svg.md +82 -0
- package/src/skills/test-mobile-app/SKILL.md +11 -6
- package/src/skills/test-mobile-app/scripts/analyze_apk.py +15 -4
- package/src/skills/test-mobile-app/scripts/check_environment.py +5 -5
- package/src/skills/test-mobile-app/scripts/run_tests.py +1 -1
- package/src/skills/test-web-ui/SKILL.md +264 -84
- package/src/skills/test-web-ui/scripts/discover.py +25 -12
- package/src/skills/test-web-ui/scripts/run_tests.py +3 -2
- package/src/skills/vite-best-practices/SKILL.md +115 -0
- package/src/skills/vite-best-practices/references/build-and-ssr.md +255 -0
- package/src/skills/vite-best-practices/references/core-config.md +231 -0
- package/src/skills/vite-best-practices/references/core-features.md +222 -0
- package/src/skills/vite-best-practices/references/core-plugin-api.md +294 -0
- package/src/skills/vite-best-practices/references/environment-api.md +108 -0
- package/src/skills/vite-best-practices/references/rolldown-migration.md +242 -0
- package/codex-cli-workspace/iteration-1/benchmark.json +0 -122
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/outputs/response.md +0 -163
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/grading.json +0 -58
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/outputs/response.md +0 -151
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -86
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -58
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/eval_metadata.json +0 -13
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/grading.json +0 -52
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/outputs/response.md +0 -130
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/grading.json +0 -64
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/outputs/response.md +0 -209
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/timing.json +0 -5
- package/codex-cli-workspace/iteration-1/review.html +0 -1325
- package/gemini-cli-workspace/iteration-1/benchmark.json +0 -86
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/outputs/response.md +0 -401
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/outputs/response.md +0 -405
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -212
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -427
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/eval_metadata.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -171
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -199
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-1/review.html +0 -1325
- package/gemini-cli-workspace/iteration-2/benchmark.json +0 -173
- package/gemini-cli-workspace/iteration-2/benchmark.md +0 -28
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/outputs/response.md +0 -195
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/outputs/response.md +0 -377
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/eval_metadata.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -127
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/grading.json +0 -37
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/eval_metadata.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -91
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/grading.json +0 -32
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -112
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/timing.json +0 -5
- package/gemini-cli-workspace/iteration-2/eval-viewer.html +0 -1325
- package/screen-recording-workspace/evals.json +0 -41
- package/screen-recording-workspace/iteration-1/benchmark.json +0 -102
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/eval_metadata.json +0 -31
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/grading.json +0 -11
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/record_screen.py +0 -67
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/timing.json +0 -5
- package/screen-recording-workspace/iteration-1/review.html +0 -1325
- package/src/skills/codex-cli/evals/evals.json +0 -47
- package/src/skills/gemini-cli/evals/evals.json +0 -46
- package/src/skills/tm-search/evals/evals.json +0 -23
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"metadata": {
|
|
3
|
-
"skill_name": "gemini-cli",
|
|
4
|
-
"skill_path": "src/skills/gemini-cli/",
|
|
5
|
-
"executor_model": "claude-opus-4-6",
|
|
6
|
-
"analyzer_model": "claude-opus-4-6",
|
|
7
|
-
"timestamp": "2026-03-11T18:15:00Z",
|
|
8
|
-
"evals_run": [1, 2, 3],
|
|
9
|
-
"runs_per_configuration": 3
|
|
10
|
-
},
|
|
11
|
-
"runs": [
|
|
12
|
-
{
|
|
13
|
-
"eval_id": 1,
|
|
14
|
-
"eval_name": "cicd-setup",
|
|
15
|
-
"configuration": "with_skill",
|
|
16
|
-
"run_number": 1,
|
|
17
|
-
"result": {
|
|
18
|
-
"pass_rate": 1.0,
|
|
19
|
-
"passed": 6,
|
|
20
|
-
"failed": 0,
|
|
21
|
-
"total": 6,
|
|
22
|
-
"time_seconds": 53.6,
|
|
23
|
-
"tokens": 64216
|
|
24
|
-
},
|
|
25
|
-
"expectations": [
|
|
26
|
-
{"text": "Uses -p or --prompt flag for headless/non-interactive mode", "passed": true, "evidence": "Line 53: gemini -p with --output-format json"},
|
|
27
|
-
{"text": "Includes --output-format json flag for structured output", "passed": true, "evidence": "Line 62: --output-format json flag used correctly"},
|
|
28
|
-
{"text": "Shows how to set GEMINI_API_KEY from GitHub secrets", "passed": true, "evidence": "Line 42: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}"},
|
|
29
|
-
{"text": "Provides a GitHub Actions workflow YAML snippet", "passed": true, "evidence": "Complete workflow YAML with checkout, node setup, install, review, and comment steps"},
|
|
30
|
-
{"text": "Shows how to parse JSON response (e.g., with jq .response)", "passed": true, "evidence": "Line 65: jq -r '.response' and line 68: token stats extraction"},
|
|
31
|
-
{"text": "Does not invent non-existent CLI flags or options", "passed": true, "evidence": "Only uses verified flags: -p and --output-format json. Exit codes match corrected values."}
|
|
32
|
-
],
|
|
33
|
-
"notes": []
|
|
34
|
-
},
|
|
35
|
-
{
|
|
36
|
-
"eval_id": 1,
|
|
37
|
-
"eval_name": "cicd-setup",
|
|
38
|
-
"configuration": "without_skill",
|
|
39
|
-
"run_number": 1,
|
|
40
|
-
"result": {
|
|
41
|
-
"pass_rate": 0.6667,
|
|
42
|
-
"passed": 4,
|
|
43
|
-
"failed": 2,
|
|
44
|
-
"total": 6,
|
|
45
|
-
"time_seconds": 68.0,
|
|
46
|
-
"tokens": 59322
|
|
47
|
-
},
|
|
48
|
-
"expectations": [
|
|
49
|
-
{"text": "Uses -p or --prompt flag for headless/non-interactive mode", "passed": true, "evidence": "Line 96: gemini -p"},
|
|
50
|
-
{"text": "Includes --output-format json flag for structured output", "passed": false, "evidence": "Does not use --output-format json. Line 95 comment mentions '--json flag if available' which is not a real flag."},
|
|
51
|
-
{"text": "Shows how to set GEMINI_API_KEY from GitHub secrets", "passed": true, "evidence": "Line 62: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}"},
|
|
52
|
-
{"text": "Provides a GitHub Actions workflow YAML snippet", "passed": true, "evidence": "Complete workflow YAML provided"},
|
|
53
|
-
{"text": "Shows how to parse JSON response (e.g., with jq .response)", "passed": true, "evidence": "Uses jq for JSON validation and parsing"},
|
|
54
|
-
{"text": "Does not invent non-existent CLI flags or options", "passed": false, "evidence": "Line 95: references '--json flag if available' which is not a real Gemini CLI flag"}
|
|
55
|
-
],
|
|
56
|
-
"notes": []
|
|
57
|
-
},
|
|
58
|
-
{
|
|
59
|
-
"eval_id": 2,
|
|
60
|
-
"eval_name": "mcp-server-config",
|
|
61
|
-
"configuration": "with_skill",
|
|
62
|
-
"run_number": 1,
|
|
63
|
-
"result": {
|
|
64
|
-
"pass_rate": 1.0,
|
|
65
|
-
"passed": 6,
|
|
66
|
-
"failed": 0,
|
|
67
|
-
"total": 6,
|
|
68
|
-
"time_seconds": 47.3,
|
|
69
|
-
"tokens": 66522
|
|
70
|
-
},
|
|
71
|
-
"expectations": [
|
|
72
|
-
{"text": "Points to ~/.gemini/settings.json or project-level settings.json", "passed": true, "evidence": "Opens with ~/.gemini/settings.json (global) or <project>/.gemini/settings.json (project-scoped)"},
|
|
73
|
-
{"text": "Shows mcpServers config with command, args fields", "passed": true, "evidence": "Full JSON config with mcpServers, command, args, env, timeout, trust, includeTools"},
|
|
74
|
-
{"text": "Shows $VAR pattern for environment variable references in env field", "passed": true, "evidence": "Shows \"API_KEY\": \"$MY_API_KEY\" pattern"},
|
|
75
|
-
{"text": "Explains includeTools and/or excludeTools for restricting exposed tools", "passed": true, "evidence": "Dedicated section for both includeTools and excludeTools with examples"},
|
|
76
|
-
{"text": "Mentions security considerations (trust field, not hardcoding keys, etc.)", "passed": true, "evidence": "Explains trust field, warns never to hardcode secrets, mentions env var redaction"},
|
|
77
|
-
{"text": "Uses correct Python command (python/python3) in the command field", "passed": true, "evidence": "Uses \"command\": \"python\" consistently"}
|
|
78
|
-
],
|
|
79
|
-
"notes": []
|
|
80
|
-
},
|
|
81
|
-
{
|
|
82
|
-
"eval_id": 2,
|
|
83
|
-
"eval_name": "mcp-server-config",
|
|
84
|
-
"configuration": "without_skill",
|
|
85
|
-
"run_number": 1,
|
|
86
|
-
"result": {
|
|
87
|
-
"pass_rate": 1.0,
|
|
88
|
-
"passed": 6,
|
|
89
|
-
"failed": 0,
|
|
90
|
-
"total": 6,
|
|
91
|
-
"time_seconds": 86.7,
|
|
92
|
-
"tokens": 61417
|
|
93
|
-
},
|
|
94
|
-
"expectations": [
|
|
95
|
-
{"text": "Points to ~/.gemini/settings.json or project-level settings.json", "passed": true, "evidence": "States ~/.gemini/settings.json (global) and .gemini/settings.json (project-level)"},
|
|
96
|
-
{"text": "Shows mcpServers config with command, args fields", "passed": true, "evidence": "Full JSON config with mcpServers, command, args, env"},
|
|
97
|
-
{"text": "Shows $VAR pattern for environment variable references in env field", "passed": true, "evidence": "Shows both $MY_API_KEY and ${ANOTHER_ENV_VAR} syntax"},
|
|
98
|
-
{"text": "Explains includeTools and/or excludeTools for restricting exposed tools", "passed": true, "evidence": "Dedicated sections for includeTools and excludeTools with examples"},
|
|
99
|
-
{"text": "Mentions security considerations (trust field, not hardcoding keys, etc.)", "passed": true, "evidence": "Mentions trust field, automatic redaction, env variable security"},
|
|
100
|
-
{"text": "Uses correct Python command (python/python3) in the command field", "passed": true, "evidence": "Uses \"command\": \"python\" consistently"}
|
|
101
|
-
],
|
|
102
|
-
"notes": []
|
|
103
|
-
},
|
|
104
|
-
{
|
|
105
|
-
"eval_id": 3,
|
|
106
|
-
"eval_name": "custom-slash-command",
|
|
107
|
-
"configuration": "with_skill",
|
|
108
|
-
"run_number": 1,
|
|
109
|
-
"result": {
|
|
110
|
-
"pass_rate": 1.0,
|
|
111
|
-
"passed": 5,
|
|
112
|
-
"failed": 0,
|
|
113
|
-
"total": 5,
|
|
114
|
-
"time_seconds": 43.7,
|
|
115
|
-
"tokens": 63047
|
|
116
|
-
},
|
|
117
|
-
"expectations": [
|
|
118
|
-
{"text": "Points to ~/.gemini/commands/ for global user-scoped commands", "passed": true, "evidence": "States ~/.gemini/commands/commit.toml for global commands"},
|
|
119
|
-
{"text": "Shows .toml file format with prompt field", "passed": true, "evidence": "Complete .toml example with description and prompt fields"},
|
|
120
|
-
{"text": "Uses !{git diff --cached} shell execution syntax in the prompt", "passed": true, "evidence": "Uses !{git diff --cached} and explains the syntax"},
|
|
121
|
-
{"text": "Warns that custom slash commands don't work in headless/non-interactive mode", "passed": true, "evidence": "States 'Custom slash commands work in interactive mode only. They are not available in headless mode (-p flag).'"},
|
|
122
|
-
{"text": "Provides a complete, copy-pasteable .toml file example", "passed": true, "evidence": "Complete commit.toml ready to copy-paste"}
|
|
123
|
-
],
|
|
124
|
-
"notes": []
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
"eval_id": 3,
|
|
128
|
-
"eval_name": "custom-slash-command",
|
|
129
|
-
"configuration": "without_skill",
|
|
130
|
-
"run_number": 1,
|
|
131
|
-
"result": {
|
|
132
|
-
"pass_rate": 0.8,
|
|
133
|
-
"passed": 4,
|
|
134
|
-
"failed": 1,
|
|
135
|
-
"total": 5,
|
|
136
|
-
"time_seconds": 73.6,
|
|
137
|
-
"tokens": 59908
|
|
138
|
-
},
|
|
139
|
-
"expectations": [
|
|
140
|
-
{"text": "Points to ~/.gemini/commands/ for global user-scoped commands", "passed": true, "evidence": "States ~/.gemini/commands/ for global commands"},
|
|
141
|
-
{"text": "Shows .toml file format with prompt field", "passed": true, "evidence": "Complete .toml example with description and prompt fields"},
|
|
142
|
-
{"text": "Uses !{git diff --cached} shell execution syntax in the prompt", "passed": true, "evidence": "Uses !{git diff --cached} in the prompt field"},
|
|
143
|
-
{"text": "Warns that custom slash commands don't work in headless/non-interactive mode", "passed": false, "evidence": "No mention of headless mode limitations anywhere in the response"},
|
|
144
|
-
{"text": "Provides a complete, copy-pasteable .toml file example", "passed": true, "evidence": "Complete commit.toml file provided"}
|
|
145
|
-
],
|
|
146
|
-
"notes": []
|
|
147
|
-
}
|
|
148
|
-
],
|
|
149
|
-
"run_summary": {
|
|
150
|
-
"with_skill": {
|
|
151
|
-
"pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
|
|
152
|
-
"time_seconds": {"mean": 48.2, "stddev": 5.0, "min": 43.7, "max": 53.6},
|
|
153
|
-
"tokens": {"mean": 64595, "stddev": 1741, "min": 63047, "max": 66522}
|
|
154
|
-
},
|
|
155
|
-
"without_skill": {
|
|
156
|
-
"pass_rate": {"mean": 0.822, "stddev": 0.167, "min": 0.667, "max": 1.0},
|
|
157
|
-
"time_seconds": {"mean": 76.1, "stddev": 9.7, "min": 68.0, "max": 86.7},
|
|
158
|
-
"tokens": {"mean": 60216, "stddev": 1372, "min": 59322, "max": 61417}
|
|
159
|
-
},
|
|
160
|
-
"delta": {
|
|
161
|
-
"pass_rate": "+0.18",
|
|
162
|
-
"time_seconds": "-27.9",
|
|
163
|
-
"tokens": "+4379"
|
|
164
|
-
}
|
|
165
|
-
},
|
|
166
|
-
"notes": [
|
|
167
|
-
"With-skill achieves 100% pass rate across all 3 evals (17/17 assertions) vs 82.2% baseline (14/17)",
|
|
168
|
-
"Eval 2 (MCP server config) is non-discriminating — both configs pass all 6 assertions. This eval could be made harder.",
|
|
169
|
-
"With-skill is ~28s faster on average despite using ~4k more tokens, suggesting more focused responses",
|
|
170
|
-
"Baseline failures are accuracy-related: hallucinated --json flag (eval 1) and missing headless limitation warning (eval 3)",
|
|
171
|
-
"The skill's value is strongest on accuracy assertions — it prevents hallucination of non-existent flags and ensures important caveats are mentioned"
|
|
172
|
-
]
|
|
173
|
-
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
# Skill Benchmark: gemini-cli
|
|
2
|
-
|
|
3
|
-
**Model**: claude-opus-4-6
|
|
4
|
-
**Date**: 2026-03-11T18:15:00Z
|
|
5
|
-
**Evals**: 1, 2, 3 (1 run each per configuration)
|
|
6
|
-
|
|
7
|
-
## Summary
|
|
8
|
-
|
|
9
|
-
| Metric | With Skill | Without Skill | Delta |
|
|
10
|
-
|--------|------------|---------------|-------|
|
|
11
|
-
| Pass Rate | 100% ± 0% | 82% ± 17% | +0.18 |
|
|
12
|
-
| Time | 48.2s ± 5.0s | 76.1s ± 9.7s | -27.9s |
|
|
13
|
-
| Tokens | 64595 ± 1741 | 60216 ± 1372 | +4379 |
|
|
14
|
-
|
|
15
|
-
## Per-Eval Breakdown
|
|
16
|
-
|
|
17
|
-
| Eval | With Skill | Without Skill |
|
|
18
|
-
|------|-----------|---------------|
|
|
19
|
-
| 1: CI/CD Setup | 6/6 (100%) | 4/6 (67%) |
|
|
20
|
-
| 2: MCP Server Config | 6/6 (100%) | 6/6 (100%) |
|
|
21
|
-
| 3: Custom Slash Command | 5/5 (100%) | 4/5 (80%) |
|
|
22
|
-
|
|
23
|
-
## Analyst Notes
|
|
24
|
-
|
|
25
|
-
- **Eval 2 is non-discriminating**: Both configs pass all 6 assertions. Consider adding harder assertions (e.g., correct timeout default of 600000ms, httpUrl transport option).
|
|
26
|
-
- **Baseline failures are accuracy-related**: The without-skill run hallucinated a `--json` flag (eval 1) and omitted the headless limitation warning (eval 3). These are exactly the kinds of errors the skill prevents.
|
|
27
|
-
- **Speed advantage**: With-skill runs are ~28s faster on average despite using ~4k more tokens, suggesting the skill helps produce more focused, direct responses.
|
|
28
|
-
- **The skill's value is strongest on accuracy**: It prevents hallucination of non-existent flags and ensures important caveats are mentioned.
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"prompt": "I want to set up gemini cli in my CI/CD pipeline on GitHub Actions so it can automatically review PRs. We use an API key stored in GitHub secrets. How do I set this up with structured JSON output so I can parse the review?",
|
|
5
|
-
"assertions": [
|
|
6
|
-
{
|
|
7
|
-
"id": "mentions-p-flag",
|
|
8
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
9
|
-
"type": "content_check"
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"id": "mentions-json-output",
|
|
13
|
-
"text": "Includes --output-format json flag for structured output",
|
|
14
|
-
"type": "content_check"
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"id": "mentions-api-key-env",
|
|
18
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
19
|
-
"type": "content_check"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"id": "provides-workflow-yaml",
|
|
23
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
24
|
-
"type": "content_check"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"id": "mentions-jq-parsing",
|
|
28
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
29
|
-
"type": "content_check"
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"id": "no-hallucinated-flags",
|
|
33
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
34
|
-
"type": "accuracy_check"
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"config": "with_skill",
|
|
5
|
-
"expectations": [
|
|
6
|
-
{
|
|
7
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
8
|
-
"passed": true,
|
|
9
|
-
"evidence": "Line 53: gemini -p \"You are a senior code reviewer...\" with --output-format json"
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"text": "Includes --output-format json flag for structured output",
|
|
13
|
-
"passed": true,
|
|
14
|
-
"evidence": "Line 62: --output-format json flag used correctly"
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
18
|
-
"passed": true,
|
|
19
|
-
"evidence": "Line 42: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} in env block"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
23
|
-
"passed": true,
|
|
24
|
-
"evidence": "Complete workflow YAML at .github/workflows/gemini-pr-review.yml with checkout, node setup, install, review, and comment steps"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
28
|
-
"passed": true,
|
|
29
|
-
"evidence": "Line 65: REVIEW=$(echo \"$RESULT\" | jq -r '.response') and line 68: token stats extraction with jq"
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
33
|
-
"passed": true,
|
|
34
|
-
"evidence": "Only uses verified flags: -p and --output-format json. Exit codes match corrected values (0, 1, 42, 53)."
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
# Setting Up Gemini CLI for Automated PR Reviews in GitHub Actions
|
|
2
|
-
|
|
3
|
-
## Prerequisites
|
|
4
|
-
|
|
5
|
-
1. A Gemini API key (get one at https://aistudio.google.com/apikey)
|
|
6
|
-
2. The API key stored as a GitHub Actions secret named `GEMINI_API_KEY`
|
|
7
|
-
|
|
8
|
-
## GitHub Actions Workflow
|
|
9
|
-
|
|
10
|
-
Create a file at `.github/workflows/gemini-pr-review.yml`:
|
|
11
|
-
|
|
12
|
-
```yaml
|
|
13
|
-
name: Gemini PR Review
|
|
14
|
-
|
|
15
|
-
on:
|
|
16
|
-
pull_request:
|
|
17
|
-
types: [opened, synchronize]
|
|
18
|
-
|
|
19
|
-
permissions:
|
|
20
|
-
pull-requests: write
|
|
21
|
-
contents: read
|
|
22
|
-
|
|
23
|
-
jobs:
|
|
24
|
-
review:
|
|
25
|
-
runs-on: ubuntu-latest
|
|
26
|
-
steps:
|
|
27
|
-
- name: Checkout code
|
|
28
|
-
uses: actions/checkout@v4
|
|
29
|
-
with:
|
|
30
|
-
fetch-depth: 0 # Full history so we can diff against base
|
|
31
|
-
|
|
32
|
-
- name: Set up Node.js
|
|
33
|
-
uses: actions/setup-node@v4
|
|
34
|
-
with:
|
|
35
|
-
node-version: '20'
|
|
36
|
-
|
|
37
|
-
- name: Install Gemini CLI
|
|
38
|
-
run: npm install -g @google/gemini-cli
|
|
39
|
-
|
|
40
|
-
- name: Run AI Code Review
|
|
41
|
-
env:
|
|
42
|
-
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
43
|
-
run: |
|
|
44
|
-
# Get the diff between the PR branch and the base branch
|
|
45
|
-
DIFF=$(git diff origin/${{ github.base_ref }}...HEAD)
|
|
46
|
-
|
|
47
|
-
if [ -z "$DIFF" ]; then
|
|
48
|
-
echo "No changes to review."
|
|
49
|
-
exit 0
|
|
50
|
-
fi
|
|
51
|
-
|
|
52
|
-
# Run Gemini CLI with structured JSON output
|
|
53
|
-
RESULT=$(echo "$DIFF" | gemini -p "You are a senior code reviewer. Review the following pull request diff for:
|
|
54
|
-
1. Bugs and logical errors
|
|
55
|
-
2. Security vulnerabilities
|
|
56
|
-
3. Performance issues
|
|
57
|
-
4. Code style and best practices
|
|
58
|
-
|
|
59
|
-
Provide your review in a structured format with sections for each category. For each issue found, include the file name, line context, severity (critical/warning/info), and a suggested fix.
|
|
60
|
-
|
|
61
|
-
If no issues are found in a category, say so explicitly." \
|
|
62
|
-
--output-format json)
|
|
63
|
-
|
|
64
|
-
# Extract just the response text
|
|
65
|
-
REVIEW=$(echo "$RESULT" | jq -r '.response')
|
|
66
|
-
|
|
67
|
-
# Extract token usage stats for logging
|
|
68
|
-
TOKENS=$(echo "$RESULT" | jq '.stats.models | to_entries[0].value.tokens.total // "unknown"')
|
|
69
|
-
echo "Tokens used: $TOKENS"
|
|
70
|
-
|
|
71
|
-
# Write the review to the GitHub Step Summary
|
|
72
|
-
echo "## Gemini Code Review" >> $GITHUB_STEP_SUMMARY
|
|
73
|
-
echo "" >> $GITHUB_STEP_SUMMARY
|
|
74
|
-
echo "$REVIEW" >> $GITHUB_STEP_SUMMARY
|
|
75
|
-
|
|
76
|
-
# Save the full JSON result for downstream steps
|
|
77
|
-
echo "$RESULT" > review-result.json
|
|
78
|
-
|
|
79
|
-
- name: Post review as PR comment
|
|
80
|
-
if: success()
|
|
81
|
-
env:
|
|
82
|
-
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
83
|
-
run: |
|
|
84
|
-
REVIEW=$(jq -r '.response' review-result.json)
|
|
85
|
-
TOKENS=$(jq '.stats.models | to_entries[0].value.tokens.total // "unknown"' review-result.json)
|
|
86
|
-
|
|
87
|
-
COMMENT_BODY=$(cat <<EOF
|
|
88
|
-
## Gemini Automated Code Review
|
|
89
|
-
|
|
90
|
-
$REVIEW
|
|
91
|
-
|
|
92
|
-
---
|
|
93
|
-
*Reviewed by Gemini CLI | Tokens used: $TOKENS*
|
|
94
|
-
EOF
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
gh pr comment ${{ github.event.pull_request.number }} --body "$COMMENT_BODY"
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
## How It Works
|
|
101
|
-
|
|
102
|
-
### Authentication
|
|
103
|
-
|
|
104
|
-
The workflow uses the `GEMINI_API_KEY` environment variable, which is pulled from GitHub Secrets. Gemini CLI automatically detects this variable -- no additional configuration is needed. The free tier allows 1,000 requests per day, which is sufficient for most PR review workflows.
|
|
105
|
-
|
|
106
|
-
### Structured JSON Output
|
|
107
|
-
|
|
108
|
-
The `--output-format json` flag causes Gemini CLI to return a single JSON object once all processing is complete. The schema looks like this:
|
|
109
|
-
|
|
110
|
-
```json
|
|
111
|
-
{
|
|
112
|
-
"response": "The review text content...",
|
|
113
|
-
"stats": {
|
|
114
|
-
"models": {
|
|
115
|
-
"gemini-2.5-pro": {
|
|
116
|
-
"tokens": {
|
|
117
|
-
"prompt": 24939,
|
|
118
|
-
"candidates": 20,
|
|
119
|
-
"total": 25113
|
|
120
|
-
},
|
|
121
|
-
"api": {
|
|
122
|
-
"totalRequests": 2,
|
|
123
|
-
"totalErrors": 0,
|
|
124
|
-
"totalLatencyMs": 5053
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
},
|
|
128
|
-
"tools": {
|
|
129
|
-
"totalCalls": 1,
|
|
130
|
-
"totalSuccess": 1,
|
|
131
|
-
"totalFail": 0
|
|
132
|
-
},
|
|
133
|
-
"files": {
|
|
134
|
-
"totalLinesAdded": 0,
|
|
135
|
-
"totalLinesRemoved": 0
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
```
|
|
140
|
-
|
|
141
|
-
You can extract the review text with `jq -r '.response'` and inspect token usage or tool-call statistics through the `stats` object.
|
|
142
|
-
|
|
143
|
-
### Exit Codes
|
|
144
|
-
|
|
145
|
-
Gemini CLI uses these exit codes, which you can use for conditional logic:
|
|
146
|
-
|
|
147
|
-
| Code | Meaning |
|
|
148
|
-
|------|---------|
|
|
149
|
-
| 0 | Success |
|
|
150
|
-
| 1 | General error |
|
|
151
|
-
| 42 | Input error (invalid arguments, missing config) |
|
|
152
|
-
| 53 | Turn limit exceeded |
|
|
153
|
-
|
|
154
|
-
## Parsing the JSON Output in a Downstream Step
|
|
155
|
-
|
|
156
|
-
If you need to parse the review in a separate step (for example, to fail the build on critical findings), you can do it like this:
|
|
157
|
-
|
|
158
|
-
```yaml
|
|
159
|
-
- name: Check for critical issues
|
|
160
|
-
run: |
|
|
161
|
-
REVIEW=$(jq -r '.response' review-result.json)
|
|
162
|
-
|
|
163
|
-
# Naive check: fails if the word "critical" appears anywhere in the review text, including phrases such as "no critical issues found"
|
|
164
|
-
if echo "$REVIEW" | grep -qi "critical"; then
|
|
165
|
-
echo "::error::Critical issues found in code review"
|
|
166
|
-
exit 1
|
|
167
|
-
fi
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
## Advanced: Using a GEMINI.md for Consistent Review Context
|
|
171
|
-
|
|
172
|
-
Create a `.gemini/GEMINI.md` file in your repository root to give Gemini persistent context about your project's standards:
|
|
173
|
-
|
|
174
|
-
```markdown
|
|
175
|
-
# Project Review Guidelines
|
|
176
|
-
|
|
177
|
-
## Code Standards
|
|
178
|
-
- All functions must have JSDoc comments
|
|
179
|
-
- Use 2-space indentation
|
|
180
|
-
- No console.log in production code
|
|
181
|
-
|
|
182
|
-
## Security Requirements
|
|
183
|
-
- Never hardcode secrets or API keys
|
|
184
|
-
- All user input must be validated
|
|
185
|
-
- SQL queries must use parameterized statements
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
Gemini CLI automatically loads this file when running in your repo directory, so the reviewer will apply your project-specific rules during every PR review.
|
|
189
|
-
|
|
190
|
-
## Known Limitations
|
|
191
|
-
|
|
192
|
-
- Custom slash commands (`.toml` files) do not work in headless/non-interactive mode. You must embed the full prompt text directly in your script, as shown above.
|
|
193
|
-
- Extensions are also unavailable in headless mode.
|
|
194
|
-
- The free tier is rate-limited to 60 requests/minute and 1,000 requests/day. For high-volume repos, consider using a paid API key.
|
|
195
|
-
- Very large diffs may exceed the model's context window. Consider filtering the diff down to only the changed files that matter (e.g., by excluding lockfiles and generated code).
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eval_id": 1,
|
|
3
|
-
"eval_name": "cicd-setup",
|
|
4
|
-
"config": "without_skill",
|
|
5
|
-
"expectations": [
|
|
6
|
-
{
|
|
7
|
-
"text": "Uses -p or --prompt flag for headless/non-interactive mode",
|
|
8
|
-
"passed": true,
|
|
9
|
-
"evidence": "Line 96: gemini -p \"$(cat review_prompt.txt)\" and line 351: gemini -p \"Review this code diff...\""
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"text": "Includes --output-format json flag for structured output",
|
|
13
|
-
"passed": false,
|
|
14
|
-
"evidence": "Does not use --output-format json. Instead relies on prompt-based JSON (asking model to return JSON in prompt text). Line 95 comment mentions '--json flag if available' which is not a real Gemini CLI flag."
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
|
|
18
|
-
"passed": true,
|
|
19
|
-
"evidence": "Line 62: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} in env block"
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"text": "Provides a GitHub Actions workflow YAML snippet",
|
|
23
|
-
"passed": true,
|
|
24
|
-
"evidence": "Complete workflow YAML provided with checkout, node setup, install, review, and post-comment steps"
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"text": "Shows how to parse JSON response (e.g., with jq .response)",
|
|
28
|
-
"passed": true,
|
|
29
|
-
"evidence": "Uses jq for JSON validation (line 102) and parsing (line 248: jq -r '.candidates[0].content.parts[0].text'). Different schema than CLI --output-format json but still demonstrates jq parsing."
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"text": "Does not invent non-existent CLI flags or options",
|
|
33
|
-
"passed": false,
|
|
34
|
-
"evidence": "Line 95: comment references '--json flag if available' which is not a real Gemini CLI flag. Should use --output-format json instead."
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|