@biggora/claude-plugins 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/README.md +11 -4
  2. package/package.json +1 -1
  3. package/registry/registry.json +319 -244
  4. package/specs/coding.md +24 -0
  5. package/specs/pod.md +2 -0
  6. package/src/skills/captcha/README.md +221 -0
  7. package/src/skills/captcha/SKILL.md +355 -0
  8. package/src/skills/captcha/references/captcha-types.md +254 -0
  9. package/src/skills/captcha/references/services.md +172 -0
  10. package/src/skills/captcha/references/stealth.md +238 -0
  11. package/src/skills/captcha/scripts/solve_captcha.py +323 -0
  12. package/src/skills/captcha/scripts/solve_image_grid.py +350 -0
  13. package/src/skills/google-merchant-api/SKILL.md +581 -0
  14. package/src/skills/google-merchant-api/references/accounts.md +247 -0
  15. package/src/skills/google-merchant-api/references/content-api-legacy.md +216 -0
  16. package/src/skills/google-merchant-api/references/datasources.md +233 -0
  17. package/src/skills/google-merchant-api/references/inventories.md +201 -0
  18. package/src/skills/google-merchant-api/references/migration.md +267 -0
  19. package/src/skills/google-merchant-api/references/products.md +316 -0
  20. package/src/skills/google-merchant-api/references/promotions.md +201 -0
  21. package/src/skills/google-merchant-api/references/reports.md +240 -0
  22. package/src/skills/lv-aggregators-api/SKILL.md +113 -0
  23. package/src/skills/lv-aggregators-api/references/integration-guide.md +368 -0
  24. package/src/skills/lv-aggregators-api/references/kurpirkt.md +103 -0
  25. package/src/skills/lv-aggregators-api/references/salidzini.md +122 -0
  26. package/src/skills/tailwindcss-best-practices/SKILL.md +180 -0
  27. package/src/skills/tailwindcss-best-practices/references/best-practices-utility-patterns.md +87 -0
  28. package/src/skills/tailwindcss-best-practices/references/core-installation.md +109 -0
  29. package/src/skills/tailwindcss-best-practices/references/core-preflight.md +200 -0
  30. package/src/skills/tailwindcss-best-practices/references/core-responsive.md +163 -0
  31. package/src/skills/tailwindcss-best-practices/references/core-source-detection.md +114 -0
  32. package/src/skills/tailwindcss-best-practices/references/core-theme.md +108 -0
  33. package/src/skills/tailwindcss-best-practices/references/core-utility-classes.md +59 -0
  34. package/src/skills/tailwindcss-best-practices/references/core-variants.md +204 -0
  35. package/src/skills/tailwindcss-best-practices/references/effects-form-controls.md +76 -0
  36. package/src/skills/tailwindcss-best-practices/references/effects-mask.md +91 -0
  37. package/src/skills/tailwindcss-best-practices/references/effects-scroll-snap.md +59 -0
  38. package/src/skills/tailwindcss-best-practices/references/effects-text-shadow.md +78 -0
  39. package/src/skills/tailwindcss-best-practices/references/effects-transition-animation.md +80 -0
  40. package/src/skills/tailwindcss-best-practices/references/effects-visibility-interactivity.md +82 -0
  41. package/src/skills/tailwindcss-best-practices/references/features-content-detection.md +175 -0
  42. package/src/skills/tailwindcss-best-practices/references/features-custom-styles.md +203 -0
  43. package/src/skills/tailwindcss-best-practices/references/features-dark-mode.md +137 -0
  44. package/src/skills/tailwindcss-best-practices/references/features-functions-directives.md +241 -0
  45. package/src/skills/tailwindcss-best-practices/references/features-upgrade.md +160 -0
  46. package/src/skills/tailwindcss-best-practices/references/layout-aspect-ratio.md +39 -0
  47. package/src/skills/tailwindcss-best-practices/references/layout-columns.md +80 -0
  48. package/src/skills/tailwindcss-best-practices/references/layout-display.md +110 -0
  49. package/src/skills/tailwindcss-best-practices/references/layout-flexbox.md +112 -0
  50. package/src/skills/tailwindcss-best-practices/references/layout-grid.md +87 -0
  51. package/src/skills/tailwindcss-best-practices/references/layout-height.md +97 -0
  52. package/src/skills/tailwindcss-best-practices/references/layout-inset.md +103 -0
  53. package/src/skills/tailwindcss-best-practices/references/layout-logical-properties.md +92 -0
  54. package/src/skills/tailwindcss-best-practices/references/layout-margin.md +126 -0
  55. package/src/skills/tailwindcss-best-practices/references/layout-min-max-sizing.md +63 -0
  56. package/src/skills/tailwindcss-best-practices/references/layout-object-fit-position.md +64 -0
  57. package/src/skills/tailwindcss-best-practices/references/layout-overflow.md +57 -0
  58. package/src/skills/tailwindcss-best-practices/references/layout-padding.md +77 -0
  59. package/src/skills/tailwindcss-best-practices/references/layout-position.md +85 -0
  60. package/src/skills/tailwindcss-best-practices/references/layout-tables.md +67 -0
  61. package/src/skills/tailwindcss-best-practices/references/layout-width.md +102 -0
  62. package/src/skills/tailwindcss-best-practices/references/transform-base.md +68 -0
  63. package/src/skills/tailwindcss-best-practices/references/transform-rotate.md +70 -0
  64. package/src/skills/tailwindcss-best-practices/references/transform-scale.md +83 -0
  65. package/src/skills/tailwindcss-best-practices/references/transform-skew.md +62 -0
  66. package/src/skills/tailwindcss-best-practices/references/transform-translate.md +77 -0
  67. package/src/skills/tailwindcss-best-practices/references/typography-font-text.md +142 -0
  68. package/src/skills/tailwindcss-best-practices/references/typography-list-style.md +65 -0
  69. package/src/skills/tailwindcss-best-practices/references/typography-text-align.md +60 -0
  70. package/src/skills/tailwindcss-best-practices/references/visual-background.md +76 -0
  71. package/src/skills/tailwindcss-best-practices/references/visual-border.md +108 -0
  72. package/src/skills/tailwindcss-best-practices/references/visual-effects.md +111 -0
  73. package/src/skills/tailwindcss-best-practices/references/visual-svg.md +82 -0
  74. package/src/skills/test-mobile-app/SKILL.md +11 -6
  75. package/src/skills/test-mobile-app/scripts/analyze_apk.py +15 -4
  76. package/src/skills/test-mobile-app/scripts/check_environment.py +5 -5
  77. package/src/skills/test-mobile-app/scripts/run_tests.py +1 -1
  78. package/src/skills/test-web-ui/SKILL.md +264 -84
  79. package/src/skills/test-web-ui/scripts/discover.py +25 -12
  80. package/src/skills/test-web-ui/scripts/run_tests.py +3 -2
  81. package/src/skills/vite-best-practices/SKILL.md +115 -0
  82. package/src/skills/vite-best-practices/references/build-and-ssr.md +255 -0
  83. package/src/skills/vite-best-practices/references/core-config.md +231 -0
  84. package/src/skills/vite-best-practices/references/core-features.md +222 -0
  85. package/src/skills/vite-best-practices/references/core-plugin-api.md +294 -0
  86. package/src/skills/vite-best-practices/references/environment-api.md +108 -0
  87. package/src/skills/vite-best-practices/references/rolldown-migration.md +242 -0
  88. package/codex-cli-workspace/iteration-1/benchmark.json +0 -122
  89. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/eval_metadata.json +0 -13
  90. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/grading.json +0 -52
  91. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/outputs/response.md +0 -163
  92. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/timing.json +0 -5
  93. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/grading.json +0 -58
  94. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/outputs/response.md +0 -151
  95. package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/timing.json +0 -5
  96. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -13
  97. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -52
  98. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -86
  99. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
  100. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -58
  101. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
  102. package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
  103. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/eval_metadata.json +0 -13
  104. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/grading.json +0 -52
  105. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/outputs/response.md +0 -130
  106. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/timing.json +0 -5
  107. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/grading.json +0 -64
  108. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/outputs/response.md +0 -209
  109. package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/timing.json +0 -5
  110. package/codex-cli-workspace/iteration-1/review.html +0 -1325
  111. package/gemini-cli-workspace/iteration-1/benchmark.json +0 -86
  112. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/eval_metadata.json +0 -37
  113. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/grading.json +0 -37
  114. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/outputs/response.md +0 -401
  115. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/timing.json +0 -5
  116. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/grading.json +0 -37
  117. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/outputs/response.md +0 -405
  118. package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/timing.json +0 -5
  119. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +0 -37
  120. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +0 -37
  121. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -212
  122. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +0 -5
  123. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +0 -37
  124. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -427
  125. package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +0 -5
  126. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/eval_metadata.json +0 -32
  127. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/grading.json +0 -32
  128. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -171
  129. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/timing.json +0 -5
  130. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/grading.json +0 -32
  131. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -199
  132. package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/timing.json +0 -5
  133. package/gemini-cli-workspace/iteration-1/review.html +0 -1325
  134. package/gemini-cli-workspace/iteration-2/benchmark.json +0 -173
  135. package/gemini-cli-workspace/iteration-2/benchmark.md +0 -28
  136. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/eval_metadata.json +0 -37
  137. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/grading.json +0 -37
  138. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/outputs/response.md +0 -195
  139. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/timing.json +0 -5
  140. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/grading.json +0 -37
  141. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/outputs/response.md +0 -377
  142. package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/timing.json +0 -5
  143. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/eval_metadata.json +0 -37
  144. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/grading.json +0 -37
  145. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/outputs/response.md +0 -127
  146. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/timing.json +0 -5
  147. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/grading.json +0 -37
  148. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/outputs/response.md +0 -164
  149. package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/timing.json +0 -5
  150. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/eval_metadata.json +0 -32
  151. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/grading.json +0 -32
  152. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/outputs/response.md +0 -91
  153. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/timing.json +0 -5
  154. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/grading.json +0 -32
  155. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/outputs/response.md +0 -112
  156. package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/timing.json +0 -5
  157. package/gemini-cli-workspace/iteration-2/eval-viewer.html +0 -1325
  158. package/screen-recording-workspace/evals.json +0 -41
  159. package/screen-recording-workspace/iteration-1/benchmark.json +0 -102
  160. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/eval_metadata.json +0 -31
  161. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/grading.json +0 -11
  162. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/outputs/demo.mp4 +0 -0
  163. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/timing.json +0 -5
  164. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/grading.json +0 -11
  165. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/outputs/demo.mp4 +0 -0
  166. package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/timing.json +0 -5
  167. package/screen-recording-workspace/iteration-1/eval-1-region-audio/eval_metadata.json +0 -31
  168. package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/grading.json +0 -11
  169. package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/outputs/region_capture.mp4 +0 -0
  170. package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/timing.json +0 -5
  171. package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/grading.json +0 -11
  172. package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/outputs/region_capture.mp4 +0 -0
  173. package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/timing.json +0 -5
  174. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/eval_metadata.json +0 -31
  175. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/grading.json +0 -11
  176. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/outputs/fallback_recording.mp4 +0 -0
  177. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/timing.json +0 -5
  178. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/grading.json +0 -11
  179. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/fallback_recording.mp4 +0 -0
  180. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/record_screen.py +0 -67
  181. package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/timing.json +0 -5
  182. package/screen-recording-workspace/iteration-1/review.html +0 -1325
  183. package/src/skills/codex-cli/evals/evals.json +0 -47
  184. package/src/skills/gemini-cli/evals/evals.json +0 -46
  185. package/src/skills/tm-search/evals/evals.json +0 -23
@@ -1,173 +0,0 @@
1
- {
2
- "metadata": {
3
- "skill_name": "gemini-cli",
4
- "skill_path": "src/skills/gemini-cli/",
5
- "executor_model": "claude-opus-4-6",
6
- "analyzer_model": "claude-opus-4-6",
7
- "timestamp": "2026-03-11T18:15:00Z",
8
- "evals_run": [1, 2, 3],
9
- "runs_per_configuration": 3
10
- },
11
- "runs": [
12
- {
13
- "eval_id": 1,
14
- "eval_name": "cicd-setup",
15
- "configuration": "with_skill",
16
- "run_number": 1,
17
- "result": {
18
- "pass_rate": 1.0,
19
- "passed": 6,
20
- "failed": 0,
21
- "total": 6,
22
- "time_seconds": 53.6,
23
- "tokens": 64216
24
- },
25
- "expectations": [
26
- {"text": "Uses -p or --prompt flag for headless/non-interactive mode", "passed": true, "evidence": "Line 53: gemini -p with --output-format json"},
27
- {"text": "Includes --output-format json flag for structured output", "passed": true, "evidence": "Line 62: --output-format json flag used correctly"},
28
- {"text": "Shows how to set GEMINI_API_KEY from GitHub secrets", "passed": true, "evidence": "Line 42: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}"},
29
- {"text": "Provides a GitHub Actions workflow YAML snippet", "passed": true, "evidence": "Complete workflow YAML with checkout, node setup, install, review, and comment steps"},
30
- {"text": "Shows how to parse JSON response (e.g., with jq .response)", "passed": true, "evidence": "Line 65: jq -r '.response' and line 68: token stats extraction"},
31
- {"text": "Does not invent non-existent CLI flags or options", "passed": true, "evidence": "Only uses verified flags: -p and --output-format json. Exit codes match corrected values."}
32
- ],
33
- "notes": []
34
- },
35
- {
36
- "eval_id": 1,
37
- "eval_name": "cicd-setup",
38
- "configuration": "without_skill",
39
- "run_number": 1,
40
- "result": {
41
- "pass_rate": 0.6667,
42
- "passed": 4,
43
- "failed": 2,
44
- "total": 6,
45
- "time_seconds": 68.0,
46
- "tokens": 59322
47
- },
48
- "expectations": [
49
- {"text": "Uses -p or --prompt flag for headless/non-interactive mode", "passed": true, "evidence": "Line 96: gemini -p"},
50
- {"text": "Includes --output-format json flag for structured output", "passed": false, "evidence": "Does not use --output-format json. Line 95 comment mentions '--json flag if available' which is not a real flag."},
51
- {"text": "Shows how to set GEMINI_API_KEY from GitHub secrets", "passed": true, "evidence": "Line 62: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}"},
52
- {"text": "Provides a GitHub Actions workflow YAML snippet", "passed": true, "evidence": "Complete workflow YAML provided"},
53
- {"text": "Shows how to parse JSON response (e.g., with jq .response)", "passed": true, "evidence": "Uses jq for JSON validation and parsing"},
54
- {"text": "Does not invent non-existent CLI flags or options", "passed": false, "evidence": "Line 95: references '--json flag if available' which is not a real Gemini CLI flag"}
55
- ],
56
- "notes": []
57
- },
58
- {
59
- "eval_id": 2,
60
- "eval_name": "mcp-server-config",
61
- "configuration": "with_skill",
62
- "run_number": 1,
63
- "result": {
64
- "pass_rate": 1.0,
65
- "passed": 6,
66
- "failed": 0,
67
- "total": 6,
68
- "time_seconds": 47.3,
69
- "tokens": 66522
70
- },
71
- "expectations": [
72
- {"text": "Points to ~/.gemini/settings.json or project-level settings.json", "passed": true, "evidence": "Opens with ~/.gemini/settings.json (global) or <project>/.gemini/settings.json (project-scoped)"},
73
- {"text": "Shows mcpServers config with command, args fields", "passed": true, "evidence": "Full JSON config with mcpServers, command, args, env, timeout, trust, includeTools"},
74
- {"text": "Shows $VAR pattern for environment variable references in env field", "passed": true, "evidence": "Shows \"API_KEY\": \"$MY_API_KEY\" pattern"},
75
- {"text": "Explains includeTools and/or excludeTools for restricting exposed tools", "passed": true, "evidence": "Dedicated section for both includeTools and excludeTools with examples"},
76
- {"text": "Mentions security considerations (trust field, not hardcoding keys, etc.)", "passed": true, "evidence": "Explains trust field, warns never to hardcode secrets, mentions env var redaction"},
77
- {"text": "Uses correct Python command (python/python3) in the command field", "passed": true, "evidence": "Uses \"command\": \"python\" consistently"}
78
- ],
79
- "notes": []
80
- },
81
- {
82
- "eval_id": 2,
83
- "eval_name": "mcp-server-config",
84
- "configuration": "without_skill",
85
- "run_number": 1,
86
- "result": {
87
- "pass_rate": 1.0,
88
- "passed": 6,
89
- "failed": 0,
90
- "total": 6,
91
- "time_seconds": 86.7,
92
- "tokens": 61417
93
- },
94
- "expectations": [
95
- {"text": "Points to ~/.gemini/settings.json or project-level settings.json", "passed": true, "evidence": "States ~/.gemini/settings.json (global) and .gemini/settings.json (project-level)"},
96
- {"text": "Shows mcpServers config with command, args fields", "passed": true, "evidence": "Full JSON config with mcpServers, command, args, env"},
97
- {"text": "Shows $VAR pattern for environment variable references in env field", "passed": true, "evidence": "Shows both $MY_API_KEY and ${ANOTHER_ENV_VAR} syntax"},
98
- {"text": "Explains includeTools and/or excludeTools for restricting exposed tools", "passed": true, "evidence": "Dedicated sections for includeTools and excludeTools with examples"},
99
- {"text": "Mentions security considerations (trust field, not hardcoding keys, etc.)", "passed": true, "evidence": "Mentions trust field, automatic redaction, env variable security"},
100
- {"text": "Uses correct Python command (python/python3) in the command field", "passed": true, "evidence": "Uses \"command\": \"python\" consistently"}
101
- ],
102
- "notes": []
103
- },
104
- {
105
- "eval_id": 3,
106
- "eval_name": "custom-slash-command",
107
- "configuration": "with_skill",
108
- "run_number": 1,
109
- "result": {
110
- "pass_rate": 1.0,
111
- "passed": 5,
112
- "failed": 0,
113
- "total": 5,
114
- "time_seconds": 43.7,
115
- "tokens": 63047
116
- },
117
- "expectations": [
118
- {"text": "Points to ~/.gemini/commands/ for global user-scoped commands", "passed": true, "evidence": "States ~/.gemini/commands/commit.toml for global commands"},
119
- {"text": "Shows .toml file format with prompt field", "passed": true, "evidence": "Complete .toml example with description and prompt fields"},
120
- {"text": "Uses !{git diff --cached} shell execution syntax in the prompt", "passed": true, "evidence": "Uses !{git diff --cached} and explains the syntax"},
121
- {"text": "Warns that custom slash commands don't work in headless/non-interactive mode", "passed": true, "evidence": "States 'Custom slash commands work in interactive mode only. They are not available in headless mode (-p flag).'"},
122
- {"text": "Provides a complete, copy-pasteable .toml file example", "passed": true, "evidence": "Complete commit.toml ready to copy-paste"}
123
- ],
124
- "notes": []
125
- },
126
- {
127
- "eval_id": 3,
128
- "eval_name": "custom-slash-command",
129
- "configuration": "without_skill",
130
- "run_number": 1,
131
- "result": {
132
- "pass_rate": 0.8,
133
- "passed": 4,
134
- "failed": 1,
135
- "total": 5,
136
- "time_seconds": 73.6,
137
- "tokens": 59908
138
- },
139
- "expectations": [
140
- {"text": "Points to ~/.gemini/commands/ for global user-scoped commands", "passed": true, "evidence": "States ~/.gemini/commands/ for global commands"},
141
- {"text": "Shows .toml file format with prompt field", "passed": true, "evidence": "Complete .toml example with description and prompt fields"},
142
- {"text": "Uses !{git diff --cached} shell execution syntax in the prompt", "passed": true, "evidence": "Uses !{git diff --cached} in the prompt field"},
143
- {"text": "Warns that custom slash commands don't work in headless/non-interactive mode", "passed": false, "evidence": "No mention of headless mode limitations anywhere in the response"},
144
- {"text": "Provides a complete, copy-pasteable .toml file example", "passed": true, "evidence": "Complete commit.toml file provided"}
145
- ],
146
- "notes": []
147
- }
148
- ],
149
- "run_summary": {
150
- "with_skill": {
151
- "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
152
- "time_seconds": {"mean": 48.2, "stddev": 5.0, "min": 43.7, "max": 53.6},
153
- "tokens": {"mean": 64595, "stddev": 1741, "min": 63047, "max": 66522}
154
- },
155
- "without_skill": {
156
- "pass_rate": {"mean": 0.822, "stddev": 0.167, "min": 0.667, "max": 1.0},
157
- "time_seconds": {"mean": 76.1, "stddev": 9.7, "min": 68.0, "max": 86.7},
158
- "tokens": {"mean": 60216, "stddev": 1372, "min": 59322, "max": 61417}
159
- },
160
- "delta": {
161
- "pass_rate": "+0.18",
162
- "time_seconds": "-27.9",
163
- "tokens": "+4379"
164
- }
165
- },
166
- "notes": [
167
- "With-skill achieves 100% pass rate across all 3 evals (17/17 assertions) vs an 82.2% mean per-eval pass rate for the baseline (14/17 assertions)",
168
- "Eval 2 (MCP server config) is non-discriminating — both configs pass all 6 assertions. This eval could be made harder.",
169
- "With-skill is ~28s faster on average despite using ~4k more tokens, suggesting more focused responses",
170
- "Baseline failures are accuracy-related: hallucinated --json flag (eval 1) and missing headless limitation warning (eval 3)",
171
- "The skill's value is strongest on accuracy assertions — it prevents hallucination of non-existent flags and ensures important caveats are mentioned"
172
- ]
173
- }
@@ -1,28 +0,0 @@
1
- # Skill Benchmark: gemini-cli
2
-
3
- **Model**: claude-opus-4-6
4
- **Date**: 2026-03-11T18:15:00Z
5
- **Evals**: 1, 2, 3 (1 run each per configuration)
6
-
7
- ## Summary
8
-
9
- | Metric | With Skill | Without Skill | Delta |
10
- |--------|------------|---------------|-------|
11
- | Pass Rate | 100% ± 0% | 82% ± 17% | +18 pp |
12
- | Time | 48.2s ± 5.0s | 76.1s ± 9.7s | -27.9s |
13
- | Tokens | 64595 ± 1741 | 60216 ± 1372 | +4379 |
14
-
15
- ## Per-Eval Breakdown
16
-
17
- | Eval | With Skill | Without Skill |
18
- |------|-----------|---------------|
19
- | 1: CI/CD Setup | 6/6 (100%) | 4/6 (67%) |
20
- | 2: MCP Server Config | 6/6 (100%) | 6/6 (100%) |
21
- | 3: Custom Slash Command | 5/5 (100%) | 4/5 (80%) |
22
-
23
- ## Analyst Notes
24
-
25
- - **Eval 2 is non-discriminating**: Both configs pass all 6 assertions. Consider adding harder assertions (e.g., correct timeout default of 600000ms, httpUrl transport option).
26
- - **Baseline failures are accuracy-related**: The without-skill run hallucinated a `--json` flag (eval 1) and omitted the headless limitation warning (eval 3). These are exactly the kinds of errors the skill prevents.
27
- - **Speed advantage**: With-skill runs are ~28s faster on average despite using ~4k more tokens, suggesting the skill helps produce more focused, direct responses.
28
- - **The skill's value is strongest on accuracy**: It prevents hallucination of non-existent flags and ensures important caveats are mentioned.
@@ -1,37 +0,0 @@
1
- {
2
- "eval_id": 1,
3
- "eval_name": "cicd-setup",
4
- "prompt": "I want to set up gemini cli in my CI/CD pipeline on GitHub Actions so it can automatically review PRs. We use an API key stored in GitHub secrets. How do I set this up with structured JSON output so I can parse the review?",
5
- "assertions": [
6
- {
7
- "id": "mentions-p-flag",
8
- "text": "Uses -p or --prompt flag for headless/non-interactive mode",
9
- "type": "content_check"
10
- },
11
- {
12
- "id": "mentions-json-output",
13
- "text": "Includes --output-format json flag for structured output",
14
- "type": "content_check"
15
- },
16
- {
17
- "id": "mentions-api-key-env",
18
- "text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
19
- "type": "content_check"
20
- },
21
- {
22
- "id": "provides-workflow-yaml",
23
- "text": "Provides a GitHub Actions workflow YAML snippet",
24
- "type": "content_check"
25
- },
26
- {
27
- "id": "mentions-jq-parsing",
28
- "text": "Shows how to parse JSON response (e.g., with jq .response)",
29
- "type": "content_check"
30
- },
31
- {
32
- "id": "no-hallucinated-flags",
33
- "text": "Does not invent non-existent CLI flags or options",
34
- "type": "accuracy_check"
35
- }
36
- ]
37
- }
@@ -1,37 +0,0 @@
1
- {
2
- "eval_id": 1,
3
- "eval_name": "cicd-setup",
4
- "config": "with_skill",
5
- "expectations": [
6
- {
7
- "text": "Uses -p or --prompt flag for headless/non-interactive mode",
8
- "passed": true,
9
- "evidence": "Line 53: gemini -p \"You are a senior code reviewer...\" with --output-format json"
10
- },
11
- {
12
- "text": "Includes --output-format json flag for structured output",
13
- "passed": true,
14
- "evidence": "Line 62: --output-format json flag used correctly"
15
- },
16
- {
17
- "text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
18
- "passed": true,
19
- "evidence": "Line 42: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} in env block"
20
- },
21
- {
22
- "text": "Provides a GitHub Actions workflow YAML snippet",
23
- "passed": true,
24
- "evidence": "Complete workflow YAML at .github/workflows/gemini-pr-review.yml with checkout, node setup, install, review, and comment steps"
25
- },
26
- {
27
- "text": "Shows how to parse JSON response (e.g., with jq .response)",
28
- "passed": true,
29
- "evidence": "Line 65: REVIEW=$(echo \"$RESULT\" | jq -r '.response') and line 68: token stats extraction with jq"
30
- },
31
- {
32
- "text": "Does not invent non-existent CLI flags or options",
33
- "passed": true,
34
- "evidence": "Only uses verified flags: -p and --output-format json. Exit codes match corrected values (0, 1, 42, 53)."
35
- }
36
- ]
37
- }
@@ -1,195 +0,0 @@
1
- # Setting Up Gemini CLI for Automated PR Reviews in GitHub Actions
2
-
3
- ## Prerequisites
4
-
5
- 1. A Gemini API key (get one at https://aistudio.google.com/apikey)
6
- 2. The API key stored as a GitHub Actions secret named `GEMINI_API_KEY`
7
-
8
- ## GitHub Actions Workflow
9
-
10
- Create a file at `.github/workflows/gemini-pr-review.yml`:
11
-
12
- ```yaml
13
- name: Gemini PR Review
14
-
15
- on:
16
- pull_request:
17
- types: [opened, synchronize]
18
-
19
- permissions:
20
- pull-requests: write
21
- contents: read
22
-
23
- jobs:
24
- review:
25
- runs-on: ubuntu-latest
26
- steps:
27
- - name: Checkout code
28
- uses: actions/checkout@v4
29
- with:
30
- fetch-depth: 0 # Full history so we can diff against base
31
-
32
- - name: Set up Node.js
33
- uses: actions/setup-node@v4
34
- with:
35
- node-version: '20'
36
-
37
- - name: Install Gemini CLI
38
- run: npm install -g @google/gemini-cli
39
-
40
- - name: Run AI Code Review
41
- env:
42
- GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
43
- run: |
44
- # Get the diff between the PR branch and the base branch
45
- DIFF=$(git diff origin/${{ github.base_ref }}...HEAD)
46
-
47
- if [ -z "$DIFF" ]; then
48
- echo "No changes to review."
49
- exit 0
50
- fi
51
-
52
- # Run Gemini CLI with structured JSON output
53
- RESULT=$(echo "$DIFF" | gemini -p "You are a senior code reviewer. Review the following pull request diff for:
54
- 1. Bugs and logical errors
55
- 2. Security vulnerabilities
56
- 3. Performance issues
57
- 4. Code style and best practices
58
-
59
- Provide your review in a structured format with sections for each category. For each issue found, include the file name, line context, severity (critical/warning/info), and a suggested fix.
60
-
61
- If no issues are found in a category, say so explicitly." \
62
- --output-format json)
63
-
64
- # Extract just the response text
65
- REVIEW=$(echo "$RESULT" | jq -r '.response')
66
-
67
- # Extract token usage stats for logging
68
- TOKENS=$(echo "$RESULT" | jq '.stats.models | to_entries[0].value.tokens.total // "unknown"')
69
- echo "Tokens used: $TOKENS"
70
-
71
- # Write the review to the GitHub Step Summary
72
- echo "## Gemini Code Review" >> $GITHUB_STEP_SUMMARY
73
- echo "" >> $GITHUB_STEP_SUMMARY
74
- echo "$REVIEW" >> $GITHUB_STEP_SUMMARY
75
-
76
- # Save the full JSON result for downstream steps
77
- echo "$RESULT" > review-result.json
78
-
79
- - name: Post review as PR comment
80
- if: success()
81
- env:
82
- GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
83
- run: |
84
- REVIEW=$(jq -r '.response' review-result.json)
85
- TOKENS=$(jq '.stats.models | to_entries[0].value.tokens.total // "unknown"' review-result.json)
86
-
87
- COMMENT_BODY=$(cat <<EOF
88
- ## Gemini Automated Code Review
89
-
90
- $REVIEW
91
-
92
- ---
93
- *Reviewed by Gemini CLI | Tokens used: $TOKENS*
94
- EOF
95
- )
96
-
97
- gh pr comment ${{ github.event.pull_request.number }} --body "$COMMENT_BODY"
98
- ```
99
-
100
- ## How It Works
101
-
102
- ### Authentication
103
-
104
- The workflow uses the `GEMINI_API_KEY` environment variable, which is pulled from GitHub Secrets. Gemini CLI automatically detects this variable -- no additional configuration is needed. The free tier allows 1,000 requests per day, which is sufficient for most PR review workflows.
105
-
106
- ### Structured JSON Output
107
-
108
- The `--output-format json` flag causes Gemini CLI to return a single JSON object once all processing is complete. The schema looks like this:
109
-
110
- ```json
111
- {
112
- "response": "The review text content...",
113
- "stats": {
114
- "models": {
115
- "gemini-2.5-pro": {
116
- "tokens": {
117
- "prompt": 24939,
118
- "candidates": 20,
119
- "total": 25113
120
- },
121
- "api": {
122
- "totalRequests": 2,
123
- "totalErrors": 0,
124
- "totalLatencyMs": 5053
125
- }
126
- }
127
- },
128
- "tools": {
129
- "totalCalls": 1,
130
- "totalSuccess": 1,
131
- "totalFail": 0
132
- },
133
- "files": {
134
- "totalLinesAdded": 0,
135
- "totalLinesRemoved": 0
136
- }
137
- }
138
- }
139
- ```
140
-
141
- You can extract the review text with `jq -r '.response'` and inspect token usage or tool-call stats from the `stats` object.
142
-
143
- ### Exit Codes
144
-
145
- Gemini CLI uses these exit codes, which you can use for conditional logic:
146
-
147
- | Code | Meaning |
148
- |------|---------|
149
- | 0 | Success |
150
- | 1 | General error |
151
- | 42 | Input error (invalid arguments, missing config) |
152
- | 53 | Turn limit exceeded |
153
-
154
- ## Parsing the JSON Output in a Downstream Step
155
-
156
- If you need to parse the review in a separate step (for example, to fail the build on critical findings), you can do it like this:
157
-
158
- ```yaml
159
- - name: Check for critical issues
160
- run: |
161
- REVIEW=$(jq -r '.response' review-result.json)
162
-
163
- # Naive check: fails if the word "critical" appears anywhere in the review, including phrases like "no critical issues found"
164
- if echo "$REVIEW" | grep -qi "critical"; then
165
- echo "::error::Critical issues found in code review"
166
- exit 1
167
- fi
168
- ```
169
-
170
- ## Advanced: Using a GEMINI.md for Consistent Review Context
171
-
172
- Create a `.gemini/GEMINI.md` file in your repository root to give Gemini persistent context about your project's standards:
173
-
174
- ```markdown
175
- # Project Review Guidelines
176
-
177
- ## Code Standards
178
- - All functions must have JSDoc comments
179
- - Use 2-space indentation
180
- - No console.log in production code
181
-
182
- ## Security Requirements
183
- - Never hardcode secrets or API keys
184
- - All user input must be validated
185
- - SQL queries must use parameterized statements
186
- ```
187
-
188
- Gemini CLI automatically loads this file when running in your repo directory, so the reviewer will apply your project-specific rules during every PR review.
189
-
190
- ## Known Limitations
191
-
192
- - Custom slash commands (`.toml` files) do not work in headless/non-interactive mode. You must embed the full prompt text directly in your script, as shown above.
193
- - Extensions are also unavailable in headless mode.
194
- - The free tier is rate-limited to 60 requests/minute and 1,000 requests/day. For high-volume repos, consider using a paid API key.
195
- - Very large diffs may exceed the model's context window. Consider filtering the diff to only changed files that matter (e.g., exclude lockfiles and generated code).
@@ -1,5 +0,0 @@
1
- {
2
- "total_tokens": 64216,
3
- "duration_ms": 53586,
4
- "total_duration_seconds": 53.6
5
- }
@@ -1,37 +0,0 @@
1
- {
2
- "eval_id": 1,
3
- "eval_name": "cicd-setup",
4
- "config": "without_skill",
5
- "expectations": [
6
- {
7
- "text": "Uses -p or --prompt flag for headless/non-interactive mode",
8
- "passed": true,
9
- "evidence": "Line 96: gemini -p \"$(cat review_prompt.txt)\" and line 351: gemini -p \"Review this code diff...\""
10
- },
11
- {
12
- "text": "Includes --output-format json flag for structured output",
13
- "passed": false,
14
- "evidence": "Does not use --output-format json. Instead relies on prompt-based JSON (asking model to return JSON in prompt text). Line 95 comment mentions '--json flag if available' which is not a real Gemini CLI flag."
15
- },
16
- {
17
- "text": "Shows how to set GEMINI_API_KEY from GitHub secrets",
18
- "passed": true,
19
- "evidence": "Line 62: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} in env block"
20
- },
21
- {
22
- "text": "Provides a GitHub Actions workflow YAML snippet",
23
- "passed": true,
24
- "evidence": "Complete workflow YAML provided with checkout, node setup, install, review, and post-comment steps"
25
- },
26
- {
27
- "text": "Shows how to parse JSON response (e.g., with jq .response)",
28
- "passed": true,
29
- "evidence": "Uses jq for JSON validation (line 102) and parsing (line 248: jq -r '.candidates[0].content.parts[0].text'). Different schema than CLI --output-format json but still demonstrates jq parsing."
30
- },
31
- {
32
- "text": "Does not invent non-existent CLI flags or options",
33
- "passed": false,
34
- "evidence": "Line 95: comment references '--json flag if available' which is not a real Gemini CLI flag. Should use --output-format json instead."
35
- }
36
- ]
37
- }