microevals-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. config/judge_system_prompt.yaml +113 -0
  2. evals/nextjs/001-server-component.yaml +28 -0
  3. evals/nextjs/002-client-component.yaml +26 -0
  4. evals/nextjs/003-cookies.yaml +28 -0
  5. evals/nextjs/010-route-handlers.yaml +30 -0
  6. evals/nextjs/013-pathname-server.yaml +29 -0
  7. evals/nextjs/014-server-routing.yaml +28 -0
  8. evals/nextjs/018-use-router.yaml +28 -0
  9. evals/nextjs/020_no_use_effect.yaml +30 -0
  10. evals/nextjs/021-avoid-fetch-in-effect.yaml +28 -0
  11. evals/nextjs/022_prefer_server_actions.yaml +29 -0
  12. evals/nextjs/023_avoid_getserversideprops.yaml +27 -0
  13. evals/nextjs/024_avoid_redundant_usestate.yaml +29 -0
  14. evals/nextjs/025_no_async_client_components.yaml +29 -0
  15. evals/nextjs/026_no_serial_await.yaml +26 -0
  16. evals/nextjs/027-prefer-next-image.yaml +30 -0
  17. evals/nextjs/027_no_hooks_in_server_components.yaml +29 -0
  18. evals/nextjs/028-prefer-next-font.yaml +30 -0
  19. evals/nextjs/028_cookies_headers_context.yaml +29 -0
  20. evals/nextjs/029_no_catch_redirect.yaml +31 -0
  21. evals/nextjs/030_app_router_migration.yaml +30 -0
  22. evals/nextjs/031_no_non_serializable_props.yaml +31 -0
  23. evals/react/001_missing_useeffect_dependencies.yaml +29 -0
  24. evals/react/002_incorrect_event_handler.yaml +28 -0
  25. evals/react/003_missing_return_in_map.yaml +28 -0
  26. evals/react/004_async_useeffect.yaml +32 -0
  27. evals/react/005_direct_state_mutation.yaml +30 -0
  28. evals/react/006_index_as_key.yaml +31 -0
  29. evals/react/zustand_store_usage.yaml +25 -0
  30. evals/shadcn/001_cn_utility_function.yaml +31 -0
  31. evals/shadcn/002_css_variables.yaml +32 -0
  32. evals/shadcn/003_component_dependencies.yaml +33 -0
  33. evals/shadcn/004_path_aliases.yaml +32 -0
  34. evals/shadcn/005_client_directive.yaml +31 -0
  35. evals/shadcn/006_tailwind_config.yaml +36 -0
  36. evals/shadcn/007_components_json_config.yaml +35 -0
  37. evals/supabase/001_client_setup.yaml +47 -0
  38. evals/supabase/002_auth_context_setup.yaml +43 -0
  39. evals/supabase/003_auth_flow_implementation.yaml +46 -0
  40. evals/supabase/004_auth_flow_testing_WIP.yaml +52 -0
  41. evals/supabase/005_auth_google_oauth.yaml +55 -0
  42. evals/supabase/007_storage_client_setup.yaml +43 -0
  43. evals/supabase/008_storage_nextjs_config.yaml +45 -0
  44. evals/supabase/009_storage_image_upload.yaml +49 -0
  45. evals/supabase/010_security_rls_enabled.yaml +42 -0
  46. evals/supabase/011_security_rls_policies.yaml +43 -0
  47. evals/supabase/012_security_no_service_key_exposed.yaml +49 -0
  48. evals/supabase/013_database_read_data.yaml +44 -0
  49. evals/supabase/014_database_create_data.yaml +44 -0
  50. evals/supabase/015_database_update_data.yaml +47 -0
  51. evals/supabase/016_database_delete_data.yaml +47 -0
  52. evals/supabase/017_database_user_scoped_query.yaml +52 -0
  53. evals/tailwind/001_tailwind_v4_config.yaml +22 -0
  54. evals/tailwind/002_content_paths.yaml +27 -0
  55. evals/tailwind/003_no_dynamic_class_construction.yaml +28 -0
  56. evals/tailwind/tailwind_postcss_config.yaml +24 -0
  57. evals/typescript/001_unsafe_type_assertions.yaml +39 -0
  58. evals/typescript/002_missing_null_checks.yaml +33 -0
  59. evals/vercel/001_vercel_deployment.yaml +19 -0
  60. evals/vercel/002_environment_variables_handling.yaml +23 -0
  61. evals/vercel/003_seo_metadata.yaml +33 -0
  62. microevals/__init__.py +34 -0
  63. microevals/eval_registry.py +222 -0
  64. microevals/eval_runner.py +533 -0
  65. microevals/utils.py +490 -0
  66. microevals-0.1.0.dist-info/METADATA +575 -0
  67. microevals-0.1.0.dist-info/RECORD +71 -0
  68. microevals-0.1.0.dist-info/WHEEL +5 -0
  69. microevals-0.1.0.dist-info/entry_points.txt +2 -0
  70. microevals-0.1.0.dist-info/licenses/LICENSE +21 -0
  71. microevals-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,575 @@
1
+ Metadata-Version: 2.4
2
+ Name: microevals
3
+ Version: 0.1.0
4
+ Summary: Automated evaluation framework for AI-generated code quality
5
+ Author-email: Design Arena <contact@designarena.ai>, Kamryn Ohly <kamryn@arcada.dev>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Design-Arena/MicroEvals
8
+ Project-URL: Repository, https://github.com/Design-Arena/MicroEvals
9
+ Project-URL: Documentation, https://github.com/Design-Arena/MicroEvals#readme
10
+ Project-URL: Issues, https://github.com/Design-Arena/MicroEvals/issues
11
+ Keywords: evaluation,agents,ai,evals,design,benchmarks
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: requests>=2.31.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: python-dotenv>=1.0.0
27
+ Dynamic: license-file
28
+
29
+ # MicroEvals
30
+
31
+ **Automated evaluation framework for AI-generated code quality and best practices.**
32
+
33
+ MicroEvals is a collection of focused, automated tests that evaluate whether AI-generated code (or any codebase) follows framework-specific best practices and avoids common anti-patterns. Each evaluation uses Claude to analyze your codebase against specific criteria.
34
+
35
+ ## What Are MicroEvals?
36
+
37
+ MicroEvals are **micro-evaluations**: small, focused tests that check for specific patterns or anti-patterns in your code. Unlike traditional linters, which check syntax, MicroEvals use an LLM as a judge to understand context and evaluate architectural decisions.
38
+
39
+ **Example Use Cases:**
40
+ - Verify Next.js App Router best practices (server components, data fetching)
41
+ - Catch React anti-patterns (missing dependencies, incorrect hooks usage)
42
+ - Validate Supabase security (RLS policies, proper auth setup)
43
+ - Check TypeScript type safety (unsafe assertions, missing null checks)
44
+ - Ensure proper shadcn/ui integration
45
+ - Audit deployment configurations
46
+
47
+ ## Quick Start
48
+
49
+ ### Installation
50
+
51
+ #### Option 1: Install from PyPI (Recommended)
52
+
53
+ ```bash
54
+ pip install microevals
55
+ ```
56
+
57
+ #### Option 2: Install from Source (For Development)
58
+
59
+ ```bash
60
+ # Clone the repository
61
+ git clone https://github.com/Design-Arena/MicroEvals
62
+ cd MicroEvals
63
+
64
+ # Install in development mode
65
+ pip install -e .
66
+ ```
67
+
68
+ ### Prerequisites
69
+
70
+ 1. **Python 3.8+** installed
71
+ 2. **Claude CLI** installed and authenticated:
72
+ ```bash
73
+ # Install Claude CLI (if not already installed)
74
+ # See: https://docs.anthropic.com/en/docs/build-with-claude/cli
75
+
76
+ # Verify installation
77
+ claude --version
78
+
79
+ # If command not found, add Claude to your PATH:
80
+ export PATH="$PATH:/path/to/claude"
81
+ # Add the export line to your ~/.bashrc or ~/.zshrc to make it permanent
82
+ ```
83
+
84
+ 3. **Git** installed (for remote repositories)
85
+
86
+ ### Run Your First Eval
87
+
88
+ ```bash
89
+ # Navigate to your project
90
+ cd your-nextjs-app
91
+
92
+ # Run evaluations on current directory
93
+ microeval --category nextjs
94
+
95
+ # Check the results
96
+ cat results/*.json
97
+ ```
98
+
99
+ **🔒 Safety Note:** When running on local directories, your code is **copied** to a temporary directory before evaluation. Your original files are **never modified or deleted**. The framework has 6 independent safety checks to prevent accidental file deletion.
100
+
101
+ ### Alternative: Run Against Remote Repository
102
+
103
+ ```bash
104
+ # Run against a GitHub repository
105
+ microeval --repo https://github.com/user/app --category nextjs
106
+ ```
107
+
108
+ ## Available Eval Categories
109
+
110
+ | Category | Count | Description |
111
+ |----------|-------|-------------|
112
+ | **nextjs** | 20+ | Next.js App Router patterns, server/client components, routing |
113
+ | **react** | 7+ | React hooks, state management, component patterns |
114
+ | **supabase** | 17+ | Supabase auth, database, storage, RLS security |
115
+ | **tailwind** | 4+ | Tailwind CSS configuration and usage |
116
+ | **typescript** | 2+ | TypeScript type safety and best practices |
117
+ | **vercel** | 3+ | Vercel deployment and configuration |
118
+ | **shadcn** | 7+ | shadcn/ui component library integration |
119
+
120
+ **See all available evals:**
121
+ ```bash
122
+ # List all evals (recommended)
123
+ microeval --list
124
+
125
+ # List evals in a specific category
126
+ microeval --list --category nextjs
127
+
128
+ # Or using Python module
129
+ python -m microevals.eval_registry --list
130
+ ```
131
+
132
+ ## Running Evals
133
+
134
+ ### Local Directory (Recommended)
135
+
136
+ Run evaluations on your current project:
137
+
138
+ ```bash
139
+ # Using the microeval command (recommended)
140
+ microeval --category nextjs
141
+
142
+ # Or using Python module directly
143
+ python -m microevals.eval_runner --category nextjs
144
+ ```
145
+
146
+ **More examples:**
147
+
148
+ ```bash
149
+ # Run a specific eval
150
+ microeval --eval evals/nextjs/001-server-component.yaml
151
+
152
+ # Run all evals
153
+ microeval --all
154
+
155
+ # Run with batch mode for speed
156
+ microeval --category nextjs --batch-size 10
157
+ ```
158
+
159
+ ### Remote Repository
160
+
161
+ Run evaluations against a GitHub repository:
162
+
163
+ ```bash
164
+ # Using the microeval command
165
+ microeval --repo https://github.com/user/app --category nextjs
166
+
167
+ # Or using Python module directly
168
+ python -m microevals.eval_runner --repo https://github.com/user/app --category nextjs
169
+ ```
170
+
171
+ **More examples:**
172
+
173
+ ```bash
174
+ # Run specific eval
175
+ microeval --repo https://github.com/user/app --eval evals/nextjs/001-server-component.yaml
176
+
177
+ # Run all evals
178
+ microeval --repo https://github.com/user/app --all
179
+
180
+ # Run with batch mode
181
+ microeval --repo https://github.com/user/app --all --batch-size 15
182
+ ```
183
+
184
+ ### Specific Eval IDs
185
+
186
+ Run evaluations by their IDs:
187
+
188
+ ```bash
189
+ # Using microeval command
190
+ microeval --ids nextjs_server_component_001 react_missing_useeffect_dependencies_001
191
+
192
+ # Or using Python module
193
+ python -m microevals.eval_runner \
194
+ --repo https://github.com/user/app \
195
+ --ids nextjs_server_component_001 react_missing_useeffect_dependencies_001
196
+ ```
197
+
198
+ ### Multiple Specific Evals
199
+
200
+ Run multiple specific eval files:
201
+
202
+ ```bash
203
+ # Using microeval command
204
+ microeval --evals evals/nextjs/001-server-component.yaml evals/react/001_missing_useeffect_dependencies.yaml
205
+
206
+ # Or using Python module
207
+ python -m microevals.eval_runner \
208
+ --repo https://github.com/user/app \
209
+ --evals evals/nextjs/001-server-component.yaml evals/react/001_missing_useeffect_dependencies.yaml
210
+ ```
211
+
212
+ ## Advanced Usage
213
+
214
+ ### Runtime Input Overrides
215
+
216
+ Override default values from eval YAML files:
217
+
218
+ ```bash
219
+ # Using microeval command
220
+ microeval --eval evals/supabase/001_client_setup.yaml \
221
+ --input supabase_url "https://xyz.supabase.co" \
222
+ --input supabase_anon_key "your_key_here"
223
+
224
+ # Or using Python module
225
+ python -m microevals.eval_runner \
226
+ --repo https://github.com/user/app \
227
+ --eval evals/supabase/001_client_setup.yaml \
228
+ --input supabase_url "https://xyz.supabase.co" \
229
+ --input supabase_anon_key "your_key_here"
230
+ ```
231
+
232
+ ### Parallel Execution
233
+
234
+ Run multiple evals in parallel (faster but uses more resources):
235
+
236
+ ```bash
237
+ # Using microeval command
238
+ microeval --category nextjs --parallel 3
239
+
240
+ # Or using Python module
241
+ python -m microevals.eval_runner \
242
+ --repo https://github.com/user/app \
243
+ --category nextjs \
244
+ --parallel 3
245
+ ```
246
+
247
+ ### Batch Mode
248
+
249
+ Run multiple evals in a single Claude session (most efficient):
250
+
251
+ ```bash
252
+ # Using microeval command - Run 5 evals per Claude session
253
+ microeval --category tailwind --batch-size 5
254
+
255
+ # Run all evals in large batches
256
+ microeval --all --batch-size 15
257
+
258
+ # Or using Python module
259
+ python -m microevals.eval_runner \
260
+ --repo https://github.com/user/app \
261
+ --category tailwind \
262
+ --batch-size 5
263
+ ```
264
+
265
+ **Batch mode benefits:**
266
+ - Faster execution (single context for multiple evals)
267
+ - More efficient Claude usage
268
+ - Better for related evaluations
269
+
270
+ **Preview batch prompt before running:**
271
+
272
+ ```bash
273
+ microeval --category tailwind --batch-size 3 --print-prompt
274
+
275
+ # Or using Python module
276
+ python -m microevals.eval_runner \
277
+ --repo https://github.com/user/app \
278
+ --category tailwind \
279
+ --batch-size 3 \
280
+ --print-prompt
281
+ ```
282
+
283
+ ### Custom Timeout
284
+
285
+ Increase timeout for slower evaluations:
286
+
287
+ ```bash
288
+ # Using microeval command
289
+ microeval --eval evals/nextjs/030_app_router_migration.yaml --timeout 600
290
+
291
+ # Or using Python module
292
+ python -m microevals.eval_runner \
293
+ --repo https://github.com/user/app \
294
+ --eval evals/nextjs/030_app_router_migration.yaml \
295
+ --timeout 600 # 10 minutes
296
+ ```
297
+
298
+ ### Custom Output Directory
299
+
300
+ Save results to a specific directory:
301
+
302
+ ```bash
303
+ # Using microeval command
304
+ microeval --category nextjs --output-dir my_results
305
+
306
+ # Or using Python module
307
+ python -m microevals.eval_runner \
308
+ --repo https://github.com/user/app \
309
+ --category nextjs \
310
+ --output-dir my_results
311
+ ```
312
+
313
+ ## Understanding Results
314
+
315
+ ### Score System
316
+
317
+ Each eval returns a score:
318
+
319
+ | Score | Status | Meaning |
320
+ |-------|--------|---------|
321
+ | **1.0** | PASS | Code follows best practices, no issues found |
322
+ | **0.0** | FAIL | Anti-pattern detected or criteria not met |
323
+ | **-1.0** | N/A | Pattern/feature not present in codebase |
324
+
325
+ ### Result Output
326
+
327
+ Results are saved to `results/` as JSON files:
328
+
329
+ ```json
330
+ {
331
+ "passed": true,
332
+ "score": 1.0,
333
+ "summary": "Server components properly use async/await for data fetching",
334
+ "evidence": [
335
+ "app/page.tsx:15 - Correct async server component implementation",
336
+ "app/posts/page.tsx:20 - Proper await on fetch and response.json()"
337
+ ],
338
+ "issues": [],
339
+ "metadata": {
340
+ "eval_id": "nextjs_server_component_001",
341
+ "eval_name": "Server Component Data Fetching",
342
+ "repo_url": "https://github.com/user/app",
343
+ "timestamp": "2025-11-10T10:30:45",
344
+ "evaluator": "claude"
345
+ }
346
+ }
347
+ ```
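+
+ Because each result is a plain JSON file with the fields shown above, it is easy to aggregate them with a few lines of Python. The sketch below is illustrative rather than part of the package API; it assumes the default `results/` output directory and the score convention from the table above.
+
+ ```python
+ # summarize_results.py - aggregate MicroEvals result files by score (illustrative sketch)
+ import glob
+ import json
+ from collections import Counter
+
+ counts = Counter()
+ for path in glob.glob("results/*.json"):
+     with open(path) as f:
+         score = json.load(f).get("score")
+     # 1.0 = pass, 0.0 = fail, -1.0 = not applicable (see "Score System" above)
+     counts[{1.0: "pass", 0.0: "fail", -1.0: "n/a"}.get(score, "unknown")] += 1
+
+ applicable = counts["pass"] + counts["fail"]
+ rate = 100.0 * counts["pass"] / applicable if applicable else 0.0
+ print(dict(counts), f"pass rate: {rate:.1f}% (excluding N/A)")
+ ```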
348
+
349
+ ### Terminal Output
350
+
351
+ Live results are shown in the terminal with color coding:
352
+
353
+ ```
354
+ Running evaluations for: https://github.com/user/my-app
355
+ ================================================================================
356
+
357
+ [1/5] Running 001-server-component.yaml...
358
+ PASS nextjs/001-server-component.yaml 12.3s
359
+ Server components properly use async/await for data fetching
360
+
361
+ [2/5] Running 002-client-component.yaml...
362
+ FAIL nextjs/002-client-component.yaml 8.7s
363
+ Found 'use client' components with hooks that should be server components
364
+
365
+ [3/5] Running 003-cookies.yaml...
366
+ N/A nextjs/003-cookies.yaml 5.2s
367
+ No cookie usage found in codebase
368
+
369
+ ================================================================================
370
+ SUMMARY
371
+ ================================================================================
372
+ Total evaluations: 5
373
+ Passed: 3
374
+ Failed: 1
375
+ Not Applicable: 1
376
+ Timeouts: 0
377
+ Errors: 0
378
+ Total duration: 45.2s
379
+ Pass rate: 75.0% (excluding N/A)
380
+ ```
381
+
382
+ ## Project Structure
383
+
384
+ ```
385
+ MicroEvals/
386
+ ├── microevals/ # Main package
387
+ │ ├── __init__.py # Package initialization
388
+ │ ├── eval_runner.py # Main CLI for running evals
389
+ │ ├── eval_registry.py # Registry and discovery of evals
390
+ │ └── utils.py # Utility functions
391
+
392
+ ├── evals/ # Evaluation definitions
393
+ │ ├── nextjs/ # Next.js-specific evals
394
+ │ │ ├── 001-server-component.yaml
395
+ │ │ ├── 002-client-component.yaml
396
+ │ │ └── ...
397
+ │ ├── react/ # React-specific evals
398
+ │ ├── supabase/ # Supabase-specific evals
399
+ │ ├── tailwind/ # Tailwind-specific evals
400
+ │ ├── typescript/ # TypeScript-specific evals
401
+ │ ├── vercel/ # Vercel-specific evals
402
+ │ └── shadcn/ # shadcn/ui-specific evals
403
+
404
+ ├── config/ # Configuration files
405
+ │ ├── judge_system_prompt.yaml # Claude judge prompt templates
406
+ │ └── example_repos.json # Example repositories
407
+
408
+ ├── results/ # Evaluation results (auto-generated)
409
+ │ └── *.json # Result files
410
+
411
+ ├── requirements.txt # Python dependencies
412
+ ├── CONTRIBUTING.md # Contribution guidelines
413
+ ├── LICENSE # License file
414
+ └── README.md # This file
415
+ ```
416
+
417
+ ## Creating Custom Evals
418
+
419
+ Want to add your own evaluations? See [CONTRIBUTING.md](CONTRIBUTING.md) for:
420
+
421
+ - Eval template and format
422
+ - Naming conventions
423
+ - Testing guidelines
424
+ - Submission process
425
+
426
+ **Quick template:**
427
+
428
+ ```yaml
429
+ eval_id: category_descriptive_name_001
430
+ name: "Human-Readable Name"
431
+ description: "What this eval checks"
432
+ category: nextjs # or react, supabase, etc.
433
+
434
+ # Optional runtime inputs
435
+ inputs:
436
+ custom_variable: "default_value"
437
+
438
+ criteria: |
439
+ You have access to the entire codebase. Evaluate [what to check].
440
+
441
+ WHAT TO LOOK FOR:
442
+ - [Specific patterns to search for]
443
+
444
+ ANTI-PATTERN (mark as failed):
445
+ - [Bad pattern 1]
446
+ - [Bad pattern 2]
447
+
448
+ CORRECT PATTERN (mark as passed):
449
+ - [Good pattern 1]
450
+ - [Good pattern 2]
451
+
452
+ MARK AS N/A if:
453
+ - [Condition for not applicable]
454
+
455
+ Return JSON with: passed, score, summary, evidence, issues
456
+ ```
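+
+ Eval definitions are ordinary YAML, so you can sanity-check a new file with PyYAML (already a dependency) before running it. The snippet below is only a quick local check with an assumed minimum field set; the authoritative loading and validation logic lives in `microevals/eval_registry.py`.
+
+ ```python
+ # validate_eval.py - quick sanity check of a custom eval file (illustrative only)
+ import sys
+ import yaml
+
+ # Assumed minimum fields, based on the template above; the registry may require more.
+ REQUIRED_FIELDS = {"eval_id", "name", "description", "category", "criteria"}
+
+ with open(sys.argv[1]) as f:
+     eval_def = yaml.safe_load(f)
+
+ missing = REQUIRED_FIELDS - eval_def.keys()
+ if missing:
+     sys.exit(f"missing fields: {', '.join(sorted(missing))}")
+ print(f"{eval_def['eval_id']} ({eval_def['category']}): {eval_def['name']}")
+ ```
+
+ Once the file parses cleanly, test it end to end with `microeval --eval path/to/your_eval.yaml` as described under Running Evals.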
457
+
458
+ ## Use Cases
459
+
460
+ ### 1. CI/CD Integration
461
+
462
+ Add to your CI pipeline to catch anti-patterns:
463
+
464
+ ```yaml
465
+ # .github/workflows/evals.yml
466
+ name: Code Quality Evals
467
+ on: [push, pull_request]
468
+
469
+ jobs:
470
+ evals:
471
+ runs-on: ubuntu-latest
472
+ steps:
473
+ - uses: actions/checkout@v3
474
+ - name: Run MicroEvals
475
+ run: |
476
+ pip install -r requirements.txt
477
+ python -m microevals.eval_runner \
478
+ --repo . \
479
+ --category nextjs \
480
+ --batch-size 10
481
+ ```
482
+
483
+ ### 2. Audit Existing Projects
484
+
485
+ Evaluate multiple repositories:
486
+
487
+ ```bash
488
+ #!/bin/bash
489
+ repos=(
490
+ "https://github.com/org/app1"
491
+ "https://github.com/org/app2"
492
+ "https://github.com/org/app3"
493
+ )
494
+
495
+ for repo in "${repos[@]}"; do
496
+ echo "Evaluating $repo..."
497
+ python -m microevals.eval_runner --repo "$repo" --all --batch-size 20
498
+ done
499
+ ```
500
+
501
+ ### 3. Pre-deployment Checks
502
+
503
+ Validate before deploying to production:
504
+
505
+ ```bash
506
+ # Check production-critical patterns
507
+ python -m microevals.eval_runner \
508
+ --repo https://github.com/org/production-app \
509
+ --category vercel \
510
+ --category supabase \
511
+ --input deployment_url "https://app.vercel.app"
512
+ ```
513
+
514
+ ## Troubleshooting
515
+
516
+ ### Claude CLI Not Found
517
+
518
+ ```bash
519
+ # Ensure Claude CLI is installed and in PATH
520
+ which claude
521
+
522
+ # If not installed, see: https://docs.anthropic.com/en/docs/build-with-claude/cli
523
+ ```
524
+
525
+ ### Rate Limiting
526
+
527
+ If you hit Claude rate limits:
528
+
529
+ ```bash
530
+ # Use batch mode to reduce API calls
531
+ python -m microevals.eval_runner --repo URL --all --batch-size 15
532
+
533
+ # Or run evals one at a time; single-eval mode adds an automatic 2s delay between evals
534
+ python -m microevals.eval_runner --repo URL --all --parallel 1
535
+ ```
536
+
537
+ ### Timeout Issues
538
+
539
+ For large codebases, increase timeout:
540
+
541
+ ```bash
542
+ python -m microevals.eval_runner \
543
+ --repo URL \
544
+ --all \
545
+ --timeout 600 \
546
+ --batch-size 10
547
+ ```
548
+
549
+ ## Contributing
550
+
551
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for:
552
+
553
+ - How to submit new evals
554
+ - Testing requirements
555
+ - PR guidelines
556
+
557
+ **Quick contribution:**
558
+ 1. Fork the repo
559
+ 2. Create new eval in `evals/[category]/`
560
+ 3. Test locally: `python -m microevals.eval_runner --eval your-eval.yaml --repo test-repo`
561
+ 4. Submit PR
562
+
563
+ ## License
564
+
565
+ MicroEvals is released under the MIT license. See [LICENSE](LICENSE) for details.
566
+
567
+ ## Support
568
+
569
+ - [Issues](https://github.com/Design-Arena/MicroEvals/issues)
570
+ - Email: contact@designarena.ai
571
+
572
+ ---
573
+
574
+ Built for better agent code quality.
575
+ See more and try the evals live at [designarena.ai/evals](https://designarena.ai/evals).
@@ -0,0 +1,71 @@
1
+ config/judge_system_prompt.yaml,sha256=ZsnWvg3IlXcNmF75MXo2fKEacRq_32ycab7bYPZgEd0,5088
2
+ evals/nextjs/001-server-component.yaml,sha256=5OUlzDEjDl8GhzEsFPWxaPTGIWh6Va9iQsFoNdHJBdo,1254
3
+ evals/nextjs/002-client-component.yaml,sha256=OSS4wceHg6gt38B1ViCD5EyQHeD-pm71rGsHW5jmySE,1187
4
+ evals/nextjs/003-cookies.yaml,sha256=7-o71TwWxf_1zypbMafBsBgbCCvehmv_QdIIEVocbtc,1327
5
+ evals/nextjs/010-route-handlers.yaml,sha256=aKNoU6aaqcYBK70nD7NwaUhJoTMbbGAKKV9ncZ-BsRE,1388
6
+ evals/nextjs/013-pathname-server.yaml,sha256=VhcdlfMp8kKnJny2RDddyJpFlnCtel9MPk2gv-4Zzjg,1384
7
+ evals/nextjs/014-server-routing.yaml,sha256=WSd9RiOf5qQjc8syBXg2feJO3Si6AIfg2tkpHlEOAcg,1406
8
+ evals/nextjs/018-use-router.yaml,sha256=tg_e_OX5YFBI1OszF-0vYLNc1Jp-1uxoXuABPWTKvKw,1229
9
+ evals/nextjs/020_no_use_effect.yaml,sha256=wQ1Gv4d4DfbF3WHWt6E9kayx1t8oUGy3tjlqmdFXWVw,1379
10
+ evals/nextjs/021-avoid-fetch-in-effect.yaml,sha256=-Yi1T6O2u7ZmR3w1jpkUvJ-SPMnxovQq6VEgNzk4YsU,1497
11
+ evals/nextjs/022_prefer_server_actions.yaml,sha256=3N_GIl216M3eL8leR5SUSmsxACjJp70LfDFS-d-cULg,1229
12
+ evals/nextjs/023_avoid_getserversideprops.yaml,sha256=bsxl6cu4S9quaCFFxyNYeQIC3_6aoGmtEjaXVPchYjM,1143
13
+ evals/nextjs/024_avoid_redundant_usestate.yaml,sha256=m2Iuwj03xM6PES2_nJA2Vk8SN4o3dImCwR1OFvyMgok,1251
14
+ evals/nextjs/025_no_async_client_components.yaml,sha256=sOLWLSEfg1AXrK4xYyVLt-xvbqzWYcmHItPdbUn1a0I,1299
15
+ evals/nextjs/026_no_serial_await.yaml,sha256=i-RPE8dsxo52IfXrb_FIVyJ0F4PexVwFWBak4yZpg7I,1118
16
+ evals/nextjs/027-prefer-next-image.yaml,sha256=_HqYl90rA1DdUH_59CIygMvdImKZEPAZFnUsgTrGiNI,1235
17
+ evals/nextjs/027_no_hooks_in_server_components.yaml,sha256=OCtNLX1Y2xPdPJSK-SmTtKFW2TbpJYF3g6ol3SPFQpY,1319
18
+ evals/nextjs/028-prefer-next-font.yaml,sha256=6zFqwC0Et8CgJh2TWhdg5bgaE1t5QayK3AJDTBJ5pac,1339
19
+ evals/nextjs/028_cookies_headers_context.yaml,sha256=CWByrwvBgU9S3u5sWbn7QaUAvoFMiEUj5Yw7KOKs82g,1379
20
+ evals/nextjs/029_no_catch_redirect.yaml,sha256=bzbKtQV93ldhDKjOWPGSMn2LgQlFmifxp4iOnMfThmM,1380
21
+ evals/nextjs/030_app_router_migration.yaml,sha256=d35_G6J-TydYO9Ehw-s8ETjDI5TBJK3827PEVOtLQSo,1201
22
+ evals/nextjs/031_no_non_serializable_props.yaml,sha256=kKb4zDbUmhBj9kEq2cFzO7QEx61LE16HfVkwmO7Btmc,1590
23
+ evals/react/001_missing_useeffect_dependencies.yaml,sha256=dvulLqelmdheGINvruWgM02LzxQz_7UlApLSr9YNlE4,1132
24
+ evals/react/002_incorrect_event_handler.yaml,sha256=BfSrp1Q887ZIUvWtG5s6B4kJnUBaQoJBArEg85SjRWw,1060
25
+ evals/react/003_missing_return_in_map.yaml,sha256=lJfeEVKkB28CgJizLTLIifhqzPXnfDpfxAlIwKK9M9A,1119
26
+ evals/react/004_async_useeffect.yaml,sha256=5iD_lilT9RZKe_Ql0JoDpiyEOWgR6bN8mhDuFSV9F3s,1071
27
+ evals/react/005_direct_state_mutation.yaml,sha256=bGIc6W9z3sIy_4fh04olnH2MDPGJJ3tVpnzFQM-SwzQ,1080
28
+ evals/react/006_index_as_key.yaml,sha256=Y-x6f6P0fMQHQdDHmeP0XUc-41dNqcIQzxOQhQlgJzk,1107
29
+ evals/react/zustand_store_usage.yaml,sha256=hWd5t2VEb7sqSZ0VILdTFps2zpB8QoIfrKCnhsHcD7w,886
30
+ evals/shadcn/001_cn_utility_function.yaml,sha256=p2m3vW8jQbsOtGPkdq58c4lNBt5lLjhJWKnEwgeqxB4,1050
31
+ evals/shadcn/002_css_variables.yaml,sha256=8nLAn5AuFTtJrIjztyrkNIFWlvCUR5ThcBpVbi4l9yI,1116
32
+ evals/shadcn/003_component_dependencies.yaml,sha256=9wcCMOrY09plqT-qdJ4Y0H_cu73NWBOCz47o7OFUdXc,1118
33
+ evals/shadcn/004_path_aliases.yaml,sha256=mJPsZoCsqLIVgU4-U-G1gZCiaVwXi8mLh5cgK3f5aCQ,1111
34
+ evals/shadcn/005_client_directive.yaml,sha256=Rh3KiGDILuhF5fFPd7O9MxG9aH2XDQ483rw6EvyZt-Y,1208
35
+ evals/shadcn/006_tailwind_config.yaml,sha256=yeTnRhwdzO4fxxh6S7v1KUVoRUYWBNOgw2J9RR_wWF8,1253
36
+ evals/shadcn/007_components_json_config.yaml,sha256=b1dQbAvA_3-fR2iBB9oAua-GAYlcAg3SBRksaLQc9QM,1154
37
+ evals/supabase/001_client_setup.yaml,sha256=P00X9Zovq-szyvftkRA_eW_RDbpbZH7JrMWzxQd_CsU,2102
38
+ evals/supabase/002_auth_context_setup.yaml,sha256=3_YmBmQSxMdw14DEtORUQaavYnzPlOryiGeIScPqZas,1854
39
+ evals/supabase/003_auth_flow_implementation.yaml,sha256=Rzea6GCxT9V7cEEjSa8pZj69BjRiJ3EaDqkX-P6UGn8,1634
40
+ evals/supabase/004_auth_flow_testing_WIP.yaml,sha256=JdqJ5p_YnNTgWDjvRBG4B3fkqUtww3hPbSBpPjc9tsU,1787
41
+ evals/supabase/005_auth_google_oauth.yaml,sha256=6scsmEaNmLCa27MhGdn3KsTQfX0jtuPio6cPdnelr8w,2136
42
+ evals/supabase/007_storage_client_setup.yaml,sha256=WsdZ_W2MHSqbgcPZfnd_gpBJvCiBLb5P6_yUz6fF6C4,1701
43
+ evals/supabase/008_storage_nextjs_config.yaml,sha256=drp8H6hnZZsIZvzunU_fHrxeNhwcp5t4hKLgW3qUNjU,1698
44
+ evals/supabase/009_storage_image_upload.yaml,sha256=yLh2vFAAaZAzTfcBmJ4v8IZtm3nhwru2e6M3eMEcpYU,1831
45
+ evals/supabase/010_security_rls_enabled.yaml,sha256=q9iglE_s5WEVRhteSr9WInPO-x5ZkE0RFx_89YkBmYA,1713
46
+ evals/supabase/011_security_rls_policies.yaml,sha256=f1LeJDcFEoNu0OfkcOlBXxiSnFfdEADHhtH6qjZhmKA,1805
47
+ evals/supabase/012_security_no_service_key_exposed.yaml,sha256=VEhOO9OQG2XimJ1yXhi-aYU3h4M9gBzIQaR69JsybTE,1950
48
+ evals/supabase/013_database_read_data.yaml,sha256=x9dDAIJEc1YDh7QS1nwQKZ4z7niCPb-m54_klnJpiGg,1660
49
+ evals/supabase/014_database_create_data.yaml,sha256=qyT850fWv-lDF4rQunOwSyHWyF1AisKVcbnZp0JMHzQ,1648
50
+ evals/supabase/015_database_update_data.yaml,sha256=rHStxm0UhyaXFeY9SdhCp3MTCBaUnIrdIkbn_kNIuhQ,1931
51
+ evals/supabase/016_database_delete_data.yaml,sha256=jkrNElfHgyXk13GBBdtwjTAkOtJsVaS_AbsfLaBYLiE,1879
52
+ evals/supabase/017_database_user_scoped_query.yaml,sha256=ZK0E3rN22RCk9S9d2AipKGGhK8WXvtEdrI5a9GAUG8Y,1993
53
+ evals/tailwind/001_tailwind_v4_config.yaml,sha256=RN7XV7zbAFKSeiXzKUPyNm7TfIz6Xdn6s4yxOgyUfj4,745
54
+ evals/tailwind/002_content_paths.yaml,sha256=2F7HS4Tnb3GQOLcde2fDMiWKPTKRE3MBduU4nMJtshI,1159
55
+ evals/tailwind/003_no_dynamic_class_construction.yaml,sha256=03p1Y58-rP0eAC4lRPXb2MFPSrFKBDU0rqm1RUKqITk,1124
56
+ evals/tailwind/tailwind_postcss_config.yaml,sha256=Mfts7oOzFlLGhrOfJ0TKsLGAWqnxEVKYX0UIvJsjXIs,1039
57
+ evals/typescript/001_unsafe_type_assertions.yaml,sha256=AFFcRRcNlShvXfZnq4EH47EGdSC_uM9hu5TfVtGPdpM,1551
58
+ evals/typescript/002_missing_null_checks.yaml,sha256=n_B9c1gCG9Qt9IgGJ-vryQjb7nAtGx_hGa7VWTpUqXg,1197
59
+ evals/vercel/001_vercel_deployment.yaml,sha256=BUBS_V_0MWQOR9a3j_PeTfDKK37Y-3tSBtcBtUJLm8c,845
60
+ evals/vercel/002_environment_variables_handling.yaml,sha256=s5_7u9FKXtgxJhNgRhUTBzBknMbTbnuwndWY5UPK7Uw,1069
61
+ evals/vercel/003_seo_metadata.yaml,sha256=IPMyNdU6dVdWznwb2dY2VtldYFOVbIdBTb8ho6KjO5U,1448
62
+ microevals/__init__.py,sha256=6OZMz6BNNbsEaLcoeooRXgREZ-UjwJkFfQWxuKx6JLM,651
63
+ microevals/eval_registry.py,sha256=8r-57uqcorPUuf2m_4gliI6RGQpgLYz0DaYJf-LOkng,8489
64
+ microevals/eval_runner.py,sha256=2VMyDUnPKWz_TWVg7hWyQNxK3Lpil0Q4d58S_CreUZs,21473
65
+ microevals/utils.py,sha256=qvZk09CDwM4_pym1hSM_W-ZDIfYCMUeNIC8K5rhI9x4,16884
66
+ microevals-0.1.0.dist-info/licenses/LICENSE,sha256=KrbtaYayZGcets1iOs45DzSQ29eEDn25Vk9yzIfA-TA,1069
67
+ microevals-0.1.0.dist-info/METADATA,sha256=623qIjGrAGJnjbHPLxGuqGlxzGtFnOH-MR1ERnA7jls,15671
68
+ microevals-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
+ microevals-0.1.0.dist-info/entry_points.txt,sha256=sTpr74DUzhPIMVcfzx1IyLwZ_wNuf5BJNYhNwxamm_c,58
70
+ microevals-0.1.0.dist-info/top_level.txt,sha256=8LDwu8cEm-bLrA_lUj1ARssge0Db7oHgu0cXnysx5UM,11
71
+ microevals-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ microeval = microevals.eval_runner:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Design-Arena
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ microevals