microeval 0.1.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. microeval-0.1.0/.beads/.gitignore +29 -0
  2. microeval-0.1.0/.beads/README.md +81 -0
  3. microeval-0.1.0/.beads/beads.left.jsonl +7 -0
  4. microeval-0.1.0/.beads/beads.left.meta.json +1 -0
  5. microeval-0.1.0/.beads/config.yaml +56 -0
  6. microeval-0.1.0/.beads/daemon.lock +7 -0
  7. microeval-0.1.0/.beads/issues.jsonl +7 -0
  8. microeval-0.1.0/.beads/metadata.json +5 -0
  9. microeval-0.1.0/.env.example +9 -0
  10. microeval-0.1.0/.gitattributes +3 -0
  11. microeval-0.1.0/.gitignore +173 -0
  12. microeval-0.1.0/LICENSE +21 -0
  13. microeval-0.1.0/PKG-INFO +479 -0
  14. microeval-0.1.0/README.md +446 -0
  15. microeval-0.1.0/evals-consultant/prompts/candidate-skills.txt +59 -0
  16. microeval-0.1.0/evals-consultant/prompts/candidate-summary.txt +29 -0
  17. microeval-0.1.0/evals-consultant/queries/consultant.yaml +121 -0
  18. microeval-0.1.0/evals-consultant/results/consultant-groq-llama-3.3-70b.yaml +72 -0
  19. microeval-0.1.0/evals-consultant/runs/consultant-bedrock-nova-pro.yaml +12 -0
  20. microeval-0.1.0/evals-consultant/runs/consultant-groq-llama-3.3-70b.yaml +12 -0
  21. microeval-0.1.0/evals-consultant/runs/consultant-ollama-llama3.2.yaml +12 -0
  22. microeval-0.1.0/evals-consultant/runs/consultant-openai-gpt-4o.yaml +12 -0
  23. microeval-0.1.0/evals-engineer/prompts/candidate-skills.txt +59 -0
  24. microeval-0.1.0/evals-engineer/prompts/candidate-summary.txt +29 -0
  25. microeval-0.1.0/evals-engineer/queries/engineer.yaml +124 -0
  26. microeval-0.1.0/evals-engineer/results/engineer-bedrock-nova-pro.yaml +76 -0
  27. microeval-0.1.0/evals-engineer/results/engineer-groq-llama-3.3-70b.yaml +64 -0
  28. microeval-0.1.0/evals-engineer/results/engineer-ollama-llama3.2.yaml +254 -0
  29. microeval-0.1.0/evals-engineer/results/engineer-openai-gpt-4o.yaml +70 -0
  30. microeval-0.1.0/evals-engineer/runs/engineer-bedrock-nova-pro.yaml +11 -0
  31. microeval-0.1.0/evals-engineer/runs/engineer-groq-llama-3.3-70b.yaml +11 -0
  32. microeval-0.1.0/evals-engineer/runs/engineer-ollama-llama3.2.yaml +11 -0
  33. microeval-0.1.0/evals-engineer/runs/engineer-openai-gpt-4o.yaml +11 -0
  34. microeval-0.1.0/microeval/__init__.py +0 -0
  35. microeval-0.1.0/microeval/chat.py +62 -0
  36. microeval-0.1.0/microeval/chat_client.py +1052 -0
  37. microeval-0.1.0/microeval/cli.py +150 -0
  38. microeval-0.1.0/microeval/config.json +14 -0
  39. microeval-0.1.0/microeval/evaluator.py +330 -0
  40. microeval-0.1.0/microeval/graph.py +133 -0
  41. microeval-0.1.0/microeval/index.html +1306 -0
  42. microeval-0.1.0/microeval/runner.py +139 -0
  43. microeval-0.1.0/microeval/sample-evals/prompts/summarize.txt +13 -0
  44. microeval-0.1.0/microeval/sample-evals/queries/summarize.yaml +13 -0
  45. microeval-0.1.0/microeval/sample-evals/results/summarize-openai-gpt-4o.yaml +60 -0
  46. microeval-0.1.0/microeval/sample-evals/runs/summarize-bedrock-nova.yaml +12 -0
  47. microeval-0.1.0/microeval/sample-evals/runs/summarize-groq-llama.yaml +12 -0
  48. microeval-0.1.0/microeval/sample-evals/runs/summarize-ollama-llama3.yaml +12 -0
  49. microeval-0.1.0/microeval/sample-evals/runs/summarize-openai-gpt-4o.yaml +15 -0
  50. microeval-0.1.0/microeval/schemas.py +127 -0
  51. microeval-0.1.0/microeval/server.py +457 -0
  52. microeval-0.1.0/microeval/setup_logger.py +51 -0
  53. microeval-0.1.0/microeval/yaml_utils.py +55 -0
  54. microeval-0.1.0/pyproject.toml +43 -0
  55. microeval-0.1.0/sample-evals/prompts/summarize.txt +13 -0
  56. microeval-0.1.0/sample-evals/queries/summarize.yaml +13 -0
  57. microeval-0.1.0/sample-evals/results/summarize-groq-llama.yaml +60 -0
  58. microeval-0.1.0/sample-evals/runs/summarize-bedrock-nova.yaml +12 -0
  59. microeval-0.1.0/sample-evals/runs/summarize-groq-llama.yaml +12 -0
  60. microeval-0.1.0/sample-evals/runs/summarize-ollama-llama3.yaml +12 -0
  61. microeval-0.1.0/sample-evals/runs/summarize-openai-gpt-4o.yaml +12 -0
@@ -0,0 +1,29 @@
1
+ # SQLite databases
2
+ *.db
3
+ *.db?*
4
+ *.db-journal
5
+ *.db-wal
6
+ *.db-shm
7
+
8
+ # Daemon runtime files
9
+ daemon.lock
10
+ daemon.log
11
+ daemon.pid
12
+ bd.sock
13
+
14
+ # Legacy database files
15
+ db.sqlite
16
+ bd.db
17
+
18
+ # Merge artifacts (temporary files from 3-way merge)
19
+ beads.base.jsonl
20
+ beads.base.meta.json
21
+ beads.left.jsonl
22
+ beads.left.meta.json
23
+ beads.right.jsonl
24
+ beads.right.meta.json
25
+
26
+ # Keep JSONL exports and config (source of truth for git)
27
+ !issues.jsonl
28
+ !metadata.json
29
+ !config.json
@@ -0,0 +1,81 @@
1
+ # Beads - AI-Native Issue Tracking
2
+
3
+ Welcome to Beads! This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your repository, right alongside your code.
4
+
5
+ ## What is Beads?
6
+
7
+ Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git.
8
+
9
+ **Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads)
10
+
11
+ ## Quick Start
12
+
13
+ ### Essential Commands
14
+
15
+ ```bash
16
+ # Create new issues
17
+ bd create "Add user authentication"
18
+
19
+ # View all issues
20
+ bd list
21
+
22
+ # View issue details
23
+ bd show <issue-id>
24
+
25
+ # Update issue status
26
+ bd update <issue-id> --status in-progress
27
+ bd update <issue-id> --status done
28
+
29
+ # Sync with git remote
30
+ bd sync
31
+ ```
32
+
33
+ ### Working with Issues
34
+
35
+ Issues in Beads are:
36
+ - **Git-native**: Stored in `.beads/issues.jsonl` and synced like code
37
+ - **AI-friendly**: CLI-first design works perfectly with AI coding agents
38
+ - **Branch-aware**: Issues can follow your branch workflow
39
+ - **Always in sync**: Auto-syncs with your commits
40
+
41
+ ## Why Beads?
42
+
43
+ ✨ **AI-Native Design**
44
+ - Built specifically for AI-assisted development workflows
45
+ - CLI-first interface works seamlessly with AI coding agents
46
+ - No context switching to web UIs
47
+
48
+ 🚀 **Developer Focused**
49
+ - Issues live in your repo, right next to your code
50
+ - Works offline, syncs when you push
51
+ - Fast, lightweight, and stays out of your way
52
+
53
+ 🔧 **Git Integration**
54
+ - Automatic sync with git commits
55
+ - Branch-aware issue tracking
56
+ - Intelligent JSONL merge resolution
57
+
58
+ ## Get Started with Beads
59
+
60
+ Try Beads in your own projects:
61
+
62
+ ```bash
63
+ # Install Beads
64
+ curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash
65
+
66
+ # Initialize in your repo
67
+ bd init
68
+
69
+ # Create your first issue
70
+ bd create "Try out Beads"
71
+ ```
72
+
73
+ ## Learn More
74
+
75
+ - **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs)
76
+ - **Quick Start Guide**: Run `bd quickstart`
77
+ - **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples)
78
+
79
+ ---
80
+
81
+ *Beads: Issue tracking that moves at the speed of thought* ⚡
@@ -0,0 +1,7 @@
1
+ {"id":"eval-0so","title":"Add validation for config.json format","description":"Add schema validation for config.json to ensure chat_models and embed_models are properly formatted. Provide helpful error messages if config is invalid.","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-30T15:55:31.313033+11:00","updated_at":"2025-11-30T15:55:31.313033+11:00"}
2
+ {"id":"eval-519","title":"Add documentation for --evals-dir parameter usage","description":"Document the --evals-dir command-line parameter for both runner.py and server.py, including usage examples for switching between evals-engineer and evals-consultant.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-30T15:55:08.697823+11:00","updated_at":"2025-11-30T15:59:02.552661+11:00","closed_at":"2025-11-30T15:59:02.552661+11:00"}
3
+ {"id":"eval-dx6","title":"Update README.md to reflect new evals-engineer/evals-consultant structure","description":"README.md still references old evals directory. Need to update documentation to show split into evals-engineer and evals-consultant directories.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-30T15:55:00.792861+11:00","updated_at":"2025-11-30T15:58:38.70658+11:00","closed_at":"2025-11-30T15:58:38.70658+11:00"}
4
+ {"id":"eval-fy3","title":"Update graph.html to work with new directory structure","description":"Graph.html and graph-data.js need to be updated to read from either evals-engineer or evals-consultant directory structure. Currently references old evals/results path.","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-30T15:55:22.977392+11:00","updated_at":"2025-11-30T15:55:22.977392+11:00"}
5
+ {"id":"eval-mrs","title":"Add export/import functionality for evaluation configurations","description":"Add ability to export evaluation configurations (runs, queries, prompts) to a portable format and import them into different environments or share with other users.","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-30T15:55:38.520227+11:00","updated_at":"2025-11-30T15:55:38.520227+11:00"}
6
+ {"id":"eval-rpx","title":"Add ability to switch between evals-engineer and evals-consultant in the UI","description":"Add a dropdown or toggle in the web UI to switch between evals-engineer and evals-consultant directories without restarting the server.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-11-30T15:55:15.623639+11:00","updated_at":"2025-11-30T15:55:15.623639+11:00"}
7
+ {"id":"eval-y1v","title":"Add comparison view for results across different models","description":"Create a UI view that allows side-by-side comparison of evaluation results across different models (e.g., compare gpt-4o vs llama3.2 vs bedrock-nova performance on same query).","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-30T15:55:46.05204+11:00","updated_at":"2025-11-30T15:55:46.05204+11:00"}
@@ -0,0 +1 @@
1
+ {"version":"0.26.0","timestamp":"2025-12-14T10:15:10.811603+11:00","commit":"34a150f"}
@@ -0,0 +1,56 @@
1
+ # Beads Configuration File
2
+ # This file configures default behavior for all bd commands in this repository
3
+ # All settings can also be set via environment variables (BD_* prefix)
4
+ # or overridden with command-line flags
5
+
6
+ # Issue prefix for this repository (used by bd init)
7
+ # If not set, bd init will auto-detect from directory name
8
+ # Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc.
9
+ # issue-prefix: ""
10
+
11
+ # Use no-db mode: load from JSONL, no SQLite, write back after each command
12
+ # When true, bd will use .beads/issues.jsonl as the source of truth
13
+ # instead of SQLite database
14
+ # no-db: false
15
+
16
+ # Disable daemon for RPC communication (forces direct database access)
17
+ # no-daemon: false
18
+
19
+ # Disable auto-flush of database to JSONL after mutations
20
+ # no-auto-flush: false
21
+
22
+ # Disable auto-import from JSONL when it's newer than database
23
+ # no-auto-import: false
24
+
25
+ # Enable JSON output by default
26
+ # json: false
27
+
28
+ # Default actor for audit trails (overridden by BD_ACTOR or --actor)
29
+ # actor: ""
30
+
31
+ # Path to database (overridden by BEADS_DB or --db)
32
+ # db: ""
33
+
34
+ # Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON)
35
+ # auto-start-daemon: true
36
+
37
+ # Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE)
38
+ # flush-debounce: "5s"
39
+
40
+ # Multi-repo configuration (experimental - bd-307)
41
+ # Allows hydrating from multiple repositories and routing writes to the correct JSONL
42
+ # repos:
43
+ # primary: "." # Primary repo (where this database lives)
44
+ # additional: # Additional repos to hydrate from (read-only)
45
+ # - ~/beads-planning # Personal planning repo
46
+ # - ~/work-planning # Work planning repo
47
+
48
+ # Integration settings (access with 'bd config get/set')
49
+ # These are stored in the database, not in this file:
50
+ # - jira.url
51
+ # - jira.project
52
+ # - linear.url
53
+ # - linear.api-key
54
+ # - github.org
55
+ # - github.repo
56
+ # - sync.branch - Git branch for beads commits (use BEADS_SYNC_BRANCH env var or bd config set)
@@ -0,0 +1,7 @@
1
+ {
2
+ "pid": 2865,
3
+ "parent_pid": 2860,
4
+ "database": "/Users/boscoh/p/starteval/.beads/beads.db",
5
+ "version": "0.26.0",
6
+ "started_at": "2025-12-15T07:22:28.962716Z"
7
+ }
@@ -0,0 +1,7 @@
1
+ {"id":"eval-0so","title":"Add validation for config.json format","description":"Add schema validation for config.json to ensure chat_models and embed_models are properly formatted. Provide helpful error messages if config is invalid.","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-30T15:55:31.313033+11:00","updated_at":"2025-11-30T15:55:31.313033+11:00"}
2
+ {"id":"eval-519","title":"Add documentation for --evals-dir parameter usage","description":"Document the --evals-dir command-line parameter for both runner.py and server.py, including usage examples for switching between evals-engineer and evals-consultant.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-30T15:55:08.697823+11:00","updated_at":"2025-11-30T15:59:02.552661+11:00","closed_at":"2025-11-30T15:59:02.552661+11:00"}
3
+ {"id":"eval-dx6","title":"Update README.md to reflect new evals-engineer/evals-consultant structure","description":"README.md still references old evals directory. Need to update documentation to show split into evals-engineer and evals-consultant directories.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-30T15:55:00.792861+11:00","updated_at":"2025-11-30T15:58:38.70658+11:00","closed_at":"2025-11-30T15:58:38.70658+11:00"}
4
+ {"id":"eval-fy3","title":"Update graph.html to work with new directory structure","description":"Graph.html and graph-data.js need to be updated to read from either evals-engineer or evals-consultant directory structure. Currently references old evals/results path.","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-30T15:55:22.977392+11:00","updated_at":"2025-11-30T15:55:22.977392+11:00"}
5
+ {"id":"eval-mrs","title":"Add export/import functionality for evaluation configurations","description":"Add ability to export evaluation configurations (runs, queries, prompts) to a portable format and import them into different environments or share with other users.","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-30T15:55:38.520227+11:00","updated_at":"2025-11-30T15:55:38.520227+11:00"}
6
+ {"id":"eval-rpx","title":"Add ability to switch between evals-engineer and evals-consultant in the UI","description":"Add a dropdown or toggle in the web UI to switch between evals-engineer and evals-consultant directories without restarting the server.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-11-30T15:55:15.623639+11:00","updated_at":"2025-11-30T15:55:15.623639+11:00"}
7
+ {"id":"eval-y1v","title":"Add comparison view for results across different models","description":"Create a UI view that allows side-by-side comparison of evaluation results across different models (e.g., compare gpt-4o vs llama3.2 vs bedrock-nova performance on same query).","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-30T15:55:46.05204+11:00","updated_at":"2025-11-30T15:55:46.05204+11:00"}
@@ -0,0 +1,5 @@
1
+ {
2
+ "database": "beads.db",
3
+ "jsonl_export": "issues.jsonl",
4
+ "last_bd_version": "0.26.0"
5
+ }
@@ -0,0 +1,9 @@
1
+ # OpenAI API Configuration
2
+ OPENAI_API_KEY=your_openai_api_key_here
3
+
4
+ # Groq API Configuration
5
+ GROQ_API_KEY=your_groq_api_key_here
6
+
7
+ # AWS Configuration (recommended: keep credentials in ~/.aws/credentials and select the profile by name)
8
+ AWS_PROFILE=your_aws_profile_name
9
+
@@ -0,0 +1,3 @@
1
+
2
+ # Use bd merge for beads JSONL files
3
+ .beads/issues.jsonl merge=beads
@@ -0,0 +1,173 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject the date and other information into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, when collaborating, if there are platform-specific dependencies or dependencies
93
+ # without cross-platform support, pipenv may install dependencies that don't work, or fail to
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
133
+
134
+ # pytype static type analyzer
135
+ .pytype/
136
+
137
+ # Cython debug symbols
138
+ cython_debug/
139
+
140
+ # PyCharm
141
+ .idea/
142
+ *.iml
143
+
144
+ # VS Code
145
+ .vscode/
146
+ *.code-workspace
147
+
148
+ # macOS
149
+ .DS_Store
150
+ .AppleDouble
151
+ .LSOverride
152
+
153
+ # Windows
154
+ Thumbs.db
155
+ ehthumbs.db
156
+ Desktop.ini
157
+ $RECYCLE.BIN/
158
+
159
+ # Linux
160
+ *~
161
+
162
+ # Local development
163
+ *.local
164
+
165
+ # Project specific
166
+ *.db
167
+ *.sqlite3
168
+ *.log
169
+ *.pid
170
+ *.pid.lock
171
+
172
+ uv.lock
173
+ .ruff_cache
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Bosco Ho
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.