assayer 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
assayer-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Practical Mind
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
assayer-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: assayer
3
+ Version: 0.1.1
4
+ Summary: Run a prompt across multiple LLMs and compare outputs side by side in the terminal.
5
+ License: MIT
6
+ Classifier: Development Status :: 3 - Alpha
7
+ Classifier: Environment :: Console
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.11
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: click>=8.1
18
+ Requires-Dist: rich>=13.0
19
+ Requires-Dist: litellm>=1.40
20
+ Requires-Dist: httpx>=0.27
21
+ Provides-Extra: score
22
+ Requires-Dist: sentence-transformers>=3.0; extra == "score"
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
26
+ Requires-Dist: ruff>=0.4; extra == "dev"
27
+ Requires-Dist: mypy>=1.10; extra == "dev"
28
+ Requires-Dist: sentence-transformers>=3.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # assayer
32
+
33
+ Send a prompt to multiple language models in parallel and compare their outputs in the terminal. Useful for evaluating which model handles a given task better, measuring semantic similarity between responses, or running an LLM-as-judge evaluation — without leaving the shell.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install assayer
39
+ ```
40
+
41
+ Similarity scoring requires the optional `score` extra:
42
+
43
+ ```bash
44
+ pip install "assayer[score]"
45
+ ```
46
+
47
+ Python 3.11 or newer is required.
48
+
49
+ > **Contributing?** See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, code style, and PR guidelines.
50
+
51
+ ## Supported Providers
52
+
53
+ - **OpenAI**: GPT and o-series models.
54
+ - **Anthropic**: Claude 4.5, 4.6, and 4.7 models (Opus, Sonnet, Haiku).
55
+ - **Google Gemini**: Gemini 2.0, 2.5, and 3.x models (Pro, Flash, Flash-Lite).
56
+ - **Ollama**: Local models running on your machine.
57
+
58
+ ## Configuration
59
+
60
+ Assayer looks for API keys in environment variables or a configuration file at `~/.assayer/config.json`.
61
+
62
+ ### Environment Variables
63
+
64
+ ```bash
65
+ export OPENAI_API_KEY="your-key"
66
+ export ANTHROPIC_API_KEY="your-key"
67
+ export GEMINI_API_KEY="your-key"
68
+ ```
69
+
70
+ ### Configuration File
71
+
72
+ ```json
73
+ {
74
+ "OPENAI_API_KEY": "sk-...",
75
+ "ANTHROPIC_API_KEY": "sk-ant-...",
76
+ "GEMINI_API_KEY": "..."
77
+ }
78
+ ```
79
+
80
+ Use `assayer models check` to verify your configuration.
81
+
82
+ ## Quickstart
83
+
84
+ ```bash
85
+ assayer run "Explain recursion in one sentence." --models gpt-4o,claude-haiku-4-5-20251001
86
+ ```
87
+
88
+ ## Commands
89
+
90
+ ### run
91
+
92
+ ```bash
93
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5
94
+ assayer run --prompt-file prompt.txt --models gpt-4o,ollama/llama3
95
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --score
96
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --judge gpt-4o --judge-criteria "clarity,brevity"
97
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --output results.json
98
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --output results.csv
99
+ assayer run "prompt with {var}" --models gpt-4o --var key=value
100
+ ```
101
+
102
+ | Flag | Description |
103
+ |---|---|
104
+ | `--models` | Comma-separated model identifiers (required) |
105
+ | `--prompt-file` | Path to a `.txt` file instead of an inline prompt |
106
+ | `--var` | `KEY=VALUE` template variable, repeatable |
107
+ | `--system` | System prompt applied to all models |
108
+ | `--temperature` | Sampling temperature |
109
+ | `--max-tokens` | Maximum output tokens |
110
+ | `--score` | Show pairwise similarity matrix |
111
+ | `--judge` | Model to use as judge |
112
+ | `--judge-criteria` | Comma-separated criteria for the judge |
113
+ | `--output` | Save results to `.json` or `.csv` |
114
+
115
+ ### models
116
+
117
+ ```bash
118
+ assayer models list # list all supported model identifiers
119
+ assayer models check # check which API keys are configured
120
+ assayer models check ollama # check if Ollama is running and list local models
121
+ ```
122
+
123
+ ### config
124
+
125
+ ```bash
126
+ assayer config set OPENAI_API_KEY sk-...
127
+ assayer config show
128
+ ```
129
+
130
+ Keys are saved to `~/.assayer/config.json`. Environment variables take precedence.
131
+
132
+ ## Providers
133
+
134
+ ### OpenAI
135
+
136
+ ```bash
137
+ export OPENAI_API_KEY=sk-...
138
+ ```
139
+
140
+ Supported models: `gpt-5.5`, `gpt-5.5-pro`, `gpt-5.4`, `gpt-5.4-pro`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5.2`, `gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-4o`, `gpt-4o-mini`, `o3`, `o3-mini`, `o4-mini`
141
+
142
+ ### Anthropic
143
+
144
+ ```bash
145
+ export ANTHROPIC_API_KEY=sk-ant-...
146
+ ```
147
+
148
+ Supported models: `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5-20251001`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-opus-4-5`
149
+
150
+ ### Google Gemini
151
+
152
+ ```bash
153
+ export GEMINI_API_KEY=...
154
+ ```
155
+
156
+ Supported models: `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite`, `gemini-3-flash-preview`, `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-2.0-flash`, `gemini-2.0-flash-lite`
157
+
158
+ ### Ollama (local)
159
+
160
+ No API key needed. Start Ollama and use the `ollama/` prefix:
161
+
162
+ ```bash
163
+ ollama serve
164
+ assayer run "prompt" --models ollama/llama4-scout,ollama/llama3.2,ollama/qwen3
165
+ ```
166
+
167
+ ## Scoring
168
+
169
+ `--score` embeds all outputs using `all-MiniLM-L6-v2` (runs locally, no API call) and displays a pairwise cosine similarity matrix. Values range from 0 (unrelated) to 1 (identical meaning).
170
+
171
+ ## LLM-as-judge
172
+
173
+ `--judge <model>` sends all outputs to the specified model and asks it to pick a winner. Use `--judge-criteria` to focus the evaluation:
174
+
175
+ ```bash
176
+ assayer run "Write a sorting algorithm." \
177
+ --models gpt-4o,claude-sonnet-4-5 \
178
+ --judge gpt-4o \
179
+ --judge-criteria "correctness,readability"
180
+ ```
181
+
182
+ If the judge call fails, a warning is printed to stderr and the run continues normally.
183
+
184
+ ## Export
185
+
186
+ `--output results.json` saves full results as JSON. `--output results.csv` saves as CSV. The file format is determined by the extension.
@@ -0,0 +1,156 @@
1
+ # assayer
2
+
3
+ Send a prompt to multiple language models in parallel and compare their outputs in the terminal. Useful for evaluating which model handles a given task better, measuring semantic similarity between responses, or running an LLM-as-judge evaluation — without leaving the shell.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install assayer
9
+ ```
10
+
11
+ Similarity scoring requires the optional `score` extra:
12
+
13
+ ```bash
14
+ pip install "assayer[score]"
15
+ ```
16
+
17
+ Python 3.11 or newer is required.
18
+
19
+ > **Contributing?** See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, code style, and PR guidelines.
20
+
21
+ ## Supported Providers
22
+
23
+ - **OpenAI**: GPT and o-series models.
24
+ - **Anthropic**: Claude 4.5, 4.6, and 4.7 models (Opus, Sonnet, Haiku).
25
+ - **Google Gemini**: Gemini 2.0, 2.5, and 3.x models (Pro, Flash, Flash-Lite).
26
+ - **Ollama**: Local models running on your machine.
27
+
28
+ ## Configuration
29
+
30
+ Assayer looks for API keys in environment variables or a configuration file at `~/.assayer/config.json`.
31
+
32
+ ### Environment Variables
33
+
34
+ ```bash
35
+ export OPENAI_API_KEY="your-key"
36
+ export ANTHROPIC_API_KEY="your-key"
37
+ export GEMINI_API_KEY="your-key"
38
+ ```
39
+
40
+ ### Configuration File
41
+
42
+ ```json
43
+ {
44
+ "OPENAI_API_KEY": "sk-...",
45
+ "ANTHROPIC_API_KEY": "sk-ant-...",
46
+ "GEMINI_API_KEY": "..."
47
+ }
48
+ ```
49
+
50
+ Use `assayer models check` to verify your configuration.
51
+
52
+ ## Quickstart
53
+
54
+ ```bash
55
+ assayer run "Explain recursion in one sentence." --models gpt-4o,claude-haiku-4-5-20251001
56
+ ```
57
+
58
+ ## Commands
59
+
60
+ ### run
61
+
62
+ ```bash
63
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5
64
+ assayer run --prompt-file prompt.txt --models gpt-4o,ollama/llama3
65
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --score
66
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --judge gpt-4o --judge-criteria "clarity,brevity"
67
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --output results.json
68
+ assayer run "prompt" --models gpt-4o,claude-sonnet-4-5 --output results.csv
69
+ assayer run "prompt with {var}" --models gpt-4o --var key=value
70
+ ```
71
+
72
+ | Flag | Description |
73
+ |---|---|
74
+ | `--models` | Comma-separated model identifiers (required) |
75
+ | `--prompt-file` | Path to a `.txt` file instead of an inline prompt |
76
+ | `--var` | `KEY=VALUE` template variable, repeatable |
77
+ | `--system` | System prompt applied to all models |
78
+ | `--temperature` | Sampling temperature |
79
+ | `--max-tokens` | Maximum output tokens |
80
+ | `--score` | Show pairwise similarity matrix |
81
+ | `--judge` | Model to use as judge |
82
+ | `--judge-criteria` | Comma-separated criteria for the judge |
83
+ | `--output` | Save results to `.json` or `.csv` |
84
+
85
+ ### models
86
+
87
+ ```bash
88
+ assayer models list # list all supported model identifiers
89
+ assayer models check # check which API keys are configured
90
+ assayer models check ollama # check if Ollama is running and list local models
91
+ ```
92
+
93
+ ### config
94
+
95
+ ```bash
96
+ assayer config set OPENAI_API_KEY sk-...
97
+ assayer config show
98
+ ```
99
+
100
+ Keys are saved to `~/.assayer/config.json`. Environment variables take precedence.
101
+
102
+ ## Providers
103
+
104
+ ### OpenAI
105
+
106
+ ```bash
107
+ export OPENAI_API_KEY=sk-...
108
+ ```
109
+
110
+ Supported models: `gpt-5.5`, `gpt-5.5-pro`, `gpt-5.4`, `gpt-5.4-pro`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5.2`, `gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-4o`, `gpt-4o-mini`, `o3`, `o3-mini`, `o4-mini`
111
+
112
+ ### Anthropic
113
+
114
+ ```bash
115
+ export ANTHROPIC_API_KEY=sk-ant-...
116
+ ```
117
+
118
+ Supported models: `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5-20251001`, `claude-opus-4-6`, `claude-sonnet-4-5`, `claude-opus-4-5`
119
+
120
+ ### Google Gemini
121
+
122
+ ```bash
123
+ export GEMINI_API_KEY=...
124
+ ```
125
+
126
+ Supported models: `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite`, `gemini-3-flash-preview`, `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-2.0-flash`, `gemini-2.0-flash-lite`
127
+
128
+ ### Ollama (local)
129
+
130
+ No API key needed. Start Ollama and use the `ollama/` prefix:
131
+
132
+ ```bash
133
+ ollama serve
134
+ assayer run "prompt" --models ollama/llama4-scout,ollama/llama3.2,ollama/qwen3
135
+ ```
136
+
137
+ ## Scoring
138
+
139
+ `--score` embeds all outputs using `all-MiniLM-L6-v2` (runs locally, no API call) and displays a pairwise cosine similarity matrix. Values range from 0 (unrelated) to 1 (identical meaning).
140
+
141
+ ## LLM-as-judge
142
+
143
+ `--judge <model>` sends all outputs to the specified model and asks it to pick a winner. Use `--judge-criteria` to focus the evaluation:
144
+
145
+ ```bash
146
+ assayer run "Write a sorting algorithm." \
147
+ --models gpt-4o,claude-sonnet-4-5 \
148
+ --judge gpt-4o \
149
+ --judge-criteria "correctness,readability"
150
+ ```
151
+
152
+ If the judge call fails, a warning is printed to stderr and the run continues normally.
153
+
154
+ ## Export
155
+
156
+ `--output results.json` saves full results as JSON. `--output results.csv` saves as CSV. The file format is determined by the extension.
File without changes
File without changes
@@ -0,0 +1,209 @@
1
+ import asyncio
2
+ import logging
3
+ import sys
4
+
5
+ import click
6
+ import httpx
7
+
8
+ from assayer.config import get_api_key, set_api_key, show_config
9
+
10
class _F(logging.Filter):
    """Log filter that passes only records at ERROR level or above."""

    def filter(self, record: logging.LogRecord) -> bool:
        return record.levelno >= logging.ERROR


# Attach the filter to the "LiteLLM" logger so that anything below ERROR
# logged through it is dropped.
logging.getLogger("LiteLLM").addFilter(_F())
13
+
14
# Model identifiers printed by `assayer models list`, grouped by provider.
# Insertion order is the display order.
_KNOWN_MODELS: dict[str, list[str]] = dict(
    openai=[
        "gpt-5.5",
        "gpt-5.5-pro",
        "gpt-5.4",
        "gpt-5.4-pro",
        "gpt-5.4-mini",
        "gpt-5.4-nano",
        "gpt-5.2",
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-4.1-nano",
        "gpt-4o",
        "gpt-4o-mini",
        "o3",
        "o3-mini",
        "o4-mini",
    ],
    anthropic=[
        "claude-opus-4-7",
        "claude-sonnet-4-6",
        "claude-haiku-4-5-20251001",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-opus-4-5",
    ],
    gemini=[
        "gemini-3.1-pro-preview",
        "gemini-3.1-flash-lite",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
        "gemini-2.0-flash",
        "gemini-2.0-flash-lite",
    ],
    # Ollama identifiers carry the "ollama/" prefix.
    ollama=[
        "ollama/llama4-scout",
        "ollama/llama3.2",
        "ollama/qwen3",
        "ollama/gemma4",
        "ollama/mistral",
        "ollama/deepseek-r1",
        "ollama/phi4",
    ],
)
34
+
35
+
36
@click.group()
def cli() -> None:
    """Run a prompt across multiple LLMs and compare outputs side by side."""
    # The docstring doubles as the description shown by `assayer --help`;
    # the group itself does no work, subcommands are attached below.
39
+
40
+
41
@cli.command()
@click.argument("prompt", required=False)
@click.option("--models", required=True, help="Comma-separated model identifiers.")
@click.option(
    "--prompt-file",
    type=click.Path(exists=True),
    help="Path to a .txt prompt file.",
)
@click.option(
    "--var",
    multiple=True,
    metavar="KEY=VALUE",
    help="Template variables, repeatable.",
)
@click.option("--system", default=None, help="System prompt applied to all models.")
@click.option("--temperature", type=float, default=None, help="Sampling temperature.")
@click.option("--max-tokens", type=int, default=None, help="Max output tokens.")
@click.option("--output", default=None, help="Save results to file (.json or .csv).")
@click.option("--score", is_flag=True, default=False, help="Show similarity matrix.")
@click.option("--judge", default=None, help="Model to use as judge.")
@click.option(
    "--judge-criteria", default=None, help="Comma-separated evaluation criteria."
)
def run(
    prompt: str | None,
    models: str,
    prompt_file: str | None,
    var: tuple[str, ...],
    system: str | None,
    temperature: float | None,
    max_tokens: int | None,
    output: str | None,
    score: bool,
    judge: str | None,
    judge_criteria: str | None,
) -> None:
    """Send PROMPT to several models and compare their outputs.

    Give the prompt inline or with --prompt-file; the file wins if both
    are supplied.
    \f
    Everything after the form feed is hidden from ``--help``.

    Steps: resolve the prompt text, substitute ``--var`` template
    variables (only when at least one ``--var`` is given), run all models,
    optionally compute the similarity matrix and the judge verdict, render
    the results, and optionally export them.

    Exits with status 1 (message on stderr) when no prompt is available,
    the prompt file is empty, a ``--var`` is not ``KEY=VALUE``, the
    template cannot be filled in, or ``--models`` names no model.
    """
    if prompt_file:
        # Explicit encoding so the same file reads identically on every
        # platform instead of depending on the locale.
        with open(prompt_file, encoding="utf-8") as f:
            prompt_text = f.read().strip()
        if not prompt_text:
            # An empty inline prompt is rejected below; treat an empty
            # file the same way instead of sending an empty prompt.
            click.echo(f"Prompt file is empty: {prompt_file}", err=True)
            sys.exit(1)
    elif prompt:
        prompt_text = prompt
    else:
        click.echo("Provide a prompt or --prompt-file.", err=True)
        sys.exit(1)

    if var:
        variables: dict[str, str] = {}
        for item in var:
            key, sep, value = item.partition("=")
            if not sep:
                click.echo(
                    f"Invalid --var format: {item!r}. Expected KEY=VALUE.", err=True
                )
                sys.exit(1)
            variables[key.strip()] = value
        try:
            prompt_text = prompt_text.format_map(variables)
        except KeyError as exc:
            click.echo(f"Missing template variable: {exc}", err=True)
            sys.exit(1)
        except (ValueError, IndexError, AttributeError, TypeError) as exc:
            # str.format_map raises these for unbalanced braces, positional
            # fields such as "{}" and bad "{a.b}" / "{a[0]}" lookups --
            # common in prompts that contain code or JSON.
            click.echo(f"Invalid prompt template: {exc}", err=True)
            sys.exit(1)

    model_list = [m.strip() for m in models.split(",") if m.strip()]
    if not model_list:
        # e.g. `--models ","`: fail here rather than run with no models.
        click.echo(
            "No models given. Pass --models as a comma-separated list.", err=True
        )
        sys.exit(1)

    criteria: list[str] | None = None
    if judge_criteria:
        # Drop blank entries ("a,,b" or a trailing comma); an all-blank
        # value counts as no criteria.
        criteria = [c.strip() for c in judge_criteria.split(",") if c.strip()] or None

    # Project modules are imported inside the command, as in the original;
    # presumably to keep other subcommands from loading the heavier
    # dependencies -- TODO confirm.
    from assayer.exporter import export
    from assayer.judge import run_judge
    from assayer.renderer import render_run
    from assayer.runner import run_all
    from assayer.scorer import compute_similarity

    results = asyncio.run(
        run_all(
            prompt_text,
            model_list,
            system=system,
            temperature=temperature,
            max_tokens=max_tokens,
        )
    )
    similarity = compute_similarity(results) if score else None

    judge_result = (
        asyncio.run(run_judge(prompt_text, results, judge, criteria)) if judge else None
    )

    render_run(prompt_text, results, similarity=similarity, judge_result=judge_result)

    if output:
        export(results, output)
        click.echo(f"Results saved to {output}")
132
+
133
+
134
# Register the group once, under its public name. The previous
# `@cli.group()` + `cli.add_command(models_cmd, name="models")` pair
# attached the same group twice: once under the name click derives from
# the function name and once as "models". With click releases that derive
# "models-cmd", both names showed up in `assayer --help`.
@cli.group(name="models")
def models_cmd() -> None:
    """List supported models and check provider configuration."""
140
+
141
+
142
@models_cmd.command(name="list")
def models_list() -> None:
    # Print every provider from _KNOWN_MODELS: a blank line, the provider
    # name, then one line per model identifier.
    for provider in _KNOWN_MODELS:
        click.echo(f"\n{provider}")
        for model_name in _KNOWN_MODELS[provider]:
            click.echo(f" {model_name}")
148
+
149
+
150
@models_cmd.command(name="check")
@click.argument("provider", required=False)
def models_check(provider: str | None) -> None:
    """Check which provider API keys are configured.

    With PROVIDER, check only that provider. `ollama` probes the local
    Ollama server instead of looking for an API key.
    \f
    Everything after the form feed is hidden from ``--help``.

    Without PROVIDER, one status line is printed for each of openai,
    anthropic and gemini. An unrecognised PROVIDER prints an error to
    stderr and exits with status 1; previously it was silently ignored
    and all providers were listed.
    """
    # Normalise so "Ollama" or " openai " behave like their canonical form.
    wanted = provider.strip().lower() if provider is not None else None

    if wanted == "ollama":
        _check_ollama()
        return

    keys = {
        "openai": "OPENAI_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
        "gemini": "GEMINI_API_KEY",
    }
    if wanted is not None:
        if wanted not in keys:
            known = ", ".join([*keys, "ollama"])
            click.echo(
                f"Unknown provider: {provider!r}. Expected one of: {known}.",
                err=True,
            )
            sys.exit(1)
        keys = {wanted: keys[wanted]}

    for name, env_var in keys.items():
        value = get_api_key(env_var)
        status = "set" if value else "not set"
        symbol = "+" if value else "-"
        click.echo(f" [{symbol}] {name}: {env_var} {status}")
167
+
168
+
169
def _check_ollama() -> None:
    """Query the Ollama server on localhost:11434 and print what it serves.

    Requests ``/api/tags`` with a 3 second timeout. On success, prints that
    Ollama is running and lists each model name with an ``ollama/`` prefix,
    or notes that no local models were found. A connection failure and any
    other exception are reported on stderr; nothing is raised.
    """
    try:
        resp = httpx.get("http://localhost:11434/api/tags", timeout=3.0)
        resp.raise_for_status()
        payload = resp.json()
        names: list[str] = [entry["name"] for entry in payload.get("models", [])]
        click.echo("Ollama is running.")
        if not names:
            click.echo("No local models found.")
        else:
            click.echo("Local models:")
            for model_name in names:
                click.echo(f" ollama/{model_name}")
    except httpx.ConnectError:
        click.echo("Ollama is not running at localhost:11434.", err=True)
    except Exception as err:
        # Catch-all: HTTP error statuses, timeouts, unexpected payloads.
        click.echo(f"Ollama check failed: {err}", err=True)
186
+
187
+
188
@cli.group()
def config() -> None:
    """Store and inspect provider API keys."""
    # The docstring doubles as the description shown in `assayer --help`;
    # the `set` and `show` subcommands are attached below.
191
+
192
+
193
@config.command(name="set")
@click.argument("key")
@click.argument("value")
def config_set(key: str, value: str) -> None:
    # Hand the pair to assayer.config for storage, then confirm on stdout.
    # The key name is passed through as given; it is not validated here.
    set_api_key(name=key, value=value)
    confirmation = f"{key} saved."
    click.echo(confirmation)
199
+
200
+
201
@config.command(name="show")
def config_show() -> None:
    """Show which API keys are configured, with their values masked."""
    for key, value in show_config().items():
        if not value:
            click.echo(f" {key}: not set")
            continue
        # Reveal the 8-character prefix only when at least as much again
        # stays hidden. Shorter values used to be printed in full (<= 8
        # characters) or nearly in full, leaking the secret.
        masked = value[:8] + "..." if len(value) >= 16 else "***"
        click.echo(f" {key}: {masked}")
@@ -0,0 +1,30 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
# JSON file under the user's home directory where API keys are stored.
_CONFIG_PATH = Path.home().joinpath(".assayer", "config.json")

# Key names reported by show_config(), in display order.
_KNOWN_KEYS = (
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "GEMINI_API_KEY",
)
8
+
9
+
10
+ def get_api_key(name: str) -> str | None:
11
+ value = os.environ.get(name)
12
+ if value:
13
+ return value
14
+ if _CONFIG_PATH.exists():
15
+ data: dict[str, str] = json.loads(_CONFIG_PATH.read_text())
16
+ return data.get(name)
17
+ return None
18
+
19
+
20
def set_api_key(name: str, value: str) -> None:
    """Store *value* under *name* in the JSON config file.

    Existing entries are kept. The config directory is created if needed,
    and the file is written readable and writable by the owner only,
    because it holds secrets.

    Args:
        name: Key name, e.g. ``"OPENAI_API_KEY"``.
        value: Secret to store.

    Raises:
        json.JSONDecodeError: The existing file is not valid JSON. It is
            left untouched rather than overwritten.
        ValueError: The existing file is valid JSON but not an object.
    """
    _CONFIG_PATH.parent.mkdir(mode=0o700, parents=True, exist_ok=True)

    # Read directly instead of exists()-then-read to avoid a check/use race.
    try:
        data = json.loads(_CONFIG_PATH.read_text(encoding="utf-8"))
    except FileNotFoundError:
        data = {}
    if not isinstance(data, dict):
        raise ValueError(f"{_CONFIG_PATH} must contain a JSON object")

    data[name] = value

    # Create the file with mode 0600 so the keys are never world-readable,
    # not even briefly; write_text() would use the default umask.
    fd = os.open(_CONFIG_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        f.write(json.dumps(data, indent=2))
    # os.open() only applies the mode to newly created files; tighten a
    # file that already existed with looser permissions.
    os.chmod(_CONFIG_PATH, 0o600)
27
+
28
+
29
def show_config() -> dict[str, str | None]:
    """Map every name in ``_KNOWN_KEYS`` to what ``get_api_key`` returns for it."""
    resolved: dict[str, str | None] = {}
    for key_name in _KNOWN_KEYS:
        resolved[key_name] = get_api_key(key_name)
    return resolved
@@ -0,0 +1,32 @@
1
+ import csv
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from assayer.models import ModelResult
6
+
7
+
8
def _to_dict(result: ModelResult) -> dict:
    """Copy the exported attributes of *result* into a plain dict.

    The dict keys equal the attribute names and keep the order below,
    which is also the CSV column order used by ``export``.
    """
    exported = (
        "model",
        "output",
        "tokens_input",
        "tokens_output",
        "latency_seconds",
        "cost_usd",
        "error",
    )
    return {attr: getattr(result, attr) for attr in exported}
18
+
19
+
20
def export(results: list[ModelResult], path: str) -> None:
    """Write *results* to *path* as CSV or JSON.

    The format follows the file extension: ``.csv`` (case-insensitive)
    produces CSV with a header row; any other extension produces a JSON
    array. Files are written as UTF-8.

    Args:
        results: Results to export; may be empty.
        path: Destination file path.

    An empty *results* list yields an empty CSV file (there is no record
    to take column names from) or the JSON array ``[]``. Previously the
    CSV branch raised ``IndexError`` in that case.
    """
    dest = Path(path)
    records = [_to_dict(r) for r in results]

    if dest.suffix.lower() == ".csv":
        with dest.open("w", newline="", encoding="utf-8") as f:
            if records:
                # Column names and order come from the first record.
                writer = csv.DictWriter(f, fieldnames=list(records[0]))
                writer.writeheader()
                writer.writerows(records)
    else:
        dest.write_text(
            json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8"
        )