@artemiskit/cli 0.1.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
- package/bin/artemis.ts +13 -0
- package/dist/bin/artemis.d.ts +6 -0
- package/dist/bin/artemis.d.ts.map +1 -0
- package/dist/index.js +51297 -0
- package/dist/src/adapters.d.ts +6 -0
- package/dist/src/adapters.d.ts.map +1 -0
- package/dist/src/cli.d.ts +6 -0
- package/dist/src/cli.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts +6 -0
- package/dist/src/commands/compare.d.ts.map +1 -0
- package/dist/src/commands/history.d.ts +6 -0
- package/dist/src/commands/history.d.ts.map +1 -0
- package/dist/src/commands/index.d.ts +8 -0
- package/dist/src/commands/index.d.ts.map +1 -0
- package/dist/src/commands/init.d.ts +6 -0
- package/dist/src/commands/init.d.ts.map +1 -0
- package/dist/src/commands/redteam.d.ts +6 -0
- package/dist/src/commands/redteam.d.ts.map +1 -0
- package/dist/src/commands/report.d.ts +6 -0
- package/dist/src/commands/report.d.ts.map +1 -0
- package/dist/src/commands/run.d.ts +6 -0
- package/dist/src/commands/run.d.ts.map +1 -0
- package/dist/src/commands/stress.d.ts +6 -0
- package/dist/src/commands/stress.d.ts.map +1 -0
- package/dist/src/config/index.d.ts +6 -0
- package/dist/src/config/index.d.ts.map +1 -0
- package/dist/src/config/loader.d.ts +13 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/schema.d.ts +215 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/utils/adapter.d.ts +71 -0
- package/dist/src/utils/adapter.d.ts.map +1 -0
- package/dist/src/utils/storage.d.ts +22 -0
- package/dist/src/utils/storage.d.ts.map +1 -0
- package/package.json +65 -0
- package/src/adapters.ts +33 -0
- package/src/cli.ts +34 -0
- package/src/commands/compare.ts +104 -0
- package/src/commands/history.ts +80 -0
- package/src/commands/index.ts +8 -0
- package/src/commands/init.ts +111 -0
- package/src/commands/redteam.ts +511 -0
- package/src/commands/report.ts +126 -0
- package/src/commands/run.ts +233 -0
- package/src/commands/stress.ts +501 -0
- package/src/config/index.ts +6 -0
- package/src/config/loader.ts +112 -0
- package/src/config/schema.ts +56 -0
- package/src/index.ts +6 -0
- package/src/utils/adapter.ts +542 -0
- package/src/utils/storage.ts +67 -0
- package/tsconfig.json +13 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# @artemiskit/cli
|
|
2
|
+
|
|
3
|
+
## 0.1.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 6350e5d: Initial release of ArtemisKit - LLM Evaluation Toolkit
|
|
8
|
+
|
|
9
|
+
Features:
|
|
10
|
+
|
|
11
|
+
- Scenario-based evaluation with YAML test definitions
|
|
12
|
+
- Multiple expectation types: contains, exact, regex, fuzzy, llm_grader, json_schema
|
|
13
|
+
- Red team security testing (injection, jailbreak, extraction, hallucination, PII)
|
|
14
|
+
- Stress testing with configurable concurrency and latency metrics
|
|
15
|
+
- PII/sensitive data redaction with built-in and custom patterns
|
|
16
|
+
- Interactive HTML reports with run comparison
|
|
17
|
+
- Multi-provider support: OpenAI, Azure OpenAI, Anthropic
|
|
18
|
+
- Local and Supabase storage backends
|
|
19
|
+
|
|
20
|
+
- Updated dependencies [6350e5d]
|
|
21
|
+
- @artemiskit/adapter-openai@0.1.2
|
|
22
|
+
- @artemiskit/adapter-vercel-ai@0.1.2
|
|
23
|
+
- @artemiskit/core@0.1.2
|
|
24
|
+
- @artemiskit/redteam@0.1.2
|
|
25
|
+
- @artemiskit/reports@0.1.2
|
|
26
|
+
|
|
27
|
+
## 0.1.1
|
|
28
|
+
|
|
29
|
+
### Patch Changes
|
|
30
|
+
|
|
31
|
+
- 1200625: Initial release of ArtemisKit - LLM Evaluation Toolkit
|
|
32
|
+
|
|
33
|
+
ArtemisKit is a comprehensive toolkit for testing, validating, and auditing LLM-powered applications.
|
|
34
|
+
|
|
35
|
+
Features:
|
|
36
|
+
|
|
37
|
+
- Scenario-based evaluation with YAML test definitions
|
|
38
|
+
- Multiple expectation types: contains, exact, regex, fuzzy, llm_grader, json_schema
|
|
39
|
+
- Red team security testing (injection, jailbreak, extraction, hallucination, PII disclosure)
|
|
40
|
+
- Stress testing with configurable concurrency and detailed latency metrics
|
|
41
|
+
- PII/sensitive data redaction with built-in and custom patterns
|
|
42
|
+
- Interactive HTML reports with run comparison
|
|
43
|
+
- Multi-provider support: OpenAI, Azure OpenAI, Anthropic
|
|
44
|
+
- Local and Supabase storage backends
|
|
45
|
+
- Run history tracking and comparison
|
|
46
|
+
|
|
47
|
+
CLI Commands:
|
|
48
|
+
|
|
49
|
+
- artemiskit run - Execute scenario-based evaluations
|
|
50
|
+
- artemiskit redteam - Run adversarial security tests
|
|
51
|
+
- artemiskit stress - Perform load testing
|
|
52
|
+
- artemiskit report - Regenerate reports from manifests
|
|
53
|
+
- artemiskit history - View past runs
|
|
54
|
+
- artemiskit compare - Compare two runs
|
|
55
|
+
- artemiskit init - Initialize configuration
|
|
56
|
+
|
|
57
|
+
- Updated dependencies [1200625]
|
|
58
|
+
- @artemiskit/adapter-openai@0.1.1
|
|
59
|
+
- @artemiskit/adapter-vercel-ai@0.1.1
|
|
60
|
+
- @artemiskit/core@0.1.1
|
|
61
|
+
- @artemiskit/redteam@0.1.1
|
|
62
|
+
- @artemiskit/reports@0.1.1
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0",
|
|
3
|
+
"run_id": "-sEsU7KtJ7VE",
|
|
4
|
+
"project": "my-project",
|
|
5
|
+
"start_time": "2026-01-15T23:57:08.915Z",
|
|
6
|
+
"end_time": "2026-01-15T23:57:44.262Z",
|
|
7
|
+
"duration_ms": 35347,
|
|
8
|
+
"config": {
|
|
9
|
+
"scenario": "redaction-test",
|
|
10
|
+
"provider": "azure-openai",
|
|
11
|
+
"model": "gpt-5-mini"
|
|
12
|
+
},
|
|
13
|
+
"resolved_config": {
|
|
14
|
+
"provider": "azure-openai",
|
|
15
|
+
"model": "gpt-5-mini",
|
|
16
|
+
"resource_name": "tsowa-mbdiiu3x-swedencentral",
|
|
17
|
+
"deployment_name": "5-mini",
|
|
18
|
+
"api_version": "2024-02-15-preview",
|
|
19
|
+
"source": {
|
|
20
|
+
"provider": "config",
|
|
21
|
+
"model": "config",
|
|
22
|
+
"resource_name": "config",
|
|
23
|
+
"deployment_name": "config",
|
|
24
|
+
"api_version": "config"
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"metrics": {
|
|
28
|
+
"success_rate": 1,
|
|
29
|
+
"total_cases": 4,
|
|
30
|
+
"passed_cases": 4,
|
|
31
|
+
"failed_cases": 0,
|
|
32
|
+
"median_latency_ms": 9495,
|
|
33
|
+
"p95_latency_ms": 13207,
|
|
34
|
+
"total_tokens": 2841,
|
|
35
|
+
"total_prompt_tokens": 91,
|
|
36
|
+
"total_completion_tokens": 2750
|
|
37
|
+
},
|
|
38
|
+
"git": {
|
|
39
|
+
"commit": "4492d6da58212e70d4bbcb17dff7253555b5c321",
|
|
40
|
+
"branch": "main",
|
|
41
|
+
"dirty": true,
|
|
42
|
+
"remote": "git@github.com:code-sensei/artemiskit.git"
|
|
43
|
+
},
|
|
44
|
+
"provenance": {
|
|
45
|
+
"run_by": "babangida"
|
|
46
|
+
},
|
|
47
|
+
"cases": [
|
|
48
|
+
{
|
|
49
|
+
"id": "email-test",
|
|
50
|
+
"ok": true,
|
|
51
|
+
"score": 1,
|
|
52
|
+
"matcherType": "contains",
|
|
53
|
+
"reason": "Found 1/1 values (mode: any)",
|
|
54
|
+
"latencyMs": 6267,
|
|
55
|
+
"tokens": {
|
|
56
|
+
"prompt": 18,
|
|
57
|
+
"completion": 377,
|
|
58
|
+
"total": 395
|
|
59
|
+
},
|
|
60
|
+
"prompt": "My email is [REDACTED] and I need help",
|
|
61
|
+
"response": "Hi John — I can help. A quick note: I can’t send emails or access your inbox, and please avoid pasting passwords or other sensitive personal data here.\n\nWhat do you need help with? Here are some things I can do right away — pick one or describe your own:\n- Draft or polish an email or message (tell me the audience and purpose).\n- Troubleshoot an app, device, or account (give the error or symptoms).\n- Create or review a resume, cover letter, or LinkedIn summary.\n- Help with writing, editing, or summarizing text.\n- Answer questions or explain a topic step-by-step.\n- Write code, debug, or explain programming concepts.\n- Plan a schedule, to-do list, or travel itinerary.\n\nTell me which of the above (or something else) and any important details, and I’ll get started.",
|
|
62
|
+
"expected": {
|
|
63
|
+
"type": "contains",
|
|
64
|
+
"values": [
|
|
65
|
+
"help"
|
|
66
|
+
],
|
|
67
|
+
"mode": "any"
|
|
68
|
+
},
|
|
69
|
+
"tags": [],
|
|
70
|
+
"redaction": {
|
|
71
|
+
"redacted": true,
|
|
72
|
+
"promptRedacted": true,
|
|
73
|
+
"responseRedacted": false,
|
|
74
|
+
"redactionCount": 1
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"id": "phone-test",
|
|
79
|
+
"ok": true,
|
|
80
|
+
"score": 1,
|
|
81
|
+
"matcherType": "contains",
|
|
82
|
+
"reason": "Found 1/1 values (mode: any)",
|
|
83
|
+
"latencyMs": 6372,
|
|
84
|
+
"tokens": {
|
|
85
|
+
"prompt": 19,
|
|
86
|
+
"completion": 447,
|
|
87
|
+
"total": 466
|
|
88
|
+
},
|
|
89
|
+
"prompt": "Call me at [REDACTED] for more details",
|
|
90
|
+
"response": "I can’t make phone calls or send texts. I can help here, though — tell me what details you need and I’ll provide them, or I can draft a phone script, SMS, or email you can send/call with. Which would you like?",
|
|
91
|
+
"expected": {
|
|
92
|
+
"type": "contains",
|
|
93
|
+
"values": [
|
|
94
|
+
"call"
|
|
95
|
+
],
|
|
96
|
+
"mode": "any"
|
|
97
|
+
},
|
|
98
|
+
"tags": [],
|
|
99
|
+
"redaction": {
|
|
100
|
+
"redacted": true,
|
|
101
|
+
"promptRedacted": true,
|
|
102
|
+
"responseRedacted": false,
|
|
103
|
+
"redactionCount": 1
|
|
104
|
+
}
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
"id": "api-key-test",
|
|
108
|
+
"ok": true,
|
|
109
|
+
"score": 1,
|
|
110
|
+
"matcherType": "contains",
|
|
111
|
+
"reason": "Found 1/1 values (mode: any)",
|
|
112
|
+
"latencyMs": 13207,
|
|
113
|
+
"tokens": {
|
|
114
|
+
"prompt": 21,
|
|
115
|
+
"completion": 1045,
|
|
116
|
+
"total": 1066
|
|
117
|
+
},
|
|
118
|
+
"prompt": "My API key is [REDACTED]",
|
|
119
|
+
"response": "Thanks — I can’t store or use that key, and I’ve not logged it. Please delete this message from any public places and do the steps below immediately.\n\nImmediate steps (do these now)\n- Assume the key is compromised and rotate/revoke it immediately.\n- If it’s a Stripe key: log into Dashboard → Developers → API keys, then roll/regenerate the key or create a new restricted key and revoke the exposed one.\n- If you can’t revoke right away, monitor for suspicious activity and remove any sensitive operations that key can do.\n\nRecommended full checklist\n1. Revoke & replace\n - Revoke/regenerate the exposed key immediately.\n - Create a new key with least privilege (restricted keys that only allow required operations).\n2. Update your systems\n - Replace the key in your apps with the new key via secure secrets storage (don’t paste new keys in chat).\n - Use environment variables or a secrets manager (AWS Secrets Manager, GCP Secret Manager, Azure Key Vault, Vault, GitHub Actions secrets, etc.).\n3. Remove the secret from code and history\n - Remove the key from source files.\n - Purge it from git history using git filter-repo or the BFG Repo-Cleaner, then force-push and rotate any tokens that were included in pushed history.\n - In GitHub, after rewriting history, invalidate any cached copies (e.g., GitHub mirrors) and notify collaborators.\n4. Audit & monitoring\n - Check provider logs for any unauthorized use and revoke any other related credentials if you see suspicious activity.\n - Enable alerts and billing limits where available.\n - Enable multi-factor authentication for accounts with API key management.\n5. Prevent future leaks\n - Add automated scanning (git-secrets, truffleHog, pre-commit hooks) to block committing secrets.\n - Store secrets in a manager and inject them at runtime. 
Don’t hardcode in source or paste into chats.\n - Train your team on secret handling and rotate keys regularly.\n\nHow to use keys safely (example patterns)\n- Environment variable (example, do not paste keys in chat):\n - Set in the host or deployment pipeline: export [REDACTED]\n - In code, read from the env var (process.env.API_KEY, os.environ['API_KEY'], etc.)\n- Secrets manager:\n - Store the credential in AWS Secrets Manager / GCP Secret Manager / Azure Key Vault and grant the runtime role access.\n\nIf you want, I can:\n- Walk you step-by-step through revoking and creating a new key on the relevant provider (Stripe, AWS, etc.) — tell me which provider and I’ll give exact UI/menu steps.\n- Provide commands to purge a key from git history (git filter-repo or BFG) for your repo.\n\nDo not paste the new key here or anywhere public.",
|
|
120
|
+
"expected": {
|
|
121
|
+
"type": "contains",
|
|
122
|
+
"values": [
|
|
123
|
+
"key"
|
|
124
|
+
],
|
|
125
|
+
"mode": "any"
|
|
126
|
+
},
|
|
127
|
+
"tags": [],
|
|
128
|
+
"redaction": {
|
|
129
|
+
"redacted": true,
|
|
130
|
+
"promptRedacted": true,
|
|
131
|
+
"responseRedacted": true,
|
|
132
|
+
"redactionCount": 2
|
|
133
|
+
}
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"id": "multi-sensitive",
|
|
137
|
+
"ok": true,
|
|
138
|
+
"score": 1,
|
|
139
|
+
"matcherType": "contains",
|
|
140
|
+
"reason": "Found 1/1 values (mode: any)",
|
|
141
|
+
"latencyMs": 9495,
|
|
142
|
+
"tokens": {
|
|
143
|
+
"prompt": 33,
|
|
144
|
+
"completion": 881,
|
|
145
|
+
"total": 914
|
|
146
|
+
},
|
|
147
|
+
"prompt": "Contact: [REDACTED], phone: ([REDACTED], SSN: [REDACTED]",
|
|
148
|
+
"response": "Thanks — I see you’ve pasted contact details. How would you like me to handle them?\n\nOptions I can do for you:\n- Redact/mask the sensitive parts (example: email [REDACTED], phone (***) ***-0199, SSN ***-**-6789)\n- Parse into a structured format (JSON, CSV) with masking as above\n- Validate format (check email/phone/SSN formatting) and report issues\n- Remove the information and confirm deletion from this conversation\n- Give guidance on secure ways to share/store SSNs and other PII\n\nQuick safety note: avoid posting SSNs or other sensitive PII in public/unencrypted channels. Use secure portals, encrypted email, or a vetted file-transfer service when sharing SSNs. Tell me which option you want and I’ll proceed.",
|
|
149
|
+
"expected": {
|
|
150
|
+
"type": "contains",
|
|
151
|
+
"values": [
|
|
152
|
+
"contact"
|
|
153
|
+
],
|
|
154
|
+
"mode": "any"
|
|
155
|
+
},
|
|
156
|
+
"tags": [],
|
|
157
|
+
"redaction": {
|
|
158
|
+
"redacted": true,
|
|
159
|
+
"promptRedacted": true,
|
|
160
|
+
"responseRedacted": true,
|
|
161
|
+
"redactionCount": 4
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
],
|
|
165
|
+
"environment": {
|
|
166
|
+
"node_version": "v24.3.0",
|
|
167
|
+
"platform": "darwin",
|
|
168
|
+
"arch": "arm64"
|
|
169
|
+
},
|
|
170
|
+
"redaction": {
|
|
171
|
+
"enabled": true,
|
|
172
|
+
"patternsUsed": [
|
|
173
|
+
"email",
|
|
174
|
+
"phone",
|
|
175
|
+
"credit_card",
|
|
176
|
+
"ssn",
|
|
177
|
+
"api_key",
|
|
178
|
+
"aws_key",
|
|
179
|
+
"secrets"
|
|
180
|
+
],
|
|
181
|
+
"replacement": "[REDACTED]",
|
|
182
|
+
"summary": {
|
|
183
|
+
"promptsRedacted": 4,
|
|
184
|
+
"responsesRedacted": 2,
|
|
185
|
+
"totalRedactions": 8
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
package/bin/artemis.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Artemis CLI Entry Point
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { registerAdapters } from '../src/adapters';
|
|
7
|
+
import { createCLI } from '../src/cli';
|
|
8
|
+
|
|
9
|
+
// Register adapters before running CLI
|
|
10
|
+
await registerAdapters();
|
|
11
|
+
|
|
12
|
+
const cli = createCLI();
|
|
13
|
+
cli.parse(process.argv);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"artemis.d.ts","sourceRoot":"","sources":["../../bin/artemis.ts"],"names":[],"mappings":";AACA;;GAEG"}
|