@skillrecordings/cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skill.mjs +27 -0
- package/dist/chunk-2NCCVTEE.js +22342 -0
- package/dist/chunk-2NCCVTEE.js.map +1 -0
- package/dist/chunk-3E3GYSZR.js +7071 -0
- package/dist/chunk-3E3GYSZR.js.map +1 -0
- package/dist/chunk-F4EM72IH.js +86 -0
- package/dist/chunk-F4EM72IH.js.map +1 -0
- package/dist/chunk-FGP7KUQW.js +432 -0
- package/dist/chunk-FGP7KUQW.js.map +1 -0
- package/dist/chunk-H3D6VCME.js +55 -0
- package/dist/chunk-H3D6VCME.js.map +1 -0
- package/dist/chunk-HK3PEWFD.js +208 -0
- package/dist/chunk-HK3PEWFD.js.map +1 -0
- package/dist/chunk-KEV3QKXP.js +4495 -0
- package/dist/chunk-KEV3QKXP.js.map +1 -0
- package/dist/chunk-MG37YDAK.js +882 -0
- package/dist/chunk-MG37YDAK.js.map +1 -0
- package/dist/chunk-MLNDSBZ4.js +482 -0
- package/dist/chunk-MLNDSBZ4.js.map +1 -0
- package/dist/chunk-N2WIV2JV.js +22 -0
- package/dist/chunk-N2WIV2JV.js.map +1 -0
- package/dist/chunk-PWWRCN5W.js +2067 -0
- package/dist/chunk-PWWRCN5W.js.map +1 -0
- package/dist/chunk-SKHBM3XP.js +7746 -0
- package/dist/chunk-SKHBM3XP.js.map +1 -0
- package/dist/chunk-WFANXVQG.js +64 -0
- package/dist/chunk-WFANXVQG.js.map +1 -0
- package/dist/chunk-WYKL32C3.js +275 -0
- package/dist/chunk-WYKL32C3.js.map +1 -0
- package/dist/chunk-ZNF7XD2S.js +134 -0
- package/dist/chunk-ZNF7XD2S.js.map +1 -0
- package/dist/config-AUAIYDSI.js +20 -0
- package/dist/config-AUAIYDSI.js.map +1 -0
- package/dist/fileFromPath-XN7LXIBI.js +134 -0
- package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
- package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
- package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
- package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
- package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
- package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
- package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
- package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
- package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
- package/dist/getMachineId-win-IIF36LEJ.js +44 -0
- package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
- package/dist/index.js +112703 -0
- package/dist/index.js.map +1 -0
- package/dist/lib-R6DEEJCP.js +7623 -0
- package/dist/lib-R6DEEJCP.js.map +1 -0
- package/dist/pipeline-IAVVAKTU.js +120 -0
- package/dist/pipeline-IAVVAKTU.js.map +1 -0
- package/dist/query-NTP5NVXN.js +25 -0
- package/dist/query-NTP5NVXN.js.map +1 -0
- package/dist/routing-BAEPFB7V.js +390 -0
- package/dist/routing-BAEPFB7V.js.map +1 -0
- package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
- package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
- package/dist/stripe-payment-history-SJPKA63N.js +67 -0
- package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
- package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
- package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
- package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
- package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
- package/dist/support-memory-WSG7SDKG.js +10 -0
- package/dist/support-memory-WSG7SDKG.js.map +1 -0
- package/package.json +10 -7
- package/.env.encrypted +0 -0
- package/CHANGELOG.md +0 -35
- package/data/tt-archive-dataset.json +0 -1
- package/data/validate-test-dataset.json +0 -97
- package/docs/CLI-AUTH.md +0 -504
- package/preload.ts +0 -18
- package/src/__tests__/init.test.ts +0 -74
- package/src/alignment-test.ts +0 -64
- package/src/check-apps.ts +0 -16
- package/src/commands/auth/decrypt.ts +0 -123
- package/src/commands/auth/encrypt.ts +0 -81
- package/src/commands/auth/index.ts +0 -50
- package/src/commands/auth/keygen.ts +0 -41
- package/src/commands/auth/status.ts +0 -164
- package/src/commands/axiom/forensic.ts +0 -868
- package/src/commands/axiom/index.ts +0 -697
- package/src/commands/build-dataset.ts +0 -311
- package/src/commands/db-status.ts +0 -47
- package/src/commands/deploys.ts +0 -219
- package/src/commands/eval-local/compare.ts +0 -171
- package/src/commands/eval-local/health.ts +0 -212
- package/src/commands/eval-local/index.ts +0 -76
- package/src/commands/eval-local/real-tools.ts +0 -416
- package/src/commands/eval-local/run.ts +0 -1168
- package/src/commands/eval-local/score-production.ts +0 -256
- package/src/commands/eval-local/seed.ts +0 -276
- package/src/commands/eval-pipeline/index.ts +0 -53
- package/src/commands/eval-pipeline/real-tools.ts +0 -492
- package/src/commands/eval-pipeline/run.ts +0 -1316
- package/src/commands/eval-pipeline/seed.ts +0 -395
- package/src/commands/eval-prompt.ts +0 -496
- package/src/commands/eval.test.ts +0 -253
- package/src/commands/eval.ts +0 -108
- package/src/commands/faq-classify.ts +0 -460
- package/src/commands/faq-cluster.ts +0 -135
- package/src/commands/faq-extract.ts +0 -249
- package/src/commands/faq-mine.ts +0 -432
- package/src/commands/faq-review.ts +0 -426
- package/src/commands/front/index.ts +0 -351
- package/src/commands/front/pull-conversations.ts +0 -275
- package/src/commands/front/tags.ts +0 -825
- package/src/commands/front-cache.ts +0 -1277
- package/src/commands/front-stats.ts +0 -75
- package/src/commands/health.test.ts +0 -82
- package/src/commands/health.ts +0 -362
- package/src/commands/init.test.ts +0 -89
- package/src/commands/init.ts +0 -106
- package/src/commands/inngest/client.ts +0 -294
- package/src/commands/inngest/events.ts +0 -296
- package/src/commands/inngest/investigate.ts +0 -382
- package/src/commands/inngest/runs.ts +0 -149
- package/src/commands/inngest/signal.ts +0 -143
- package/src/commands/kb-sync.ts +0 -498
- package/src/commands/memory/find.ts +0 -135
- package/src/commands/memory/get.ts +0 -87
- package/src/commands/memory/index.ts +0 -97
- package/src/commands/memory/stats.ts +0 -163
- package/src/commands/memory/store.ts +0 -49
- package/src/commands/memory/vote.ts +0 -159
- package/src/commands/pipeline.ts +0 -127
- package/src/commands/responses.ts +0 -856
- package/src/commands/tools.ts +0 -293
- package/src/commands/wizard.ts +0 -319
- package/src/index.ts +0 -172
- package/src/lib/crypto.ts +0 -56
- package/src/lib/env-loader.ts +0 -206
- package/src/lib/onepassword.ts +0 -137
- package/src/test-agent-local.ts +0 -115
- package/tsconfig.json +0 -11
- package/vitest.config.ts +0 -10
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Compare two prompts against eval scenarios
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { readFile, writeFile } from 'fs/promises'
|
|
6
|
-
import { glob } from 'glob'
|
|
7
|
-
|
|
8
|
-
// Options consumed by `compare`.
interface CompareOptions {
  // Path of the candidate prompt file (read as UTF-8 text).
  candidate: string
  // Path of the baseline prompt file; when omitted the production prompt is
  // imported instead.
  baseline?: string
  // Glob pattern selecting scenario files; a default pattern under
  // `fixtures/scenarios` is used when omitted.
  scenarios?: string
  // When set, the comparison object is also written to this path as JSON.
  output?: string
  // When true, only JSON is printed and the human-readable lines are skipped.
  json?: boolean
}
|
|
15
|
-
|
|
16
|
-
/**
 * Build and emit a comparison report for a candidate prompt and a baseline
 * prompt.
 *
 * Reads the candidate prompt from disk, reads the baseline prompt from disk
 * (or imports the production prompt when no baseline path is given), counts
 * the files matching the scenario glob, then prints the report as JSON or as
 * a table and optionally writes it to `options.output`.
 *
 * NOTE: the `results`, `improved`, `regressed` and `verdict` fields are
 * hard-coded placeholder values; neither prompt is run against the scenarios.
 *
 * Any error is reported (as a JSON object in JSON mode, otherwise through
 * `console.error`) and the process exits with code 1.
 *
 * @param options - See `CompareOptions`.
 */
export async function compare(options: CompareOptions): Promise<void> {
  const { candidate, baseline, scenarios, output, json } = options
  const pattern = scenarios || 'fixtures/scenarios/**/*.json'

  // Human-readable progress lines are suppressed in JSON mode.
  const say = (line: string): void => {
    if (!json) {
      console.log(line)
    }
  }

  say('\n🔬 Prompt Comparison\n')

  try {
    const candidateText = await readFile(candidate, 'utf-8')
    say(`Candidate: ${candidate}`)

    let baselineText: string
    if (!baseline) {
      // No baseline file given: fall back to the production prompt.
      const config = await import('@skillrecordings/core/agent/config')
      baselineText = config.SUPPORT_AGENT_PROMPT
      say('Baseline: Production prompt')
    } else {
      baselineText = await readFile(baseline, 'utf-8')
      say(`Baseline: ${baseline}`)
    }

    const matchedFiles = await glob(pattern)
    say(`Scenarios: ${matchedFiles.length}\n`)

    // Placeholder scores: a full implementation would run both prompts
    // through the agent and fill these in from real results.
    const placeholderResults = {
      baseline: {
        passRate: 0.85,
        internalLeaks: 2,
        metaCommentary: 1,
        bannedPhrases: 3,
      },
      candidate: {
        passRate: 0.91,
        internalLeaks: 0,
        metaCommentary: 0,
        bannedPhrases: 1,
      },
    }

    const comparison = {
      candidate: {
        path: candidate,
        promptLength: candidateText.length,
      },
      baseline: {
        path: baseline || 'production',
        promptLength: baselineText.length,
      },
      scenarios: matchedFiles.length,
      results: placeholderResults,
      improved: [],
      regressed: [],
      verdict: 'CANDIDATE_BETTER',
    }

    const serialized = (): string => JSON.stringify(comparison, null, 2)

    if (output) {
      await writeFile(output, serialized())
      say(`Results saved to ${output}`)
    }

    if (!json) {
      printComparison(comparison)
    } else {
      console.log(serialized())
    }
  } catch (error) {
    if (!json) {
      console.error('Error:', error)
    } else {
      const message = error instanceof Error ? error.message : 'Unknown error'
      console.log(JSON.stringify({ error: message }))
    }
    process.exit(1)
  }
}
|
|
112
|
-
|
|
113
|
-
/**
 * Print a baseline-vs-candidate table, the improved/regressed scenario lists
 * (when non-empty) and the verdict line.
 *
 * @param comparison - Report object. Reads `results.baseline` and
 *   `results.candidate` (each with `passRate`, `internalLeaks`,
 *   `metaCommentary`, `bannedPhrases`) plus the optional `improved`,
 *   `regressed` and `verdict` fields.
 */
function printComparison(comparison: any): void {
  const { baseline, candidate } = comparison.results

  // Positive deltas get an explicit '+'; negatives already carry their '-'.
  const signed = (delta: number, text: string): string =>
    `${delta > 0 ? '+' : ''}${text}`

  console.log(' Baseline Candidate Delta')
  console.log('─'.repeat(55))

  // Pass rate: higher is better.
  const passRateDelta = candidate.passRate - baseline.passRate
  const passRateIcon = passRateDelta >= 0 ? '⬆️' : '⬇️'
  console.log(
    `Pass rate: ${(baseline.passRate * 100).toFixed(1)}% ${(candidate.passRate * 100).toFixed(1)}% ${signed(passRateDelta, (passRateDelta * 100).toFixed(1))}% ${passRateIcon}`
  )

  // Count metrics: lower is better, so any increase is marked with the down
  // arrow. All three rows share this rule (banned phrases previously showed
  // the neutral arrow on an increase, unlike the other two rows).
  const countRows: Array<[string, string]> = [
    ['Internal leaks', 'internalLeaks'],
    ['Meta-commentary', 'metaCommentary'],
    ['Banned phrases', 'bannedPhrases'],
  ]
  for (const [label, key] of countRows) {
    const delta = candidate[key] - baseline[key]
    const icon = delta <= 0 ? '⬆️' : '⬇️'
    console.log(
      `${label}: ${baseline[key]} ${candidate[key]} ${signed(delta, String(delta))} ${icon}`
    )
  }

  console.log('')

  if (comparison.improved?.length > 0) {
    console.log('Improved scenarios:')
    for (const scenario of comparison.improved) {
      console.log(` - ${scenario}`)
    }
    console.log('')
  }

  if (comparison.regressed?.length > 0) {
    console.log('Regressed scenarios:')
    for (const scenario of comparison.regressed) {
      console.log(` - ${scenario}`)
    }
    console.log('')
  }

  // Any verdict other than the two known values reads as "no difference".
  let verdict = 'NO SIGNIFICANT DIFFERENCE ➡️'
  if (comparison.verdict === 'CANDIDATE_BETTER') {
    verdict = 'CANDIDATE IS BETTER ✅'
  } else if (comparison.verdict === 'BASELINE_BETTER') {
    verdict = 'BASELINE IS BETTER ⚠️'
  }

  console.log(`Verdict: ${verdict}`)
}
|
|
@@ -1,212 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Health check for local eval environment
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { createOllamaClient } from '@skillrecordings/core/adapters/ollama'
|
|
6
|
-
import { createQdrantClient } from '@skillrecordings/core/adapters/qdrant'
|
|
7
|
-
|
|
8
|
-
// Outcome of probing one service of the local eval environment.
interface HealthResult {
  // Display name of the service, e.g. 'MySQL' or 'Redis'.
  service: string
  // Whether the probe succeeded.
  healthy: boolean
  // Status detail on success, or the error message on failure.
  message: string
}
|
|
13
|
-
|
|
14
|
-
// Options consumed by `health`.
interface HealthOptions {
  // When true, results are printed as a JSON document and the process exits
  // immediately with a status reflecting overall health.
  json?: boolean
}
|
|
17
|
-
|
|
18
|
-
/**
 * Probe MySQL, Redis, Qdrant and Ollama and report the results.
 *
 * In JSON mode prints `{ healthy, services }` and exits with 0 when every
 * service is healthy, 1 otherwise. In the default mode prints one line per
 * service plus a summary, and exits with 1 (after a start-up tip) when any
 * service is unhealthy.
 *
 * @param options - See `HealthOptions`.
 */
export async function health(options: HealthOptions): Promise<void> {
  // The probes are independent of each other and each one reports failure
  // through its returned HealthResult, so they can run concurrently.
  // Promise.all keeps the results in the listed order.
  const results: HealthResult[] = await Promise.all([
    checkMySQL(),
    checkRedis(),
    checkQdrant(),
    checkOllama(),
  ])
  const allHealthy = results.every((r) => r.healthy)

  if (options.json) {
    console.log(
      JSON.stringify({ healthy: allHealthy, services: results }, null, 2)
    )
    process.exit(allHealthy ? 0 : 1)
  }

  // Pretty print results
  console.log('\n🏥 Local Eval Environment Health Check\n')

  for (const result of results) {
    const icon = result.healthy ? '✅' : '❌'
    console.log(`${icon} ${result.service}: ${result.message}`)
  }

  console.log(
    `\n${allHealthy ? '✅ All services healthy' : '❌ Some services unhealthy'}\n`
  )

  if (!allHealthy) {
    console.log(
      '💡 Tip: Run `docker compose -f docker/eval.yml up -d` to start services\n'
    )
    process.exit(1)
  }
}
|
|
65
|
-
|
|
66
|
-
/**
 * Probe the local eval MySQL instance by connecting to `support_eval` on
 * localhost:3306 and running `SELECT 1`.
 *
 * @returns A `HealthResult` for 'MySQL'; never rejects. Any failure (module
 *   load, connect, query, or close) is reported as `healthy: false` with the
 *   error message.
 */
async function checkMySQL(): Promise<HealthResult> {
  try {
    // Use mysql2 directly for health check
    const mysql = await import('mysql2/promise')
    const connection = await mysql.createConnection({
      host: 'localhost',
      port: 3306,
      user: 'eval_user',
      password: 'eval_pass',
      database: 'support_eval',
      connectTimeout: 5000,
    })

    try {
      await connection.execute('SELECT 1')
    } finally {
      // Always release the connection, even when the probe query fails.
      await connection.end()
    }

    return {
      service: 'MySQL',
      healthy: true,
      message: 'Connected to support_eval database',
    }
  } catch (error) {
    return {
      service: 'MySQL',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
|
|
95
|
-
|
|
96
|
-
/**
 * Probe the local Redis instance by opening a TCP connection to
 * localhost:6379, sending an inline `PING` and expecting `PONG` in the first
 * chunk of the reply.
 *
 * Redis does not speak HTTP, so the probe uses a raw socket only; no HTTP
 * request is sent to the Redis port.
 *
 * @returns A `HealthResult` for 'Redis'; never rejects. Timeouts (5 s),
 *   socket errors, unexpected replies and connections closed before any
 *   reply are all reported as `healthy: false`.
 */
async function checkRedis(): Promise<HealthResult> {
  try {
    const net = await import('net')

    return new Promise<HealthResult>((resolve) => {
      const socket = new net.Socket()
      socket.setTimeout(5000)

      // A promise settles only once, so any report after the first is a no-op.
      const report = (healthy: boolean, message: string): void => {
        resolve({ service: 'Redis', healthy, message })
      }

      socket.on('connect', () => {
        socket.write('PING\r\n')
      })

      socket.on('data', (data) => {
        const reply = data.toString()
        socket.destroy()
        if (reply.includes('PONG')) {
          report(true, 'Redis responding to PING')
        } else {
          report(false, 'Unexpected response')
        }
      })

      socket.on('timeout', () => {
        socket.destroy()
        report(false, 'Connection timeout')
      })

      socket.on('error', (err) => {
        socket.destroy()
        report(false, err.message)
      })

      // If the peer closes without sending anything, none of the handlers
      // above would settle the promise; report that case explicitly. After a
      // normal outcome this fires too and is ignored.
      socket.on('close', () => {
        report(false, 'Connection closed before response')
      })

      socket.connect(6379, 'localhost')
    })
  } catch (error) {
    return {
      service: 'Redis',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
|
|
161
|
-
|
|
162
|
-
/**
 * Probe Qdrant by asking the client for its collection info.
 *
 * @returns A `HealthResult` for 'Qdrant'. A status of 'not_found' still
 *   counts as healthy (the service answered); otherwise the message reports
 *   the collection's point count. Any thrown error yields `healthy: false`.
 */
async function checkQdrant(): Promise<HealthResult> {
  const service = 'Qdrant'

  try {
    const info = await createQdrantClient().getCollectionInfo()

    let message: string
    if (info.status === 'not_found') {
      message = 'Running (collection not yet created)'
    } else {
      message = `Collection has ${info.pointsCount} points`
    }

    return { service, healthy: true, message }
  } catch (error) {
    const message =
      error instanceof Error ? error.message : 'Connection failed'
    return { service, healthy: false, message }
  }
}
|
|
183
|
-
|
|
184
|
-
/**
 * Probe Ollama via the client's health check, then ask whether the embedding
 * model is available.
 *
 * @returns A `HealthResult` for 'Ollama'. A failed health check or a thrown
 *   error yields `healthy: false`. A running server with a missing model is
 *   still reported as healthy, with a hint to pull the model.
 */
async function checkOllama(): Promise<HealthResult> {
  try {
    const client = createOllamaClient()
    const healthy = await client.healthCheck()

    if (!healthy) {
      return {
        service: 'Ollama',
        healthy: false,
        message: 'Not responding',
      }
    }

    // Name the same model in both messages so the pull hint matches the
    // configured EMBEDDING_MODEL instead of always naming the default.
    // NOTE(review): presumably isModelAvailable() checks this same model;
    // confirm against the Ollama adapter.
    const model = process.env.EMBEDDING_MODEL || 'nomic-embed-text'

    const modelAvailable = await client.isModelAvailable()
    return {
      service: 'Ollama',
      healthy: true,
      message: modelAvailable
        ? `Model ${model} available`
        : `Running but model needs to be pulled (run: ollama pull ${model})`,
    }
  } catch (error) {
    return {
      service: 'Ollama',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Local eval CLI commands
|
|
3
|
-
*
|
|
4
|
-
* Commands for running evals against a local Docker environment
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import type { Command } from 'commander'
|
|
8
|
-
import { compare } from './compare'
|
|
9
|
-
import { health } from './health'
|
|
10
|
-
import { run } from './run'
|
|
11
|
-
import { scoreProduction } from './score-production'
|
|
12
|
-
import { seed } from './seed'
|
|
13
|
-
|
|
14
|
-
/**
 * Register the `eval-local` command group and its subcommands (`health`,
 * `seed`, `run`, `compare`, `score-production`) on the given program.
 *
 * @param program - Commander program to attach the command group to.
 */
export function registerEvalLocalCommands(program: Command): void {
  // Commander calls option parsers as (value, previousValue). Passing
  // `parseInt` directly would feed the previous value in as the radix, so
  // wrap both parsers to take the raw string only and parse base 10.
  const toInt = (value: string): number => Number.parseInt(value, 10)
  const toFloat = (value: string): number => Number.parseFloat(value)

  const evalLocal = program
    .command('eval-local')
    .description('Local evaluation environment commands')

  evalLocal
    .command('health')
    .description('Check health of local eval environment services')
    .option('--json', 'Output as JSON')
    .action(health)

  evalLocal
    .command('seed')
    .description('Seed the local eval environment with fixtures')
    .option('--clean', 'Drop and recreate all data before seeding')
    .option('--fixtures <path>', 'Path to fixtures directory', 'fixtures')
    .option('--json', 'Output as JSON')
    .action(seed)

  evalLocal
    .command('run')
    .description('Run eval suite against local environment')
    .option('--scenarios <glob>', 'Scenario files glob pattern')
    .option('--dataset <file>', 'Dataset JSON file (alternative to scenarios)')
    .option('--prompt <file>', 'Custom prompt file (default: production)')
    .option('--model <model>', 'Model to use', 'anthropic/claude-haiku-4-5')
    .option('--limit <number>', 'Max scenarios to run', toInt)
    .option('--output <file>', 'Save results to JSON file')
    .option('--baseline <file>', 'Compare against baseline results')
    .option(
      '--fail-threshold <number>',
      'Fail if pass rate below threshold',
      toFloat
    )
    .option('--verbose', 'Show individual scenario results')
    .option('--json', 'JSON output for scripting')
    .option('--real-tools', 'Use real Docker services instead of mocks')
    .action(run)

  evalLocal
    .command('compare')
    .description('Compare two prompts against scenarios')
    .requiredOption('--candidate <file>', 'Candidate prompt file')
    .option('--baseline <file>', 'Baseline prompt file (default: production)')
    .option('--scenarios <glob>', 'Scenario files glob pattern')
    .option('--output <file>', 'Save comparison to JSON')
    .option('--json', 'JSON output')
    .action(compare)

  evalLocal
    .command('score-production')
    .description(
      'Score actual production responses from dataset (no mocks, real data)'
    )
    .requiredOption(
      '--dataset <file>',
      'Dataset JSON file with production responses'
    )
    .option('--output <file>', 'Save results to JSON file')
    .option('--verbose', 'Show individual failures')
    .option('--json', 'JSON output for scripting')
    .action(scoreProduction)
}
|