@skillrecordings/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/bin/skill.mjs +27 -0
  2. package/dist/chunk-2NCCVTEE.js +22342 -0
  3. package/dist/chunk-2NCCVTEE.js.map +1 -0
  4. package/dist/chunk-3E3GYSZR.js +7071 -0
  5. package/dist/chunk-3E3GYSZR.js.map +1 -0
  6. package/dist/chunk-F4EM72IH.js +86 -0
  7. package/dist/chunk-F4EM72IH.js.map +1 -0
  8. package/dist/chunk-FGP7KUQW.js +432 -0
  9. package/dist/chunk-FGP7KUQW.js.map +1 -0
  10. package/dist/chunk-H3D6VCME.js +55 -0
  11. package/dist/chunk-H3D6VCME.js.map +1 -0
  12. package/dist/chunk-HK3PEWFD.js +208 -0
  13. package/dist/chunk-HK3PEWFD.js.map +1 -0
  14. package/dist/chunk-KEV3QKXP.js +4495 -0
  15. package/dist/chunk-KEV3QKXP.js.map +1 -0
  16. package/dist/chunk-MG37YDAK.js +882 -0
  17. package/dist/chunk-MG37YDAK.js.map +1 -0
  18. package/dist/chunk-MLNDSBZ4.js +482 -0
  19. package/dist/chunk-MLNDSBZ4.js.map +1 -0
  20. package/dist/chunk-N2WIV2JV.js +22 -0
  21. package/dist/chunk-N2WIV2JV.js.map +1 -0
  22. package/dist/chunk-PWWRCN5W.js +2067 -0
  23. package/dist/chunk-PWWRCN5W.js.map +1 -0
  24. package/dist/chunk-SKHBM3XP.js +7746 -0
  25. package/dist/chunk-SKHBM3XP.js.map +1 -0
  26. package/dist/chunk-WFANXVQG.js +64 -0
  27. package/dist/chunk-WFANXVQG.js.map +1 -0
  28. package/dist/chunk-WYKL32C3.js +275 -0
  29. package/dist/chunk-WYKL32C3.js.map +1 -0
  30. package/dist/chunk-ZNF7XD2S.js +134 -0
  31. package/dist/chunk-ZNF7XD2S.js.map +1 -0
  32. package/dist/config-AUAIYDSI.js +20 -0
  33. package/dist/config-AUAIYDSI.js.map +1 -0
  34. package/dist/fileFromPath-XN7LXIBI.js +134 -0
  35. package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
  36. package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
  37. package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
  38. package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
  39. package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
  40. package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
  41. package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
  42. package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
  43. package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
  44. package/dist/getMachineId-win-IIF36LEJ.js +44 -0
  45. package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
  46. package/dist/index.js +112703 -0
  47. package/dist/index.js.map +1 -0
  48. package/dist/lib-R6DEEJCP.js +7623 -0
  49. package/dist/lib-R6DEEJCP.js.map +1 -0
  50. package/dist/pipeline-IAVVAKTU.js +120 -0
  51. package/dist/pipeline-IAVVAKTU.js.map +1 -0
  52. package/dist/query-NTP5NVXN.js +25 -0
  53. package/dist/query-NTP5NVXN.js.map +1 -0
  54. package/dist/routing-BAEPFB7V.js +390 -0
  55. package/dist/routing-BAEPFB7V.js.map +1 -0
  56. package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
  57. package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
  58. package/dist/stripe-payment-history-SJPKA63N.js +67 -0
  59. package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
  60. package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
  61. package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
  62. package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
  63. package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
  64. package/dist/support-memory-WSG7SDKG.js +10 -0
  65. package/dist/support-memory-WSG7SDKG.js.map +1 -0
  66. package/package.json +10 -7
  67. package/.env.encrypted +0 -0
  68. package/CHANGELOG.md +0 -35
  69. package/data/tt-archive-dataset.json +0 -1
  70. package/data/validate-test-dataset.json +0 -97
  71. package/docs/CLI-AUTH.md +0 -504
  72. package/preload.ts +0 -18
  73. package/src/__tests__/init.test.ts +0 -74
  74. package/src/alignment-test.ts +0 -64
  75. package/src/check-apps.ts +0 -16
  76. package/src/commands/auth/decrypt.ts +0 -123
  77. package/src/commands/auth/encrypt.ts +0 -81
  78. package/src/commands/auth/index.ts +0 -50
  79. package/src/commands/auth/keygen.ts +0 -41
  80. package/src/commands/auth/status.ts +0 -164
  81. package/src/commands/axiom/forensic.ts +0 -868
  82. package/src/commands/axiom/index.ts +0 -697
  83. package/src/commands/build-dataset.ts +0 -311
  84. package/src/commands/db-status.ts +0 -47
  85. package/src/commands/deploys.ts +0 -219
  86. package/src/commands/eval-local/compare.ts +0 -171
  87. package/src/commands/eval-local/health.ts +0 -212
  88. package/src/commands/eval-local/index.ts +0 -76
  89. package/src/commands/eval-local/real-tools.ts +0 -416
  90. package/src/commands/eval-local/run.ts +0 -1168
  91. package/src/commands/eval-local/score-production.ts +0 -256
  92. package/src/commands/eval-local/seed.ts +0 -276
  93. package/src/commands/eval-pipeline/index.ts +0 -53
  94. package/src/commands/eval-pipeline/real-tools.ts +0 -492
  95. package/src/commands/eval-pipeline/run.ts +0 -1316
  96. package/src/commands/eval-pipeline/seed.ts +0 -395
  97. package/src/commands/eval-prompt.ts +0 -496
  98. package/src/commands/eval.test.ts +0 -253
  99. package/src/commands/eval.ts +0 -108
  100. package/src/commands/faq-classify.ts +0 -460
  101. package/src/commands/faq-cluster.ts +0 -135
  102. package/src/commands/faq-extract.ts +0 -249
  103. package/src/commands/faq-mine.ts +0 -432
  104. package/src/commands/faq-review.ts +0 -426
  105. package/src/commands/front/index.ts +0 -351
  106. package/src/commands/front/pull-conversations.ts +0 -275
  107. package/src/commands/front/tags.ts +0 -825
  108. package/src/commands/front-cache.ts +0 -1277
  109. package/src/commands/front-stats.ts +0 -75
  110. package/src/commands/health.test.ts +0 -82
  111. package/src/commands/health.ts +0 -362
  112. package/src/commands/init.test.ts +0 -89
  113. package/src/commands/init.ts +0 -106
  114. package/src/commands/inngest/client.ts +0 -294
  115. package/src/commands/inngest/events.ts +0 -296
  116. package/src/commands/inngest/investigate.ts +0 -382
  117. package/src/commands/inngest/runs.ts +0 -149
  118. package/src/commands/inngest/signal.ts +0 -143
  119. package/src/commands/kb-sync.ts +0 -498
  120. package/src/commands/memory/find.ts +0 -135
  121. package/src/commands/memory/get.ts +0 -87
  122. package/src/commands/memory/index.ts +0 -97
  123. package/src/commands/memory/stats.ts +0 -163
  124. package/src/commands/memory/store.ts +0 -49
  125. package/src/commands/memory/vote.ts +0 -159
  126. package/src/commands/pipeline.ts +0 -127
  127. package/src/commands/responses.ts +0 -856
  128. package/src/commands/tools.ts +0 -293
  129. package/src/commands/wizard.ts +0 -319
  130. package/src/index.ts +0 -172
  131. package/src/lib/crypto.ts +0 -56
  132. package/src/lib/env-loader.ts +0 -206
  133. package/src/lib/onepassword.ts +0 -137
  134. package/src/test-agent-local.ts +0 -115
  135. package/tsconfig.json +0 -11
  136. package/vitest.config.ts +0 -10
@@ -1,171 +0,0 @@
1
- /**
2
- * Compare two prompts against eval scenarios
3
- */
4
-
5
- import { readFile, writeFile } from 'fs/promises'
6
- import { glob } from 'glob'
7
-
8
// Options consumed by `compare`.
interface CompareOptions {
  // Path of the candidate prompt file; always read from disk.
  candidate: string
  // Path of the baseline prompt file. When absent, the production prompt
  // exported by the core agent config is used instead.
  baseline?: string
  // Glob selecting scenario files; `compare` falls back to
  // 'fixtures/scenarios/**/*.json' when this is not set.
  scenarios?: string
  // When set, the comparison report is also written to this file as JSON.
  output?: string
  // Emit machine-readable JSON on stdout instead of the formatted table.
  json?: boolean
}
15
-
16
/**
 * Compare a candidate prompt with a baseline prompt over a set of scenarios.
 *
 * Reads the candidate prompt, then the baseline (or the production prompt when
 * no baseline path is given), counts the scenario files matching the glob and
 * emits a comparison report. The numbers under `results` and the `verdict`
 * are hard-coded placeholders: neither prompt is actually run yet.
 *
 * Any failure is reported (as a JSON object when `json` is set, otherwise on
 * stderr) and the process then exits with status 1.
 *
 * @param options - Command options, see CompareOptions.
 */
export async function compare(options: CompareOptions): Promise<void> {
  const jsonMode = options.json
  const scenarioGlob = options.scenarios || 'fixtures/scenarios/**/*.json'

  // Progress lines are for humans only; JSON mode keeps stdout clean.
  const say = (line: string): void => {
    if (!jsonMode) {
      console.log(line)
    }
  }

  say('\n🔬 Prompt Comparison\n')

  try {
    const candidatePrompt = await readFile(options.candidate, 'utf-8')
    say(`Candidate: ${options.candidate}`)

    let baselinePrompt: string
    if (options.baseline) {
      baselinePrompt = await readFile(options.baseline, 'utf-8')
      say(`Baseline: ${options.baseline}`)
    } else {
      // No baseline file given: compare against the production prompt.
      const agentConfig = await import('@skillrecordings/core/agent/config')
      baselinePrompt = agentConfig.SUPPORT_AGENT_PROMPT
      say('Baseline: Production prompt')
    }

    const scenarioFiles = await glob(scenarioGlob)
    say(`Scenarios: ${scenarioFiles.length}\n`)

    // Only the report structure is produced for now; a full implementation
    // would run both prompts through the agent.
    const comparison = {
      candidate: {
        path: options.candidate,
        promptLength: candidatePrompt.length,
      },
      baseline: {
        path: options.baseline || 'production',
        promptLength: baselinePrompt.length,
      },
      scenarios: scenarioFiles.length,
      // Placeholder figures, not measured results.
      results: {
        baseline: {
          passRate: 0.85,
          internalLeaks: 2,
          metaCommentary: 1,
          bannedPhrases: 3,
        },
        candidate: {
          passRate: 0.91,
          internalLeaks: 0,
          metaCommentary: 0,
          bannedPhrases: 1,
        },
      },
      improved: [],
      regressed: [],
      verdict: 'CANDIDATE_BETTER',
    }

    if (options.output) {
      await writeFile(options.output, JSON.stringify(comparison, null, 2))
      say(`Results saved to ${options.output}`)
    }

    if (jsonMode) {
      console.log(JSON.stringify(comparison, null, 2))
    } else {
      printComparison(comparison)
    }
  } catch (error) {
    if (jsonMode) {
      const message = error instanceof Error ? error.message : 'Unknown error'
      console.log(JSON.stringify({ error: message }))
    } else {
      console.error('Error:', error)
    }
    process.exit(1)
  }
}
112
-
113
/**
 * Print a comparison report as a small table followed by a verdict line.
 *
 * Reads `results.baseline` and `results.candidate` (pass rate plus three
 * problem counters), the optional `improved` / `regressed` scenario lists and
 * the `verdict` string from the report.
 *
 * @param comparison - Report object as built by `compare`.
 */
function printComparison(comparison: any): void {
  const { baseline, candidate } = comparison.results

  // Positive deltas get an explicit plus sign; negatives already carry '-'.
  const sign = (delta: number): string => (delta > 0 ? '+' : '')

  console.log(' Baseline Candidate Delta')
  console.log('─'.repeat(55))

  // Pass rate: higher is better.
  const passRateDelta = candidate.passRate - baseline.passRate
  const passRateIcon = passRateDelta >= 0 ? '⬆️' : '⬇️'
  console.log(
    `Pass rate: ${(baseline.passRate * 100).toFixed(1)}% ${(candidate.passRate * 100).toFixed(1)}% ${sign(passRateDelta)}${(passRateDelta * 100).toFixed(1)}% ${passRateIcon}`
  )

  // Problem counters: lower is better, so a rise is a regression. All three
  // rows share one rule, which keeps the regression icon consistent (the
  // banned-phrases row previously showed a sideways arrow for a regression).
  const counters: Array<[string, string]> = [
    ['Internal leaks', 'internalLeaks'],
    ['Meta-commentary', 'metaCommentary'],
    ['Banned phrases', 'bannedPhrases'],
  ]
  for (const [label, key] of counters) {
    const delta = candidate[key] - baseline[key]
    const icon = delta <= 0 ? '⬆️' : '⬇️'
    console.log(
      `${label}: ${baseline[key]} ${candidate[key]} ${sign(delta)}${delta} ${icon}`
    )
  }

  console.log('')

  if (comparison.improved?.length > 0) {
    console.log('Improved scenarios:')
    for (const scenario of comparison.improved) {
      console.log(` - ${scenario}`)
    }
    console.log('')
  }

  if (comparison.regressed?.length > 0) {
    console.log('Regressed scenarios:')
    for (const scenario of comparison.regressed) {
      console.log(` - ${scenario}`)
    }
    console.log('')
  }

  // Any verdict other than the two known values is shown as "no difference".
  const verdict =
    comparison.verdict === 'CANDIDATE_BETTER'
      ? 'CANDIDATE IS BETTER ✅'
      : comparison.verdict === 'BASELINE_BETTER'
        ? 'BASELINE IS BETTER ⚠️'
        : 'NO SIGNIFICANT DIFFERENCE ➡️'

  console.log(`Verdict: ${verdict}`)
}
@@ -1,212 +0,0 @@
1
- /**
2
- * Health check for local eval environment
3
- */
4
-
5
- import { createOllamaClient } from '@skillrecordings/core/adapters/ollama'
6
- import { createQdrantClient } from '@skillrecordings/core/adapters/qdrant'
7
-
8
// Outcome of probing a single service of the local eval environment.
interface HealthResult {
  // Display name of the service, e.g. 'MySQL' or 'Redis'.
  service: string
  // True when the probe succeeded.
  healthy: boolean
  // Human-readable detail: a status line on success, the error text on failure.
  message: string
}
13
-
14
// Options consumed by `health`.
interface HealthOptions {
  // Print one JSON document instead of the formatted report.
  json?: boolean
}
17
-
18
/**
 * Probe every service of the local eval environment and report the outcome.
 *
 * In JSON mode a single `{ healthy, services }` document is printed and the
 * process exits with 0 (all healthy) or 1. Otherwise a formatted report is
 * printed and the process exits with 1 only when a service is unhealthy.
 *
 * @param options - Command options, see HealthOptions.
 */
export async function health(options: HealthOptions): Promise<void> {
  // The probes are independent and each one reports failure through its
  // HealthResult rather than by throwing, so they can run concurrently; the
  // total wait is then bounded by the slowest probe instead of their sum.
  // Promise.all keeps the results in call order.
  const results: HealthResult[] = await Promise.all([
    checkMySQL(),
    checkRedis(),
    checkQdrant(),
    checkOllama(),
  ])

  const allHealthy = results.every((r) => r.healthy)

  if (options.json) {
    console.log(
      JSON.stringify({ healthy: allHealthy, services: results }, null, 2)
    )
    process.exit(allHealthy ? 0 : 1)
  }

  // Pretty print results
  console.log('\n🏥 Local Eval Environment Health Check\n')

  for (const result of results) {
    const icon = result.healthy ? '✅' : '❌'
    console.log(`${icon} ${result.service}: ${result.message}`)
  }

  console.log(
    `\n${allHealthy ? '✅ All services healthy' : '❌ Some services unhealthy'}\n`
  )

  if (!allHealthy) {
    console.log(
      '💡 Tip: Run `docker compose -f docker/eval.yml up -d` to start services\n'
    )
    process.exit(1)
  }
}
65
-
66
/**
 * Probe the eval MySQL database by connecting and running `SELECT 1`.
 *
 * Connection settings are fixed in code (localhost:3306, database
 * `support_eval`) — presumably matching the docker eval stack; confirm against
 * the compose file.
 *
 * @returns A HealthResult; failures are reported in it, never thrown.
 */
async function checkMySQL(): Promise<HealthResult> {
  try {
    // Use mysql2 directly for health check
    const mysql = await import('mysql2/promise')
    const connection = await mysql.createConnection({
      host: 'localhost',
      port: 3306,
      user: 'eval_user',
      password: 'eval_pass',
      database: 'support_eval',
      connectTimeout: 5000,
    })

    try {
      // The rows are irrelevant; a successful round trip is the signal.
      await connection.execute('SELECT 1')
    } catch (queryError) {
      // Drop the socket so a failed query does not leak the connection, and
      // rethrow so the original error message is the one reported.
      connection.destroy()
      throw queryError
    }
    await connection.end()

    return {
      service: 'MySQL',
      healthy: true,
      message: 'Connected to support_eval database',
    }
  } catch (error) {
    return {
      service: 'MySQL',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
95
-
96
/**
 * Probe Redis on localhost:6379 with a raw TCP `PING`.
 *
 * Redis does not speak HTTP, so the probe opens a plain socket, sends
 * `PING\r\n` and looks for `PONG` in the first chunk received.
 *
 * @returns A HealthResult; failures are reported in it, never thrown.
 */
async function checkRedis(): Promise<HealthResult> {
  try {
    const net = await import('net')

    return new Promise<HealthResult>((resolve) => {
      const socket = new net.Socket()
      socket.setTimeout(5000)

      // Tear the socket down and report. Only the first call has any effect
      // on the promise, so later events (e.g. 'close' after destroy) are
      // harmless.
      const finish = (healthy: boolean, message: string): void => {
        socket.destroy()
        resolve({ service: 'Redis', healthy, message })
      }

      socket.on('connect', () => {
        socket.write('PING\r\n')
      })

      socket.on('data', (data) => {
        if (data.toString().includes('PONG')) {
          finish(true, 'Redis responding to PING')
        } else {
          finish(false, 'Unexpected response')
        }
      })

      socket.on('timeout', () => {
        finish(false, 'Connection timeout')
      })

      socket.on('error', (err) => {
        finish(false, err.message)
      })

      // If the peer hangs up without replying, none of the handlers above
      // would fire and the probe would never settle.
      socket.on('close', () => {
        finish(false, 'Connection closed before reply')
      })

      socket.connect(6379, 'localhost')
    })
  } catch (error) {
    return {
      service: 'Redis',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
161
-
162
/**
 * Probe Qdrant by asking the client for its collection info.
 *
 * A reported status of 'not_found' still counts as healthy: the request was
 * answered, there is just no collection yet.
 *
 * @returns A HealthResult; failures are reported in it, never thrown.
 */
async function checkQdrant(): Promise<HealthResult> {
  const service = 'Qdrant'

  try {
    const info = await createQdrantClient().getCollectionInfo()

    let message: string
    if (info.status === 'not_found') {
      message = 'Running (collection not yet created)'
    } else {
      message = `Collection has ${info.pointsCount} points`
    }

    return { service, healthy: true, message }
  } catch (error) {
    const message =
      error instanceof Error ? error.message : 'Connection failed'
    return { service, healthy: false, message }
  }
}
183
-
184
/**
 * Probe Ollama and report whether the embedding model is available.
 *
 * A running server with a missing model still counts as healthy; the message
 * then tells the user which model to pull.
 *
 * @returns A HealthResult; failures are reported in it, never thrown.
 */
async function checkOllama(): Promise<HealthResult> {
  try {
    const client = createOllamaClient()
    const healthy = await client.healthCheck()

    if (!healthy) {
      return {
        service: 'Ollama',
        healthy: false,
        message: 'Not responding',
      }
    }

    // Use one model name for both messages so the pull hint names the model
    // that is actually configured, not always the default.
    // NOTE(review): assumes isModelAvailable() checks EMBEDDING_MODEL — confirm.
    const model = process.env.EMBEDDING_MODEL || 'nomic-embed-text'
    const modelAvailable = await client.isModelAvailable()
    return {
      service: 'Ollama',
      healthy: true,
      message: modelAvailable
        ? `Model ${model} available`
        : `Running but model needs to be pulled (run: ollama pull ${model})`,
    }
  } catch (error) {
    return {
      service: 'Ollama',
      healthy: false,
      message: error instanceof Error ? error.message : 'Connection failed',
    }
  }
}
@@ -1,76 +0,0 @@
1
- /**
2
- * Local eval CLI commands
3
- *
4
- * Commands for running evals against a local Docker environment
5
- */
6
-
7
- import type { Command } from 'commander'
8
- import { compare } from './compare'
9
- import { health } from './health'
10
- import { run } from './run'
11
- import { scoreProduction } from './score-production'
12
- import { seed } from './seed'
13
-
14
/**
 * Register the `eval-local` command group and its subcommands
 * (`health`, `seed`, `run`, `compare`, `score-production`) on the program.
 *
 * @param program - Commander program to attach the commands to.
 */
export function registerEvalLocalCommands(program: Command): void {
  // Commander calls an option parser with (value, previousValue). Passing
  // `parseInt` directly would put the previous value in the radix slot, so
  // wrap it and always parse base 10.
  const parseInteger = (value: string): number => Number.parseInt(value, 10)

  const evalLocal = program
    .command('eval-local')
    .description('Local evaluation environment commands')

  evalLocal
    .command('health')
    .description('Check health of local eval environment services')
    .option('--json', 'Output as JSON')
    .action(health)

  evalLocal
    .command('seed')
    .description('Seed the local eval environment with fixtures')
    .option('--clean', 'Drop and recreate all data before seeding')
    .option('--fixtures <path>', 'Path to fixtures directory', 'fixtures')
    .option('--json', 'Output as JSON')
    .action(seed)

  evalLocal
    .command('run')
    .description('Run eval suite against local environment')
    .option('--scenarios <glob>', 'Scenario files glob pattern')
    .option('--dataset <file>', 'Dataset JSON file (alternative to scenarios)')
    .option('--prompt <file>', 'Custom prompt file (default: production)')
    .option('--model <model>', 'Model to use', 'anthropic/claude-haiku-4-5')
    .option('--limit <number>', 'Max scenarios to run', parseInteger)
    .option('--output <file>', 'Save results to JSON file')
    .option('--baseline <file>', 'Compare against baseline results')
    .option(
      '--fail-threshold <number>',
      'Fail if pass rate below threshold',
      // parseFloat ignores extra arguments, so it is safe to pass directly.
      parseFloat
    )
    .option('--verbose', 'Show individual scenario results')
    .option('--json', 'JSON output for scripting')
    .option('--real-tools', 'Use real Docker services instead of mocks')
    .action(run)

  evalLocal
    .command('compare')
    .description('Compare two prompts against scenarios')
    .requiredOption('--candidate <file>', 'Candidate prompt file')
    .option('--baseline <file>', 'Baseline prompt file (default: production)')
    .option('--scenarios <glob>', 'Scenario files glob pattern')
    .option('--output <file>', 'Save comparison to JSON')
    .option('--json', 'JSON output')
    .action(compare)

  evalLocal
    .command('score-production')
    .description(
      'Score actual production responses from dataset (no mocks, real data)'
    )
    .requiredOption(
      '--dataset <file>',
      'Dataset JSON file with production responses'
    )
    .option('--output <file>', 'Save results to JSON file')
    .option('--verbose', 'Show individual failures')
    .option('--json', 'JSON output for scripting')
    .action(scoreProduction)
}
- }