@skillrecordings/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/bin/skill.mjs +27 -0
  2. package/dist/chunk-2NCCVTEE.js +22342 -0
  3. package/dist/chunk-2NCCVTEE.js.map +1 -0
  4. package/dist/chunk-3E3GYSZR.js +7071 -0
  5. package/dist/chunk-3E3GYSZR.js.map +1 -0
  6. package/dist/chunk-F4EM72IH.js +86 -0
  7. package/dist/chunk-F4EM72IH.js.map +1 -0
  8. package/dist/chunk-FGP7KUQW.js +432 -0
  9. package/dist/chunk-FGP7KUQW.js.map +1 -0
  10. package/dist/chunk-H3D6VCME.js +55 -0
  11. package/dist/chunk-H3D6VCME.js.map +1 -0
  12. package/dist/chunk-HK3PEWFD.js +208 -0
  13. package/dist/chunk-HK3PEWFD.js.map +1 -0
  14. package/dist/chunk-KEV3QKXP.js +4495 -0
  15. package/dist/chunk-KEV3QKXP.js.map +1 -0
  16. package/dist/chunk-MG37YDAK.js +882 -0
  17. package/dist/chunk-MG37YDAK.js.map +1 -0
  18. package/dist/chunk-MLNDSBZ4.js +482 -0
  19. package/dist/chunk-MLNDSBZ4.js.map +1 -0
  20. package/dist/chunk-N2WIV2JV.js +22 -0
  21. package/dist/chunk-N2WIV2JV.js.map +1 -0
  22. package/dist/chunk-PWWRCN5W.js +2067 -0
  23. package/dist/chunk-PWWRCN5W.js.map +1 -0
  24. package/dist/chunk-SKHBM3XP.js +7746 -0
  25. package/dist/chunk-SKHBM3XP.js.map +1 -0
  26. package/dist/chunk-WFANXVQG.js +64 -0
  27. package/dist/chunk-WFANXVQG.js.map +1 -0
  28. package/dist/chunk-WYKL32C3.js +275 -0
  29. package/dist/chunk-WYKL32C3.js.map +1 -0
  30. package/dist/chunk-ZNF7XD2S.js +134 -0
  31. package/dist/chunk-ZNF7XD2S.js.map +1 -0
  32. package/dist/config-AUAIYDSI.js +20 -0
  33. package/dist/config-AUAIYDSI.js.map +1 -0
  34. package/dist/fileFromPath-XN7LXIBI.js +134 -0
  35. package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
  36. package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
  37. package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
  38. package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
  39. package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
  40. package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
  41. package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
  42. package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
  43. package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
  44. package/dist/getMachineId-win-IIF36LEJ.js +44 -0
  45. package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
  46. package/dist/index.js +112703 -0
  47. package/dist/index.js.map +1 -0
  48. package/dist/lib-R6DEEJCP.js +7623 -0
  49. package/dist/lib-R6DEEJCP.js.map +1 -0
  50. package/dist/pipeline-IAVVAKTU.js +120 -0
  51. package/dist/pipeline-IAVVAKTU.js.map +1 -0
  52. package/dist/query-NTP5NVXN.js +25 -0
  53. package/dist/query-NTP5NVXN.js.map +1 -0
  54. package/dist/routing-BAEPFB7V.js +390 -0
  55. package/dist/routing-BAEPFB7V.js.map +1 -0
  56. package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
  57. package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
  58. package/dist/stripe-payment-history-SJPKA63N.js +67 -0
  59. package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
  60. package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
  61. package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
  62. package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
  63. package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
  64. package/dist/support-memory-WSG7SDKG.js +10 -0
  65. package/dist/support-memory-WSG7SDKG.js.map +1 -0
  66. package/package.json +10 -7
  67. package/.env.encrypted +0 -0
  68. package/CHANGELOG.md +0 -35
  69. package/data/tt-archive-dataset.json +0 -1
  70. package/data/validate-test-dataset.json +0 -97
  71. package/docs/CLI-AUTH.md +0 -504
  72. package/preload.ts +0 -18
  73. package/src/__tests__/init.test.ts +0 -74
  74. package/src/alignment-test.ts +0 -64
  75. package/src/check-apps.ts +0 -16
  76. package/src/commands/auth/decrypt.ts +0 -123
  77. package/src/commands/auth/encrypt.ts +0 -81
  78. package/src/commands/auth/index.ts +0 -50
  79. package/src/commands/auth/keygen.ts +0 -41
  80. package/src/commands/auth/status.ts +0 -164
  81. package/src/commands/axiom/forensic.ts +0 -868
  82. package/src/commands/axiom/index.ts +0 -697
  83. package/src/commands/build-dataset.ts +0 -311
  84. package/src/commands/db-status.ts +0 -47
  85. package/src/commands/deploys.ts +0 -219
  86. package/src/commands/eval-local/compare.ts +0 -171
  87. package/src/commands/eval-local/health.ts +0 -212
  88. package/src/commands/eval-local/index.ts +0 -76
  89. package/src/commands/eval-local/real-tools.ts +0 -416
  90. package/src/commands/eval-local/run.ts +0 -1168
  91. package/src/commands/eval-local/score-production.ts +0 -256
  92. package/src/commands/eval-local/seed.ts +0 -276
  93. package/src/commands/eval-pipeline/index.ts +0 -53
  94. package/src/commands/eval-pipeline/real-tools.ts +0 -492
  95. package/src/commands/eval-pipeline/run.ts +0 -1316
  96. package/src/commands/eval-pipeline/seed.ts +0 -395
  97. package/src/commands/eval-prompt.ts +0 -496
  98. package/src/commands/eval.test.ts +0 -253
  99. package/src/commands/eval.ts +0 -108
  100. package/src/commands/faq-classify.ts +0 -460
  101. package/src/commands/faq-cluster.ts +0 -135
  102. package/src/commands/faq-extract.ts +0 -249
  103. package/src/commands/faq-mine.ts +0 -432
  104. package/src/commands/faq-review.ts +0 -426
  105. package/src/commands/front/index.ts +0 -351
  106. package/src/commands/front/pull-conversations.ts +0 -275
  107. package/src/commands/front/tags.ts +0 -825
  108. package/src/commands/front-cache.ts +0 -1277
  109. package/src/commands/front-stats.ts +0 -75
  110. package/src/commands/health.test.ts +0 -82
  111. package/src/commands/health.ts +0 -362
  112. package/src/commands/init.test.ts +0 -89
  113. package/src/commands/init.ts +0 -106
  114. package/src/commands/inngest/client.ts +0 -294
  115. package/src/commands/inngest/events.ts +0 -296
  116. package/src/commands/inngest/investigate.ts +0 -382
  117. package/src/commands/inngest/runs.ts +0 -149
  118. package/src/commands/inngest/signal.ts +0 -143
  119. package/src/commands/kb-sync.ts +0 -498
  120. package/src/commands/memory/find.ts +0 -135
  121. package/src/commands/memory/get.ts +0 -87
  122. package/src/commands/memory/index.ts +0 -97
  123. package/src/commands/memory/stats.ts +0 -163
  124. package/src/commands/memory/store.ts +0 -49
  125. package/src/commands/memory/vote.ts +0 -159
  126. package/src/commands/pipeline.ts +0 -127
  127. package/src/commands/responses.ts +0 -856
  128. package/src/commands/tools.ts +0 -293
  129. package/src/commands/wizard.ts +0 -319
  130. package/src/index.ts +0 -172
  131. package/src/lib/crypto.ts +0 -56
  132. package/src/lib/env-loader.ts +0 -206
  133. package/src/lib/onepassword.ts +0 -137
  134. package/src/test-agent-local.ts +0 -115
  135. package/tsconfig.json +0 -11
  136. package/vitest.config.ts +0 -10
@@ -1,253 +0,0 @@
1
- import * as fs from 'node:fs/promises'
2
- import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
3
- import { runEval } from './eval'
4
-
5
// Replace process.exit with a throwing stub: the command under test ends every
// path by exiting, and a real exit would kill the test runner. The thrown
// message encodes the exit code so tests can assert on it.
const mockExit = vi
  .spyOn(process, 'exit')
  .mockImplementation((exitCode) => {
    throw new Error(`process.exit(${exitCode})`)
  })

// Auto-mock the filesystem module the command reads the dataset through
vi.mock('node:fs/promises')

// Swap the core routing eval for a bare mock function
vi.mock('@skillrecordings/core/evals/routing', () => {
  return { evalRouting: vi.fn() }
})
17
-
18
describe('eval command', () => {
  const DATASET_PATH = '/path/to/dataset.json'

  // Single-datapoint dataset that the mocked readFile serves in the happy-path tests
  const dataset = [
    {
      message: 'Test message',
      expectedCategory: 'needs_response',
      expectedRoute: 'classifier' as const,
    },
  ]

  // Baseline report returned by the mocked evalRouting; tests spread overrides on top
  const passingReport = {
    precision: 0.92,
    recall: 0.95,
    fpRate: 0.03,
    fnRate: 0.02,
    byCategory: {},
    cost: { tokens: 5000, estimatedUsd: 0.00125 },
    latency: { p50: 120, p95: 250, p99: 350 },
    passed: true,
  }

  /** Make the mocked fs report an existing file whose content is `dataset`. */
  const stubDatasetOnDisk = (): void => {
    vi.mocked(fs.access).mockResolvedValue(undefined)
    vi.mocked(fs.readFile).mockResolvedValue(JSON.stringify(dataset))
  }

  /** Resolve the mocked evalRouting the same way the command does (dynamic import). */
  const loadEvalRouting = async () => {
    const routing = await import('@skillrecordings/core/evals/routing')
    return vi.mocked(routing.evalRouting)
  }

  /** Join every argument of every recorded console call into one string. */
  const loggedText = (spy: { mock: { calls: unknown[][] } }): string =>
    spy.mock.calls.flat().join('\n')

  beforeEach(() => {
    vi.clearAllMocks()
    mockExit.mockClear()
  })

  afterEach(() => {
    mockExit.mockClear()
  })

  it('should require dataset path', async () => {
    const errorSpy = vi.spyOn(console, 'error')

    const run = runEval('routing', undefined)
    await expect(run).rejects.toThrow('process.exit(1)')

    expect(errorSpy).toHaveBeenCalledWith(
      expect.stringContaining('Dataset path is required')
    )
  })

  it('should fail if dataset file does not exist', async () => {
    const errorSpy = vi.spyOn(console, 'error')
    vi.mocked(fs.access).mockRejectedValue(new Error('File not found'))

    const run = runEval('routing', '/path/to/nonexistent.json')
    await expect(run).rejects.toThrow('process.exit(1)')

    expect(errorSpy).toHaveBeenCalledWith(
      expect.stringContaining('Dataset file not found')
    )
  })

  it('should fail if dataset is invalid JSON', async () => {
    const errorSpy = vi.spyOn(console, 'error')
    vi.mocked(fs.access).mockResolvedValue(undefined)
    vi.mocked(fs.readFile).mockResolvedValue('invalid json')

    const run = runEval('routing', '/path/to/invalid.json')
    await expect(run).rejects.toThrow('process.exit(1)')

    expect(errorSpy).toHaveBeenCalledWith(
      expect.stringContaining('Invalid JSON')
    )
  })

  it('should print pretty results table by default', async () => {
    const logSpy = vi.spyOn(console, 'log')
    const reportWithCategory = {
      ...passingReport,
      byCategory: {
        needs_response: {
          tp: 10,
          fp: 1,
          fn: 1,
          tn: 5,
          precision: 0.95,
          recall: 0.93,
          f1: 0.94,
          count: 17,
        },
      },
    }

    stubDatasetOnDisk()
    const evalRouting = await loadEvalRouting()
    evalRouting.mockResolvedValue(reportWithCategory)

    // A passing report ends in exit code 0, surfaced here as a thrown error
    await expect(runEval('routing', DATASET_PATH)).rejects.toThrow(
      'process.exit(0)'
    )

    const output = loggedText(logSpy)
    for (const fragment of [
      'Precision',
      '92.0%',
      'Recall',
      '95.0%',
      'Latency',
      '120ms',
    ]) {
      expect(output).toContain(fragment)
    }
  })

  it('should output JSON when --json flag is used', async () => {
    const logSpy = vi.spyOn(console, 'log')

    stubDatasetOnDisk()
    const evalRouting = await loadEvalRouting()
    evalRouting.mockResolvedValue(passingReport)

    await expect(
      runEval('routing', DATASET_PATH, { json: true })
    ).rejects.toThrow('process.exit(0)')

    // Everything logged must parse as one JSON document
    const parsed = JSON.parse(loggedText(logSpy))

    expect(parsed.precision).toBe(0.92)
    expect(parsed.recall).toBe(0.95)
    expect(parsed.passed).toBe(true)
  })

  it('should exit with code 1 when gates fail', async () => {
    const failingReport = {
      ...passingReport,
      precision: 0.85, // Below threshold
      recall: 0.88, // Below threshold
      fpRate: 0.05,
      fnRate: 0.04,
      passed: false,
    }

    stubDatasetOnDisk()
    const evalRouting = await loadEvalRouting()
    evalRouting.mockResolvedValue(failingReport)

    const strictGates = { minPrecision: 0.92, minRecall: 0.95 }
    await expect(
      runEval('routing', DATASET_PATH, { gates: strictGates })
    ).rejects.toThrow('process.exit(1)')
  })

  it('should accept custom gates', async () => {
    stubDatasetOnDisk()
    const evalRouting = await loadEvalRouting()
    evalRouting.mockResolvedValue(passingReport)

    // Test with custom gates
    await expect(
      runEval('routing', DATASET_PATH, {
        gates: { minPrecision: 0.9, minRecall: 0.93, maxFpRate: 0.05 },
      })
    ).rejects.toThrow('process.exit(0)')

    // The gates object must reach evalRouting unchanged, next to the parsed dataset
    expect(evalRouting).toHaveBeenCalledWith(dataset, {
      minPrecision: 0.9,
      minRecall: 0.93,
      maxFpRate: 0.05,
    })
  })
})
@@ -1,108 +0,0 @@
1
- import { access, readFile } from 'node:fs/promises'
2
- import type {
3
- EvalDatapoint,
4
- EvalGates,
5
- EvalReport,
6
- } from '@skillrecordings/core/evals/routing'
7
-
8
/**
 * Options accepted by `runEval`.
 */
export interface EvalOptions {
  /** When true, the report is printed as indented JSON instead of the pretty table. Defaults to false. */
  json?: boolean
  /** Threshold gates handed to the routing eval as-is; omitted when not provided. */
  gates?: EvalGates
}
12
-
13
/**
 * Run evals against a dataset, print the report, and exit the process.
 *
 * Usage: skill eval routing --dataset path/to/dataset.json [--gates strict|relaxed] [--json]
 *
 * Every path ends in `process.exit`: code 1 for a missing path, a missing or
 * unreadable file, invalid JSON, a non-array dataset, or a report whose
 * `passed` flag is false; code 0 when the report passed.
 *
 * @param evalType - Type of eval to run (currently only 'routing' supported).
 *   NOTE(review): the value is not inspected here — the routing eval always runs.
 * @param datasetPath - Path to JSON dataset file
 * @param options - Command options
 */
export async function runEval(
  evalType: string,
  datasetPath: string | undefined,
  options: EvalOptions = {}
): Promise<void> {
  const { json = false, gates } = options

  // Validate inputs
  if (!datasetPath) {
    console.error(
      'Error: Dataset path is required. Usage: skill eval routing --dataset <path>'
    )
    process.exit(1)
  }

  // Check if file exists
  try {
    await access(datasetPath)
  } catch {
    console.error(`Error: Dataset file not found: ${datasetPath}`)
    process.exit(1)
  }

  // Read the file. Kept separate from parsing so an I/O failure (e.g. the path
  // is a directory) is not misreported as invalid JSON.
  let content: string
  try {
    content = await readFile(datasetPath, 'utf-8')
  } catch (error) {
    console.error(
      `Error: Failed to read dataset file: ${error instanceof Error ? error.message : 'Unknown error'}`
    )
    process.exit(1)
  }

  // Parse into `unknown` first; JSON.parse gives no guarantee about shape
  let parsed: unknown
  try {
    parsed = JSON.parse(content)
  } catch (error) {
    console.error(
      `Error: Invalid JSON in dataset file: ${error instanceof Error ? error.message : 'Unknown error'}`
    )
    process.exit(1)
  }

  // The eval expects a list of datapoints; reject any other JSON document.
  // This check sits outside the try blocks so the exit is never swallowed.
  if (!Array.isArray(parsed)) {
    console.error('Error: Dataset must be a JSON array of datapoints')
    process.exit(1)
  }
  const dataset: EvalDatapoint[] = parsed

  // Import evalRouting (only when needed to avoid circular deps)
  const { evalRouting } = await import('@skillrecordings/core/evals/routing')

  // Run eval with optional gates
  const report: EvalReport = await evalRouting(dataset, gates)

  // Output results
  if (json) {
    console.log(JSON.stringify(report, null, 2))
  } else {
    printPrettyResults(report)
  }

  // Exit with appropriate code
  process.exit(report.passed ? 0 : 1)
}
73
-
74
/**
 * Print a human-readable summary of an eval report to stdout.
 *
 * Sections, in order: overall metrics, latency and cost, a per-category
 * breakdown (only when `report.byCategory` has entries), and a final
 * PASSED/FAILED verdict taken from `report.passed`.
 *
 * @param report - Report to render; ratios are printed as percentages with one decimal
 */
function printPrettyResults(report: EvalReport): void {
  // Ratio in [0, 1] rendered as a percentage with one decimal place
  const pct = (ratio: number): string => `${(ratio * 100).toFixed(1)}%`

  console.log('\n📊 Evaluation Results\n')
  console.log('Overall Metrics:')
  console.log(` Precision: ${pct(report.precision)}`)
  console.log(` Recall: ${pct(report.recall)}`)
  console.log(` False Positive Rate: ${pct(report.fpRate)}`)
  console.log(` False Negative Rate: ${pct(report.fnRate)}`)

  console.log('\nPerformance:')
  console.log(` Latency (p50): ${report.latency.p50.toFixed(0)}ms`)
  console.log(` Latency (p95): ${report.latency.p95.toFixed(0)}ms`)
  console.log(` Latency (p99): ${report.latency.p99.toFixed(0)}ms`)
  console.log(` Total Tokens: ${report.cost.tokens.toLocaleString()}`)
  console.log(` Estimated Cost: $${report.cost.estimatedUsd.toFixed(4)}`)

  if (Object.keys(report.byCategory).length > 0) {
    console.log('\nCategory Breakdown:')
    for (const [category, metrics] of Object.entries(report.byCategory)) {
      console.log(`\n ${category}:`)
      console.log(` Precision: ${pct(metrics.precision)}`)
      console.log(` Recall: ${pct(metrics.recall)}`)
      console.log(` F1: ${pct(metrics.f1)}`)
      console.log(` Count: ${metrics.count}`)
    }
  }

  console.log(`\n${report.passed ? '✅ PASSED' : '❌ FAILED'}`)

  if (!report.passed) {
    console.log('\nOne or more metrics fell below threshold gates.')
  }
}