@skillrecordings/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/bin/skill.mjs +27 -0
  2. package/dist/chunk-2NCCVTEE.js +22342 -0
  3. package/dist/chunk-2NCCVTEE.js.map +1 -0
  4. package/dist/chunk-3E3GYSZR.js +7071 -0
  5. package/dist/chunk-3E3GYSZR.js.map +1 -0
  6. package/dist/chunk-F4EM72IH.js +86 -0
  7. package/dist/chunk-F4EM72IH.js.map +1 -0
  8. package/dist/chunk-FGP7KUQW.js +432 -0
  9. package/dist/chunk-FGP7KUQW.js.map +1 -0
  10. package/dist/chunk-H3D6VCME.js +55 -0
  11. package/dist/chunk-H3D6VCME.js.map +1 -0
  12. package/dist/chunk-HK3PEWFD.js +208 -0
  13. package/dist/chunk-HK3PEWFD.js.map +1 -0
  14. package/dist/chunk-KEV3QKXP.js +4495 -0
  15. package/dist/chunk-KEV3QKXP.js.map +1 -0
  16. package/dist/chunk-MG37YDAK.js +882 -0
  17. package/dist/chunk-MG37YDAK.js.map +1 -0
  18. package/dist/chunk-MLNDSBZ4.js +482 -0
  19. package/dist/chunk-MLNDSBZ4.js.map +1 -0
  20. package/dist/chunk-N2WIV2JV.js +22 -0
  21. package/dist/chunk-N2WIV2JV.js.map +1 -0
  22. package/dist/chunk-PWWRCN5W.js +2067 -0
  23. package/dist/chunk-PWWRCN5W.js.map +1 -0
  24. package/dist/chunk-SKHBM3XP.js +7746 -0
  25. package/dist/chunk-SKHBM3XP.js.map +1 -0
  26. package/dist/chunk-WFANXVQG.js +64 -0
  27. package/dist/chunk-WFANXVQG.js.map +1 -0
  28. package/dist/chunk-WYKL32C3.js +275 -0
  29. package/dist/chunk-WYKL32C3.js.map +1 -0
  30. package/dist/chunk-ZNF7XD2S.js +134 -0
  31. package/dist/chunk-ZNF7XD2S.js.map +1 -0
  32. package/dist/config-AUAIYDSI.js +20 -0
  33. package/dist/config-AUAIYDSI.js.map +1 -0
  34. package/dist/fileFromPath-XN7LXIBI.js +134 -0
  35. package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
  36. package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
  37. package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
  38. package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
  39. package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
  40. package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
  41. package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
  42. package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
  43. package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
  44. package/dist/getMachineId-win-IIF36LEJ.js +44 -0
  45. package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
  46. package/dist/index.js +112703 -0
  47. package/dist/index.js.map +1 -0
  48. package/dist/lib-R6DEEJCP.js +7623 -0
  49. package/dist/lib-R6DEEJCP.js.map +1 -0
  50. package/dist/pipeline-IAVVAKTU.js +120 -0
  51. package/dist/pipeline-IAVVAKTU.js.map +1 -0
  52. package/dist/query-NTP5NVXN.js +25 -0
  53. package/dist/query-NTP5NVXN.js.map +1 -0
  54. package/dist/routing-BAEPFB7V.js +390 -0
  55. package/dist/routing-BAEPFB7V.js.map +1 -0
  56. package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
  57. package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
  58. package/dist/stripe-payment-history-SJPKA63N.js +67 -0
  59. package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
  60. package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
  61. package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
  62. package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
  63. package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
  64. package/dist/support-memory-WSG7SDKG.js +10 -0
  65. package/dist/support-memory-WSG7SDKG.js.map +1 -0
  66. package/package.json +10 -7
  67. package/.env.encrypted +0 -0
  68. package/CHANGELOG.md +0 -35
  69. package/data/tt-archive-dataset.json +0 -1
  70. package/data/validate-test-dataset.json +0 -97
  71. package/docs/CLI-AUTH.md +0 -504
  72. package/preload.ts +0 -18
  73. package/src/__tests__/init.test.ts +0 -74
  74. package/src/alignment-test.ts +0 -64
  75. package/src/check-apps.ts +0 -16
  76. package/src/commands/auth/decrypt.ts +0 -123
  77. package/src/commands/auth/encrypt.ts +0 -81
  78. package/src/commands/auth/index.ts +0 -50
  79. package/src/commands/auth/keygen.ts +0 -41
  80. package/src/commands/auth/status.ts +0 -164
  81. package/src/commands/axiom/forensic.ts +0 -868
  82. package/src/commands/axiom/index.ts +0 -697
  83. package/src/commands/build-dataset.ts +0 -311
  84. package/src/commands/db-status.ts +0 -47
  85. package/src/commands/deploys.ts +0 -219
  86. package/src/commands/eval-local/compare.ts +0 -171
  87. package/src/commands/eval-local/health.ts +0 -212
  88. package/src/commands/eval-local/index.ts +0 -76
  89. package/src/commands/eval-local/real-tools.ts +0 -416
  90. package/src/commands/eval-local/run.ts +0 -1168
  91. package/src/commands/eval-local/score-production.ts +0 -256
  92. package/src/commands/eval-local/seed.ts +0 -276
  93. package/src/commands/eval-pipeline/index.ts +0 -53
  94. package/src/commands/eval-pipeline/real-tools.ts +0 -492
  95. package/src/commands/eval-pipeline/run.ts +0 -1316
  96. package/src/commands/eval-pipeline/seed.ts +0 -395
  97. package/src/commands/eval-prompt.ts +0 -496
  98. package/src/commands/eval.test.ts +0 -253
  99. package/src/commands/eval.ts +0 -108
  100. package/src/commands/faq-classify.ts +0 -460
  101. package/src/commands/faq-cluster.ts +0 -135
  102. package/src/commands/faq-extract.ts +0 -249
  103. package/src/commands/faq-mine.ts +0 -432
  104. package/src/commands/faq-review.ts +0 -426
  105. package/src/commands/front/index.ts +0 -351
  106. package/src/commands/front/pull-conversations.ts +0 -275
  107. package/src/commands/front/tags.ts +0 -825
  108. package/src/commands/front-cache.ts +0 -1277
  109. package/src/commands/front-stats.ts +0 -75
  110. package/src/commands/health.test.ts +0 -82
  111. package/src/commands/health.ts +0 -362
  112. package/src/commands/init.test.ts +0 -89
  113. package/src/commands/init.ts +0 -106
  114. package/src/commands/inngest/client.ts +0 -294
  115. package/src/commands/inngest/events.ts +0 -296
  116. package/src/commands/inngest/investigate.ts +0 -382
  117. package/src/commands/inngest/runs.ts +0 -149
  118. package/src/commands/inngest/signal.ts +0 -143
  119. package/src/commands/kb-sync.ts +0 -498
  120. package/src/commands/memory/find.ts +0 -135
  121. package/src/commands/memory/get.ts +0 -87
  122. package/src/commands/memory/index.ts +0 -97
  123. package/src/commands/memory/stats.ts +0 -163
  124. package/src/commands/memory/store.ts +0 -49
  125. package/src/commands/memory/vote.ts +0 -159
  126. package/src/commands/pipeline.ts +0 -127
  127. package/src/commands/responses.ts +0 -856
  128. package/src/commands/tools.ts +0 -293
  129. package/src/commands/wizard.ts +0 -319
  130. package/src/index.ts +0 -172
  131. package/src/lib/crypto.ts +0 -56
  132. package/src/lib/env-loader.ts +0 -206
  133. package/src/lib/onepassword.ts +0 -137
  134. package/src/test-agent-local.ts +0 -115
  135. package/tsconfig.json +0 -11
  136. package/vitest.config.ts +0 -10
@@ -1,97 +0,0 @@
1
- import type { Command } from 'commander'
2
- import { find } from './find'
3
- import { get } from './get'
4
- import { stale, stats } from './stats'
5
- import { store } from './store'
6
- import { deleteMemory, downvote, upvote, validate } from './vote'
7
-
8
- /**
9
- * Register memory commands with Commander
10
- */
11
- export function registerMemoryCommands(program: Command): void {
12
- const memory = program
13
- .command('memory')
14
- .description('Manage semantic memory for agent learning')
15
-
16
- memory
17
- .command('store')
18
- .description('Store a new memory')
19
- .argument('<content>', 'Memory content to store')
20
- .option('--tags <tags>', 'Comma-separated tags')
21
- .option('--collection <collection>', 'Collection name (default: learnings)')
22
- .option('--app <app>', 'App slug to associate with memory')
23
- .option('--json', 'Output as JSON')
24
- .action(store)
25
-
26
- memory
27
- .command('find')
28
- .description('Search memories by semantic similarity')
29
- .argument('<query>', 'Search query text')
30
- .option('--limit <number>', 'Max results (1-100, default: 10)')
31
- .option('--collection <collection>', 'Collection name (default: learnings)')
32
- .option('--app <app>', 'Filter by app slug')
33
- .option(
34
- '--min-confidence <confidence>',
35
- 'Minimum confidence threshold (0-1, default: 0.5)'
36
- )
37
- .option('--json', 'Output as JSON')
38
- .action(find)
39
-
40
- memory
41
- .command('get')
42
- .description('Get a specific memory by ID')
43
- .argument('<id>', 'Memory ID')
44
- .option('--collection <collection>', 'Collection name (default: learnings)')
45
- .option('--json', 'Output as JSON')
46
- .action(get)
47
-
48
- memory
49
- .command('validate')
50
- .description('Validate a memory (resets decay clock)')
51
- .argument('<id>', 'Memory ID')
52
- .option('--collection <collection>', 'Collection name (default: learnings)')
53
- .option('--json', 'Output as JSON')
54
- .action(validate)
55
-
56
- memory
57
- .command('upvote')
58
- .description('Upvote a memory')
59
- .argument('<id>', 'Memory ID')
60
- .option('--collection <collection>', 'Collection name (default: learnings)')
61
- .option('--reason <reason>', 'Optional reason for upvote')
62
- .option('--json', 'Output as JSON')
63
- .action(upvote)
64
-
65
- memory
66
- .command('downvote')
67
- .description('Downvote a memory')
68
- .argument('<id>', 'Memory ID')
69
- .option('--collection <collection>', 'Collection name (default: learnings)')
70
- .option('--reason <reason>', 'Optional reason for downvote')
71
- .option('--json', 'Output as JSON')
72
- .action(downvote)
73
-
74
- memory
75
- .command('delete')
76
- .description('Delete a memory')
77
- .argument('<id>', 'Memory ID')
78
- .option('--collection <collection>', 'Collection name (default: learnings)')
79
- .option('--json', 'Output as JSON')
80
- .action(deleteMemory)
81
-
82
- memory
83
- .command('stats')
84
- .description('Display memory statistics')
85
- .option('--collection <collection>', 'Filter by collection')
86
- .option('--app <app>', 'Filter by app slug')
87
- .option('--json', 'Output as JSON')
88
- .action(stats)
89
-
90
- memory
91
- .command('stale')
92
- .description('List stale memories needing validation')
93
- .option('--collection <collection>', 'Filter by collection')
94
- .option('--threshold <threshold>', 'Confidence threshold (default: 0.25)')
95
- .option('--json', 'Output as JSON')
96
- .action(stale)
97
- }
@@ -1,163 +0,0 @@
1
- import { calculateConfidence } from '@skillrecordings/memory/decay'
2
- import { VotingService } from '@skillrecordings/memory/voting'
3
-
4
- /**
5
- * Display memory statistics
6
- */
7
- export async function stats(options: {
8
- collection?: string
9
- app?: string
10
- json?: boolean
11
- }): Promise<void> {
12
- try {
13
- const statsResult = await VotingService.stats(options.collection)
14
-
15
- if (options.json) {
16
- console.log(JSON.stringify(statsResult, null, 2))
17
- return
18
- }
19
-
20
- // Format output
21
- const collections = Object.keys(statsResult)
22
-
23
- if (collections.length === 0) {
24
- console.log('No memories found')
25
- return
26
- }
27
-
28
- console.log('\nMemory Statistics')
29
- console.log('─'.repeat(60))
30
-
31
- for (const collection of collections) {
32
- const stats = statsResult[collection]
33
- if (!stats) continue
34
-
35
- console.log(`\n${collection}:`)
36
- console.log(` Total memories: ${stats.count}`)
37
- console.log(
38
- ` Avg confidence: ${(stats.avg_confidence * 100).toFixed(1)}%`
39
- )
40
- console.log(` Upvotes: ${stats.total_upvotes}`)
41
- console.log(` Downvotes: ${stats.total_downvotes}`)
42
- console.log(` Citations: ${stats.total_citations}`)
43
- console.log(
44
- ` Avg success rate: ${(stats.avg_success_rate * 100).toFixed(1)}%`
45
- )
46
- }
47
-
48
- console.log('')
49
- } catch (error) {
50
- if (options.json) {
51
- console.error(
52
- JSON.stringify({
53
- error: error instanceof Error ? error.message : 'Unknown error',
54
- })
55
- )
56
- } else {
57
- console.error(
58
- 'Error:',
59
- error instanceof Error ? error.message : 'Unknown error'
60
- )
61
- }
62
- process.exit(1)
63
- }
64
- }
65
-
66
- /**
67
- * List stale memories (low confidence, needing validation)
68
- */
69
- export async function stale(options: {
70
- collection?: string
71
- threshold?: number
72
- json?: boolean
73
- }): Promise<void> {
74
- try {
75
- const threshold = options.threshold ?? 0.25
76
-
77
- // Get all collections or specified one
78
- const collections = options.collection
79
- ? [options.collection]
80
- : await VotingService._listCollections()
81
-
82
- const staleMemories: Array<{
83
- id: string
84
- collection: string
85
- confidence: number
86
- age_days: number
87
- content_preview: string
88
- }> = []
89
-
90
- // Check each collection for stale memories
91
- for (const collection of collections) {
92
- const memories = await VotingService._fetchAllMemories(collection)
93
-
94
- for (const memory of memories) {
95
- const confidence = calculateConfidence(memory)
96
-
97
- if (confidence < threshold) {
98
- const createdAt = new Date(memory.metadata.created_at)
99
- const lastValidatedAt = memory.metadata.last_validated_at
100
- ? new Date(memory.metadata.last_validated_at)
101
- : undefined
102
- const referenceDate = lastValidatedAt || createdAt
103
- const ageDays =
104
- (Date.now() - referenceDate.getTime()) / (24 * 60 * 60 * 1000)
105
-
106
- // Truncate content for preview
107
- const contentPreview =
108
- memory.content.length > 60
109
- ? memory.content.slice(0, 57) + '...'
110
- : memory.content
111
-
112
- staleMemories.push({
113
- id: memory.id,
114
- collection,
115
- confidence,
116
- age_days: ageDays,
117
- content_preview: contentPreview,
118
- })
119
- }
120
- }
121
- }
122
-
123
- if (options.json) {
124
- console.log(JSON.stringify(staleMemories, null, 2))
125
- return
126
- }
127
-
128
- if (staleMemories.length === 0) {
129
- console.log(
130
- `No stale memories found (threshold: ${(threshold * 100).toFixed(0)}%)`
131
- )
132
- return
133
- }
134
-
135
- console.log(
136
- `\nStale Memories (confidence < ${(threshold * 100).toFixed(0)}%)`
137
- )
138
- console.log('─'.repeat(80))
139
-
140
- for (const mem of staleMemories) {
141
- console.log(`\n${mem.id} [${mem.collection}]`)
142
- console.log(` Confidence: ${(mem.confidence * 100).toFixed(1)}%`)
143
- console.log(` Age: ${mem.age_days.toFixed(1)} days`)
144
- console.log(` Preview: ${mem.content_preview}`)
145
- }
146
-
147
- console.log('')
148
- } catch (error) {
149
- if (options.json) {
150
- console.error(
151
- JSON.stringify({
152
- error: error instanceof Error ? error.message : 'Unknown error',
153
- })
154
- )
155
- } else {
156
- console.error(
157
- 'Error:',
158
- error instanceof Error ? error.message : 'Unknown error'
159
- )
160
- }
161
- process.exit(1)
162
- }
163
- }
@@ -1,49 +0,0 @@
1
- import { MemoryService } from '@skillrecordings/memory/memory'
2
-
3
- /**
4
- * Store a new memory with optional tags and collection
5
- */
6
- export async function store(
7
- content: string,
8
- options: {
9
- tags?: string
10
- collection?: string
11
- app?: string
12
- json?: boolean
13
- }
14
- ): Promise<void> {
15
- try {
16
- const memory = await MemoryService.store(content, {
17
- collection: options.collection || 'learnings',
18
- source: 'human',
19
- app_slug: options.app,
20
- tags: options.tags?.split(',').map((t) => t.trim()) ?? [],
21
- })
22
-
23
- if (options.json) {
24
- console.log(JSON.stringify(memory, null, 2))
25
- } else {
26
- console.log(`✓ Stored memory: ${memory.id}`)
27
- if (memory.metadata.tags && memory.metadata.tags.length > 0) {
28
- console.log(` Tags: ${memory.metadata.tags.join(', ')}`)
29
- }
30
- if (memory.metadata.app_slug) {
31
- console.log(` App: ${memory.metadata.app_slug}`)
32
- }
33
- }
34
- } catch (error) {
35
- if (options.json) {
36
- console.error(
37
- JSON.stringify({
38
- error: error instanceof Error ? error.message : 'Unknown error',
39
- })
40
- )
41
- } else {
42
- console.error(
43
- 'Error:',
44
- error instanceof Error ? error.message : 'Unknown error'
45
- )
46
- }
47
- process.exit(1)
48
- }
49
- }
@@ -1,159 +0,0 @@
1
- import { MemoryService } from '@skillrecordings/memory/memory'
2
- import { VotingService } from '@skillrecordings/memory/voting'
3
-
4
- /**
5
- * Validate a memory (resets decay clock)
6
- */
7
- export async function validate(
8
- id: string,
9
- options: {
10
- collection?: string
11
- json?: boolean
12
- }
13
- ): Promise<void> {
14
- try {
15
- const collection = options.collection || 'learnings'
16
- await MemoryService.validate(id, collection)
17
-
18
- if (options.json) {
19
- console.log(JSON.stringify({ success: true, id }, null, 2))
20
- } else {
21
- console.log(`✓ Validated memory: ${id}`)
22
- console.log(' Decay clock has been reset')
23
- }
24
- } catch (error) {
25
- if (options.json) {
26
- console.error(
27
- JSON.stringify({
28
- error: error instanceof Error ? error.message : 'Unknown error',
29
- })
30
- )
31
- } else {
32
- console.error(
33
- 'Error:',
34
- error instanceof Error ? error.message : 'Unknown error'
35
- )
36
- }
37
- process.exit(1)
38
- }
39
- }
40
-
41
- /**
42
- * Upvote a memory
43
- */
44
- export async function upvote(
45
- id: string,
46
- options: {
47
- collection?: string
48
- reason?: string
49
- json?: boolean
50
- }
51
- ): Promise<void> {
52
- try {
53
- const collection = options.collection || 'learnings'
54
- await VotingService.vote(id, collection, 'upvote')
55
-
56
- if (options.json) {
57
- console.log(
58
- JSON.stringify({ success: true, id, vote: 'upvote' }, null, 2)
59
- )
60
- } else {
61
- console.log(`✓ Upvoted memory: ${id}`)
62
- if (options.reason) {
63
- console.log(` Reason: ${options.reason}`)
64
- }
65
- }
66
- } catch (error) {
67
- if (options.json) {
68
- console.error(
69
- JSON.stringify({
70
- error: error instanceof Error ? error.message : 'Unknown error',
71
- })
72
- )
73
- } else {
74
- console.error(
75
- 'Error:',
76
- error instanceof Error ? error.message : 'Unknown error'
77
- )
78
- }
79
- process.exit(1)
80
- }
81
- }
82
-
83
- /**
84
- * Downvote a memory
85
- */
86
- export async function downvote(
87
- id: string,
88
- options: {
89
- collection?: string
90
- reason?: string
91
- json?: boolean
92
- }
93
- ): Promise<void> {
94
- try {
95
- const collection = options.collection || 'learnings'
96
- await VotingService.vote(id, collection, 'downvote')
97
-
98
- if (options.json) {
99
- console.log(
100
- JSON.stringify({ success: true, id, vote: 'downvote' }, null, 2)
101
- )
102
- } else {
103
- console.log(`✓ Downvoted memory: ${id}`)
104
- if (options.reason) {
105
- console.log(` Reason: ${options.reason}`)
106
- }
107
- }
108
- } catch (error) {
109
- if (options.json) {
110
- console.error(
111
- JSON.stringify({
112
- error: error instanceof Error ? error.message : 'Unknown error',
113
- })
114
- )
115
- } else {
116
- console.error(
117
- 'Error:',
118
- error instanceof Error ? error.message : 'Unknown error'
119
- )
120
- }
121
- process.exit(1)
122
- }
123
- }
124
-
125
- /**
126
- * Delete a memory
127
- */
128
- export async function deleteMemory(
129
- id: string,
130
- options: {
131
- collection?: string
132
- json?: boolean
133
- }
134
- ): Promise<void> {
135
- try {
136
- const collection = options.collection || 'learnings'
137
- await MemoryService.delete(id, collection)
138
-
139
- if (options.json) {
140
- console.log(JSON.stringify({ success: true, id }, null, 2))
141
- } else {
142
- console.log(`✓ Deleted memory: ${id}`)
143
- }
144
- } catch (error) {
145
- if (options.json) {
146
- console.error(
147
- JSON.stringify({
148
- error: error instanceof Error ? error.message : 'Unknown error',
149
- })
150
- )
151
- } else {
152
- console.error(
153
- 'Error:',
154
- error instanceof Error ? error.message : 'Unknown error'
155
- )
156
- }
157
- process.exit(1)
158
- }
159
- }
@@ -1,127 +0,0 @@
1
- /**
2
- * Pipeline CLI commands
3
- *
4
- * Commands for running and evaluating the pipeline steps.
5
- */
6
-
7
- import type { Command } from 'commander'
8
- import { runClassifyEval, buildClassifyDataset } from '@skillrecordings/core/pipeline/evals/classify.eval'
9
- import { runValidateEval, buildValidateDatasetFromProduction } from '@skillrecordings/core/pipeline/evals/validate.eval'
10
- import { runE2EEval } from '@skillrecordings/core/pipeline/evals/e2e.eval'
11
-
12
- export function registerPipelineCommands(program: Command): void {
13
- const pipeline = program
14
- .command('pipeline')
15
- .description('Pipeline step commands and evals')
16
-
17
- // -------------------------------------------------------------------------
18
- // Classify eval
19
- // -------------------------------------------------------------------------
20
- pipeline
21
- .command('eval-classify')
22
- .description('Run classifier evaluation')
23
- .requiredOption('--dataset <file>', 'Path to labeled scenarios JSON')
24
- .option('--output <file>', 'Save results to JSON')
25
- .option('--verbose', 'Show individual failures')
26
- .option('--json', 'JSON output')
27
- .option('--force-llm', 'Skip fast path, always use LLM')
28
- .option('--model <model>', 'Model for LLM classification', 'anthropic/claude-haiku-4-5')
29
- .action(async (opts) => {
30
- await runClassifyEval(opts)
31
- })
32
-
33
- pipeline
34
- .command('build-classify-dataset')
35
- .description('Build classify eval dataset from production data')
36
- .requiredOption('--production <file>', 'Production dataset JSON')
37
- .requiredOption('--output <file>', 'Output scenarios JSON')
38
- .action(async (opts) => {
39
- await buildClassifyDataset(opts.production, opts.output)
40
- })
41
-
42
- // -------------------------------------------------------------------------
43
- // Validate eval
44
- // -------------------------------------------------------------------------
45
- pipeline
46
- .command('eval-validate')
47
- .description('Run validator evaluation')
48
- .option('--dataset <file>', 'Path to scenarios JSON (uses built-in if not provided)')
49
- .option('--output <file>', 'Save results to JSON')
50
- .option('--verbose', 'Show individual failures')
51
- .option('--json', 'JSON output')
52
- .action(async (opts) => {
53
- await runValidateEval(opts)
54
- })
55
-
56
- pipeline
57
- .command('build-validate-dataset')
58
- .description('Build validate eval dataset from production failures')
59
- .requiredOption('--production <file>', 'Production baseline results JSON')
60
- .requiredOption('--output <file>', 'Output scenarios JSON')
61
- .action(async (opts) => {
62
- await buildValidateDatasetFromProduction(opts.production, opts.output)
63
- })
64
-
65
- // -------------------------------------------------------------------------
66
- // E2E eval
67
- // -------------------------------------------------------------------------
68
- pipeline
69
- .command('eval-e2e')
70
- .description('Run end-to-end pipeline evaluation')
71
- .requiredOption('--dataset <file>', 'Production dataset JSON')
72
- .option('--output <file>', 'Save results to JSON')
73
- .option('--verbose', 'Show individual failures')
74
- .option('--json', 'JSON output')
75
- .option('--limit <number>', 'Max scenarios to run', parseInt)
76
- .option('--model <model>', 'Model for LLM steps', 'anthropic/claude-haiku-4-5')
77
- .action(async (opts) => {
78
- await runE2EEval(opts)
79
- })
80
-
81
- // -------------------------------------------------------------------------
82
- // Run pipeline
83
- // -------------------------------------------------------------------------
84
- pipeline
85
- .command('run')
86
- .description('Run pipeline on a single message')
87
- .requiredOption('--subject <text>', 'Message subject')
88
- .requiredOption('--body <text>', 'Message body')
89
- .option('--app <id>', 'App ID', 'total-typescript')
90
- .option('--dry-run', 'Don\'t actually send', true)
91
- .option('--json', 'JSON output')
92
- .action(async (opts) => {
93
- const { runPipeline } = await import('@skillrecordings/core/pipeline')
94
-
95
- const result = await runPipeline({
96
- message: {
97
- subject: opts.subject,
98
- body: opts.body,
99
- },
100
- appConfig: {
101
- appId: opts.app,
102
- instructorConfigured: false,
103
- autoSendEnabled: false,
104
- },
105
- dryRun: opts.dryRun,
106
- })
107
-
108
- if (opts.json) {
109
- console.log(JSON.stringify(result, null, 2))
110
- } else {
111
- console.log(`\n📬 Pipeline Result\n`)
112
- console.log(`Action: ${result.action}`)
113
- if (result.response) {
114
- console.log(`\nResponse:\n${result.response}`)
115
- }
116
- console.log(`\nSteps:`)
117
- for (const step of result.steps) {
118
- const icon = step.success ? '✅' : '❌'
119
- console.log(` ${icon} ${step.step} (${step.durationMs}ms)`)
120
- if (step.error) {
121
- console.log(` Error: ${step.error}`)
122
- }
123
- }
124
- console.log(`\nTotal: ${result.totalDurationMs}ms`)
125
- }
126
- })
127
- }