@skillrecordings/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/bin/skill.mjs +27 -0
  2. package/dist/chunk-2NCCVTEE.js +22342 -0
  3. package/dist/chunk-2NCCVTEE.js.map +1 -0
  4. package/dist/chunk-3E3GYSZR.js +7071 -0
  5. package/dist/chunk-3E3GYSZR.js.map +1 -0
  6. package/dist/chunk-F4EM72IH.js +86 -0
  7. package/dist/chunk-F4EM72IH.js.map +1 -0
  8. package/dist/chunk-FGP7KUQW.js +432 -0
  9. package/dist/chunk-FGP7KUQW.js.map +1 -0
  10. package/dist/chunk-H3D6VCME.js +55 -0
  11. package/dist/chunk-H3D6VCME.js.map +1 -0
  12. package/dist/chunk-HK3PEWFD.js +208 -0
  13. package/dist/chunk-HK3PEWFD.js.map +1 -0
  14. package/dist/chunk-KEV3QKXP.js +4495 -0
  15. package/dist/chunk-KEV3QKXP.js.map +1 -0
  16. package/dist/chunk-MG37YDAK.js +882 -0
  17. package/dist/chunk-MG37YDAK.js.map +1 -0
  18. package/dist/chunk-MLNDSBZ4.js +482 -0
  19. package/dist/chunk-MLNDSBZ4.js.map +1 -0
  20. package/dist/chunk-N2WIV2JV.js +22 -0
  21. package/dist/chunk-N2WIV2JV.js.map +1 -0
  22. package/dist/chunk-PWWRCN5W.js +2067 -0
  23. package/dist/chunk-PWWRCN5W.js.map +1 -0
  24. package/dist/chunk-SKHBM3XP.js +7746 -0
  25. package/dist/chunk-SKHBM3XP.js.map +1 -0
  26. package/dist/chunk-WFANXVQG.js +64 -0
  27. package/dist/chunk-WFANXVQG.js.map +1 -0
  28. package/dist/chunk-WYKL32C3.js +275 -0
  29. package/dist/chunk-WYKL32C3.js.map +1 -0
  30. package/dist/chunk-ZNF7XD2S.js +134 -0
  31. package/dist/chunk-ZNF7XD2S.js.map +1 -0
  32. package/dist/config-AUAIYDSI.js +20 -0
  33. package/dist/config-AUAIYDSI.js.map +1 -0
  34. package/dist/fileFromPath-XN7LXIBI.js +134 -0
  35. package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
  36. package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
  37. package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
  38. package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
  39. package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
  40. package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
  41. package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
  42. package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
  43. package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
  44. package/dist/getMachineId-win-IIF36LEJ.js +44 -0
  45. package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
  46. package/dist/index.js +112703 -0
  47. package/dist/index.js.map +1 -0
  48. package/dist/lib-R6DEEJCP.js +7623 -0
  49. package/dist/lib-R6DEEJCP.js.map +1 -0
  50. package/dist/pipeline-IAVVAKTU.js +120 -0
  51. package/dist/pipeline-IAVVAKTU.js.map +1 -0
  52. package/dist/query-NTP5NVXN.js +25 -0
  53. package/dist/query-NTP5NVXN.js.map +1 -0
  54. package/dist/routing-BAEPFB7V.js +390 -0
  55. package/dist/routing-BAEPFB7V.js.map +1 -0
  56. package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
  57. package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
  58. package/dist/stripe-payment-history-SJPKA63N.js +67 -0
  59. package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
  60. package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
  61. package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
  62. package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
  63. package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
  64. package/dist/support-memory-WSG7SDKG.js +10 -0
  65. package/dist/support-memory-WSG7SDKG.js.map +1 -0
  66. package/package.json +10 -7
  67. package/.env.encrypted +0 -0
  68. package/CHANGELOG.md +0 -35
  69. package/data/tt-archive-dataset.json +0 -1
  70. package/data/validate-test-dataset.json +0 -97
  71. package/docs/CLI-AUTH.md +0 -504
  72. package/preload.ts +0 -18
  73. package/src/__tests__/init.test.ts +0 -74
  74. package/src/alignment-test.ts +0 -64
  75. package/src/check-apps.ts +0 -16
  76. package/src/commands/auth/decrypt.ts +0 -123
  77. package/src/commands/auth/encrypt.ts +0 -81
  78. package/src/commands/auth/index.ts +0 -50
  79. package/src/commands/auth/keygen.ts +0 -41
  80. package/src/commands/auth/status.ts +0 -164
  81. package/src/commands/axiom/forensic.ts +0 -868
  82. package/src/commands/axiom/index.ts +0 -697
  83. package/src/commands/build-dataset.ts +0 -311
  84. package/src/commands/db-status.ts +0 -47
  85. package/src/commands/deploys.ts +0 -219
  86. package/src/commands/eval-local/compare.ts +0 -171
  87. package/src/commands/eval-local/health.ts +0 -212
  88. package/src/commands/eval-local/index.ts +0 -76
  89. package/src/commands/eval-local/real-tools.ts +0 -416
  90. package/src/commands/eval-local/run.ts +0 -1168
  91. package/src/commands/eval-local/score-production.ts +0 -256
  92. package/src/commands/eval-local/seed.ts +0 -276
  93. package/src/commands/eval-pipeline/index.ts +0 -53
  94. package/src/commands/eval-pipeline/real-tools.ts +0 -492
  95. package/src/commands/eval-pipeline/run.ts +0 -1316
  96. package/src/commands/eval-pipeline/seed.ts +0 -395
  97. package/src/commands/eval-prompt.ts +0 -496
  98. package/src/commands/eval.test.ts +0 -253
  99. package/src/commands/eval.ts +0 -108
  100. package/src/commands/faq-classify.ts +0 -460
  101. package/src/commands/faq-cluster.ts +0 -135
  102. package/src/commands/faq-extract.ts +0 -249
  103. package/src/commands/faq-mine.ts +0 -432
  104. package/src/commands/faq-review.ts +0 -426
  105. package/src/commands/front/index.ts +0 -351
  106. package/src/commands/front/pull-conversations.ts +0 -275
  107. package/src/commands/front/tags.ts +0 -825
  108. package/src/commands/front-cache.ts +0 -1277
  109. package/src/commands/front-stats.ts +0 -75
  110. package/src/commands/health.test.ts +0 -82
  111. package/src/commands/health.ts +0 -362
  112. package/src/commands/init.test.ts +0 -89
  113. package/src/commands/init.ts +0 -106
  114. package/src/commands/inngest/client.ts +0 -294
  115. package/src/commands/inngest/events.ts +0 -296
  116. package/src/commands/inngest/investigate.ts +0 -382
  117. package/src/commands/inngest/runs.ts +0 -149
  118. package/src/commands/inngest/signal.ts +0 -143
  119. package/src/commands/kb-sync.ts +0 -498
  120. package/src/commands/memory/find.ts +0 -135
  121. package/src/commands/memory/get.ts +0 -87
  122. package/src/commands/memory/index.ts +0 -97
  123. package/src/commands/memory/stats.ts +0 -163
  124. package/src/commands/memory/store.ts +0 -49
  125. package/src/commands/memory/vote.ts +0 -159
  126. package/src/commands/pipeline.ts +0 -127
  127. package/src/commands/responses.ts +0 -856
  128. package/src/commands/tools.ts +0 -293
  129. package/src/commands/wizard.ts +0 -319
  130. package/src/index.ts +0 -172
  131. package/src/lib/crypto.ts +0 -56
  132. package/src/lib/env-loader.ts +0 -206
  133. package/src/lib/onepassword.ts +0 -137
  134. package/src/test-agent-local.ts +0 -115
  135. package/tsconfig.json +0 -11
  136. package/vitest.config.ts +0 -10
@@ -1,256 +0,0 @@
1
- /**
2
- * Score REAL production responses from the dataset
3
- *
4
- * No mocks, no generation - just score what was actually sent to customers.
5
- * This gives us the TRUE baseline quality of production.
6
- */
7
-
8
- import {
9
- BannedPhrases,
10
- Helpfulness,
11
- InternalStateLeakage,
12
- MetaCommentary,
13
- ProductFabrication,
14
- } from '@skillrecordings/core/evals/scorers'
15
- import { readFile, writeFile } from 'fs/promises'
16
-
17
/**
 * One conversation record as stored in the production dataset JSON file.
 *
 * Only `id`, `triggerMessage.subject` and `agentResponse.text` are read by
 * `scoreProduction`; the remaining fields describe the rest of the record.
 */
interface DatasetItem {
  id: string
  app: string
  conversationId: string
  customerEmail: string
  /** The message that started the conversation. */
  triggerMessage: {
    subject: string
    body: string
    // NOTE(review): numeric here but a string on agentResponse — confirm units/format.
    timestamp: number
  }
  /** The reply that was sent; absent when no response was recorded. */
  agentResponse?: {
    text: string
    category: string
    timestamp: string
  }
  /** Earlier messages in the thread, tagged by direction. */
  conversationHistory: {
    direction: 'in' | 'out'
    body: string
    timestamp: number
  }[]
}
38
-
39
/**
 * Scoring outcome for a single dataset item.
 */
interface ScoreResult {
  id: string
  subject: string
  /** False when the item had no (or only whitespace) response text. */
  hadResponse: boolean
  /** The response text that was scored; empty when `hadResponse` is false. */
  productionResponse: string
  /** Per-scorer outcome: four pass/fail pattern checks plus a numeric helpfulness score. */
  scores: Record<
    'internalLeaks' | 'metaCommentary' | 'bannedPhrases' | 'fabrication',
    { passed: boolean; matches: string[] }
  > & {
    helpfulness: { score: number }
  }
  /** True when none of the four pattern checks failed. */
  passed: boolean
  /** One human-readable entry per failed pattern check. */
  failureReasons: string[]
}
54
-
55
/**
 * Options accepted by `scoreProduction`.
 */
interface ScoreOptions {
  /** Path of the dataset JSON file to read. */
  dataset: string
  /** When set, the full report is also written to this path as JSON. */
  output?: string
  /** Print each failing item together with its failure reasons. */
  verbose?: boolean
  /** Print the report as JSON instead of the human-readable summary. */
  json?: boolean
}
61
-
62
/**
 * Score the responses that were actually sent in production.
 *
 * Reads the dataset JSON at `options.dataset` and runs every non-empty
 * `agentResponse.text` through the InternalStateLeakage, MetaCommentary,
 * BannedPhrases, ProductFabrication and Helpfulness scorers. An item fails
 * when any of the first four scorers returns a score other than 1; the
 * helpfulness score is recorded but does not affect pass/fail.
 *
 * Items without a response are counted separately, recorded as passed, and
 * excluded from the pass rate.
 *
 * Output:
 * - `options.output`: the full report (summary + per-item results) is written
 *   to that file as pretty-printed JSON.
 * - `options.json`: the same report is printed to stdout; otherwise a
 *   human-readable summary is printed.
 * - `options.verbose`: each failing item is listed with its reasons. In JSON
 *   mode these lines go to stderr so stdout stays a single parseable document.
 *
 * @param options - dataset path and output switches
 * @throws whatever `readFile`, `JSON.parse` or `writeFile` throw
 */
export async function scoreProduction(options: ScoreOptions): Promise<void> {
  const { dataset: datasetPath, output, verbose, json } = options

  const dataset: DatasetItem[] = JSON.parse(
    await readFile(datasetPath, 'utf-8')
  )

  if (!json) {
    console.log(`\n📊 Scoring ${dataset.length} production responses\n`)
  }

  // Keep stdout clean for the JSON document: diagnostics go to stderr there.
  const logFailure = json ? console.error : console.log

  // Pattern checks that decide pass/fail, with the label used in failure reasons.
  const patternChecks = [
    ['internalLeaks', 'Internal leak'],
    ['metaCommentary', 'Meta commentary'],
    ['bannedPhrases', 'Banned phrase'],
    ['fabrication', 'Fabrication'],
  ] as const

  const results: ScoreResult[] = []
  let passed = 0
  let failed = 0
  let noResponse = 0

  const failures = {
    internalLeaks: 0,
    metaCommentary: 0,
    bannedPhrases: 0,
    fabrication: 0,
  }

  for (const item of dataset) {
    const response = item.agentResponse?.text || ''
    const subject = item.triggerMessage?.subject || 'Unknown'

    if (response.trim().length === 0) {
      noResponse++
      results.push({
        id: item.id,
        subject,
        hadResponse: false,
        productionResponse: '',
        scores: {
          internalLeaks: { passed: true, matches: [] },
          metaCommentary: { passed: true, matches: [] },
          bannedPhrases: { passed: true, matches: [] },
          fabrication: { passed: true, matches: [] },
          helpfulness: { score: 0 },
        },
        passed: true, // No response = can't fail quality
        failureReasons: [],
      })
      continue
    }

    // Score the production response
    const leakResult = InternalStateLeakage({ output: response })
    const metaResult = MetaCommentary({ output: response })
    const bannedResult = BannedPhrases({ output: response })
    const fabResult = ProductFabrication({ output: response })
    const helpResult = Helpfulness({ output: response })

    const scores = {
      internalLeaks: {
        passed: leakResult.score === 1,
        matches: leakResult.metadata?.foundLeaks || [],
      },
      metaCommentary: {
        passed: metaResult.score === 1,
        matches: metaResult.metadata?.foundMeta || [],
      },
      bannedPhrases: {
        passed: bannedResult.score === 1,
        matches: bannedResult.metadata?.foundBanned || [],
      },
      fabrication: {
        passed: fabResult.score === 1,
        matches: fabResult.metadata?.foundFabrication || [],
      },
      helpfulness: {
        score: helpResult.score,
      },
    }

    const failureReasons: string[] = []
    for (const [key, label] of patternChecks) {
      const check = scores[key]
      if (check.passed) continue
      failureReasons.push(`${label}: ${check.matches.join(', ')}`)
      failures[key]++
    }

    const itemPassed = failureReasons.length === 0
    if (itemPassed) {
      passed++
    } else {
      failed++
    }

    results.push({
      id: item.id,
      subject,
      hadResponse: true,
      productionResponse: response,
      scores,
      passed: itemPassed,
      failureReasons,
    })

    if (verbose && !itemPassed) {
      logFailure(`❌ ${subject.slice(0, 60)}...`)
      for (const reason of failureReasons) {
        logFailure(`   └─ ${reason}`)
      }
    }
  }

  // Summary — the pass rate only considers items that had a response.
  const withResponses = passed + failed
  const passRate = withResponses > 0 ? (passed / withResponses) * 100 : 0

  // Built once; used for both the output file and the --json printout.
  const serializedReport = JSON.stringify(
    {
      summary: {
        total: dataset.length,
        withResponses,
        noResponse,
        passed,
        failed,
        passRate,
        failures,
      },
      results,
    },
    null,
    2
  )

  if (output) {
    await writeFile(output, serializedReport)
    if (!json) console.log(`Results saved to ${output}`)
  }

  if (json) {
    console.log(serializedReport)
    return
  }

  console.log('📊 Production Response Quality\n')
  console.log(`Total conversations: ${dataset.length}`)
  console.log(`  With response:     ${withResponses}`)
  console.log(`  No response:       ${noResponse}`)
  console.log('')
  console.log(`Quality (responses only):`)
  console.log(`  ✅ Passed: ${passed} (${passRate.toFixed(1)}%)`)
  console.log(`  ❌ Failed: ${failed}`)

  if (failed > 0) {
    console.log('\nFailure breakdown:')
    if (failures.internalLeaks > 0)
      console.log(`  🚨 Internal leaks:  ${failures.internalLeaks}`)
    if (failures.metaCommentary > 0)
      console.log(`  💬 Meta-commentary: ${failures.metaCommentary}`)
    if (failures.bannedPhrases > 0)
      console.log(`  🚫 Banned phrases:  ${failures.bannedPhrases}`)
    if (failures.fabrication > 0)
      console.log(`  🎭 Fabrication:     ${failures.fabrication}`)
  }
}
@@ -1,276 +0,0 @@
1
- /**
2
- * Seed command for local eval environment
3
- * Populates MySQL and Qdrant with test fixtures
4
- */
5
-
6
- import { join } from 'path'
7
- import { createOllamaClient } from '@skillrecordings/core/adapters/ollama'
8
- import { createQdrantClient } from '@skillrecordings/core/adapters/qdrant'
9
- import { readFile, readdir } from 'fs/promises'
10
- import { glob } from 'glob'
11
- import matter from 'gray-matter'
12
-
13
/**
 * Options accepted by the `seed` command.
 */
interface SeedOptions {
  /** Truncate the MySQL tables and delete the Qdrant collection before seeding. */
  clean?: boolean
  /** Fixtures directory; `seed` falls back to `'fixtures'` when unset. */
  fixtures?: string
  /** Emit a JSON result instead of the human-readable progress/summary. */
  json?: boolean
}
18
-
19
/**
 * Counters reported at the end of a `seed` run.
 */
interface SeedResult {
  /** Number of app fixtures passed to `seedApps`. */
  apps: number
  /** Number of customer fixture files loaded (not written to the database). */
  customers: number
  /** Number of knowledge documents found. */
  knowledge: number
  /** Number of scenario JSON files found under the fixtures directory. */
  scenarios: number
  /** Number of knowledge documents successfully embedded and upserted. */
  embeddings: number
}
26
-
27
/**
 * Seed the local eval environment from a fixtures directory.
 *
 * Steps, in order:
 * 1. Connect to the local eval MySQL database (fixed local credentials).
 * 2. With `options.clean`: truncate the support tables and delete the Qdrant
 *    collection.
 * 3. Insert apps from `<fixtures>/apps/*.json`.
 * 4. Count customer fixtures from `<fixtures>/customers/*.json` (they are not
 *    written to the database).
 * 5. Embed and upsert the markdown documents under `<fixtures>/knowledge`.
 * 6. Count scenario files under `<fixtures>/scenarios`.
 *
 * Prints a summary (or a JSON result when `options.json` is set). On any
 * failure the error is reported and the process exits with code 1.
 *
 * @param options - see {@link SeedOptions}
 */
export async function seed(options: SeedOptions): Promise<void> {
  const fixturesPath = options.fixtures || 'fixtures'

  // Progress messages are suppressed in JSON mode.
  const log = (message: string): void => {
    if (!options.json) console.log(message)
  }

  log('\n🌱 Seeding local eval environment...\n')

  const result: SeedResult = {
    apps: 0,
    customers: 0,
    knowledge: 0,
    scenarios: 0,
    embeddings: 0,
  }

  try {
    // Get MySQL connection
    const mysql = await import('mysql2/promise')
    const connection = await mysql.createConnection({
      host: 'localhost',
      port: 3306,
      user: 'eval_user',
      password: 'eval_pass',
      database: 'support_eval',
    })

    try {
      if (options.clean) {
        log('🧹 Cleaning existing data...')
        await cleanDatabase(connection)

        // Also clean Qdrant
        const qdrant = createQdrantClient()
        await qdrant.deleteCollection()
      }

      // 1. Seed apps
      log('📦 Seeding apps...')
      const apps = await loadJsonFiles(join(fixturesPath, 'apps'))
      result.apps = await seedApps(connection, apps)

      // 2. Customers are used by the mock integration client, not stored in DB
      log('👥 Loading customer fixtures...')
      const customers = await loadJsonFiles(join(fixturesPath, 'customers'))
      result.customers = customers.length

      // 3. Seed knowledge base with embeddings
      log('📚 Seeding knowledge base...')
      const knowledge = await loadKnowledgeFiles(
        join(fixturesPath, 'knowledge')
      )
      result.knowledge = knowledge.length
      result.embeddings = await seedKnowledgeBase(knowledge)

      // 4. Count scenarios
      const scenarioFiles = await glob(
        join(fixturesPath, 'scenarios/**/*.json')
      )
      result.scenarios = scenarioFiles.length
    } catch (error) {
      // Release the connection before reporting; a failure while closing must
      // not hide the error that actually aborted the seeding.
      await connection.end().catch(() => undefined)
      throw error
    }

    await connection.end()

    if (options.json) {
      console.log(JSON.stringify({ success: true, result }, null, 2))
    } else {
      console.log('\n✅ Seeding complete!\n')
      console.log(`   Apps:       ${result.apps}`)
      console.log(`   Customers:  ${result.customers}`)
      console.log(`   Knowledge:  ${result.knowledge} documents`)
      console.log(`   Embeddings: ${result.embeddings}`)
      console.log(`   Scenarios:  ${result.scenarios}\n`)
    }
  } catch (error) {
    if (options.json) {
      console.log(
        JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : 'Unknown error',
        })
      )
    } else {
      console.error('❌ Seeding failed:', error)
    }
    process.exit(1)
  }
}
109
-
110
/**
 * Empty every support table on the given connection.
 *
 * Foreign key checks are switched off for the duration of the TRUNCATEs and
 * are switched back on even when one of them fails, so the connection is
 * never left with checks disabled.
 *
 * @param connection - object exposing an async `execute(sql)` method
 *   (a mysql2 promise connection in `seed`)
 * @throws the error raised by a failing statement
 */
async function cleanDatabase(connection: any): Promise<void> {
  const tables = [
    'SUPPORT_trust_scores',
    'SUPPORT_audit_log',
    'SUPPORT_approval_requests',
    'SUPPORT_actions',
    'SUPPORT_conversations',
    'SUPPORT_apps',
  ]

  // Disable foreign key checks temporarily
  await connection.execute('SET FOREIGN_KEY_CHECKS = 0')

  try {
    for (const table of tables) {
      await connection.execute(`TRUNCATE TABLE ${table}`)
    }
  } finally {
    await connection.execute('SET FOREIGN_KEY_CHECKS = 1')
  }
}
129
-
130
/**
 * Load and parse every `*.json` file directly inside `dirPath`.
 *
 * A directory that does not exist yields an empty list, so an absent fixture
 * category is simply skipped. Any other failure (unreadable directory or
 * file, malformed JSON) is thrown instead of being silently reported as
 * "no fixtures".
 *
 * @param dirPath - directory to scan (not recursive)
 * @returns the parsed contents, one entry per JSON file
 * @throws when the directory or a file cannot be read, or a file is not valid JSON
 */
async function loadJsonFiles(dirPath: string): Promise<any[]> {
  let entries: string[]
  try {
    entries = await readdir(dirPath)
  } catch (error) {
    const isMissingDir =
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'ENOENT'
    if (isMissingDir) return []
    throw error
  }

  const jsonFiles = entries.filter((name) => name.endsWith('.json'))

  return Promise.all(
    jsonFiles.map(async (name) => {
      const filePath = join(dirPath, name)
      const content = await readFile(filePath, 'utf-8')
      try {
        return JSON.parse(content)
      } catch (error) {
        // Name the offending fixture; a bare SyntaxError gives no clue which file broke.
        const reason = error instanceof Error ? error.message : String(error)
        throw new Error(`Invalid JSON in fixture ${filePath}: ${reason}`)
      }
    })
  )
}
147
-
148
/**
 * A knowledge-base document loaded from a markdown fixture.
 */
interface KnowledgeDoc {
  /** Identifier generated at load time and used as the Qdrant point id. */
  id: string
  /** Markdown body with the frontmatter removed and surrounding whitespace trimmed. */
  content: string
  /** Frontmatter `type`; `'general'` when missing. */
  type: string
  /** Frontmatter `app`; `'unknown'` when missing. */
  app: string
  /** Frontmatter `tags`; empty when missing. */
  tags: string[]
  /** Path of the source markdown file. */
  filePath: string
}
156
-
157
/**
 * Build a random identifier in UUID v4 layout.
 *
 * Walks the template character by character: every `x` becomes a random hex
 * digit, the `y` becomes one of `8`, `9`, `a`, `b`, and all other characters
 * (dashes and the literal `4`) are copied as-is. Randomness comes from
 * `Math.random`.
 *
 * @returns a 36-character lowercase UUID-shaped string
 */
function generateUUID(): string {
  const template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'
  let uuid = ''

  for (const ch of template) {
    if (ch !== 'x' && ch !== 'y') {
      uuid += ch
      continue
    }
    const nibble = (Math.random() * 16) | 0
    // 'y' is restricted to the range 8..b by forcing the top two bits to 10.
    const digit = ch === 'x' ? nibble : (nibble & 0x3) | 0x8
    uuid += digit.toString(16)
  }

  return uuid
}
165
-
166
/**
 * Collect every markdown file under `basePath` as a knowledge document.
 *
 * Each file is read, its frontmatter is split off with gray-matter, and the
 * trimmed body becomes the document content. Missing frontmatter fields fall
 * back to `'general'` (type), `'unknown'` (app) and `[]` (tags).
 *
 * NOTE(review): ids are freshly generated on every call, so the same file
 * gets a different id on each run — confirm this is intended for re-seeding
 * without `--clean`.
 *
 * @param basePath - directory searched recursively for `*.md` files
 * @returns one document per markdown file found
 */
async function loadKnowledgeFiles(basePath: string): Promise<KnowledgeDoc[]> {
  const markdownPaths = await glob(join(basePath, '**/*.md'))
  const docs: KnowledgeDoc[] = []

  for (const filePath of markdownPaths) {
    const raw = await readFile(filePath, 'utf-8')
    const parsed = matter(raw)
    const meta = parsed.data

    const doc: KnowledgeDoc = {
      // Generate UUID for Qdrant compatibility
      id: generateUUID(),
      content: parsed.content.trim(),
      type: meta.type || 'general',
      app: meta.app || 'unknown',
      tags: meta.tags || [],
      filePath,
    }
    docs.push(doc)
  }

  return docs
}
189
-
190
/**
 * Insert the app fixtures and a default trust score per category for each app.
 *
 * For an app whose key already exists only `name` and `integration_base_url`
 * are updated. Trust score rows use the id `ts_<appId>_<category>` with a
 * score of 0.75 and a sample count of 25; existing rows are left untouched.
 *
 * @param connection - object exposing an async `execute(sql, params)` method
 * @param apps - app fixture objects as loaded from JSON
 * @returns the number of apps processed (`apps.length`)
 */
async function seedApps(connection: any, apps: any[]): Promise<number> {
  const insertAppSql = `INSERT INTO SUPPORT_apps (
        id, slug, name, front_inbox_id, instructor_teammate_id,
        stripe_account_id, stripe_connected, integration_base_url,
        webhook_secret, capabilities
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      ON DUPLICATE KEY UPDATE
        name = VALUES(name),
        integration_base_url = VALUES(integration_base_url)`

  const insertTrustScoreSql = `INSERT INTO SUPPORT_trust_scores (id, app_id, category, trust_score, sample_count)
         VALUES (?, ?, ?, 0.75, 25)
         ON DUPLICATE KEY UPDATE id = id`

  const trustCategories = ['refund', 'access', 'technical', 'general']

  for (const app of apps) {
    const appParams = [
      app.id,
      app.slug,
      app.name,
      app.front_inbox_id,
      app.instructor_teammate_id || null,
      app.stripe_account_id || null,
      app.stripe_connected || false,
      app.integration_base_url,
      app.webhook_secret,
      JSON.stringify(app.capabilities || []),
    ]
    await connection.execute(insertAppSql, appParams)

    // Seed default trust scores for this app
    for (const category of trustCategories) {
      const trustScoreId = `ts_${app.id}_${category}`
      await connection.execute(insertTrustScoreSql, [
        trustScoreId,
        app.id,
        category,
      ])
    }
  }

  return apps.length
}
230
-
231
/**
 * Embed each knowledge document with Ollama and upsert it into Qdrant.
 *
 * Before embedding, the Ollama model and the Qdrant collection are ensured.
 * The collection's vector size is 1024 when the `EMBEDDING_MODEL` env var
 * (default `mxbai-embed-large`) contains `mxbai`, otherwise 768.
 *
 * Documents are processed one at a time. A document that fails to embed or
 * upsert is logged and skipped; it does not abort the run. A progress
 * counter is written to stdout after each success.
 *
 * @param docs - documents to embed
 * @returns the number of documents successfully embedded and stored
 */
async function seedKnowledgeBase(docs: KnowledgeDoc[]): Promise<number> {
  const total = docs.length
  if (total === 0) return 0

  const qdrant = createQdrantClient()
  const ollama = createOllamaClient()

  // Ensure model is available
  await ollama.ensureModel()

  // Ensure collection exists
  // Use 1024 for mxbai-embed-large, 768 for nomic-embed-text
  const embeddingModel = process.env.EMBEDDING_MODEL || 'mxbai-embed-large'
  let vectorSize = 768
  if (embeddingModel.includes('mxbai')) {
    vectorSize = 1024
  }
  await qdrant.ensureCollection(vectorSize)

  let embeddedCount = 0

  for (const { id, content, type, app, tags } of docs) {
    try {
      // Generate embedding
      const vector = await ollama.embed(content)

      // Store in Qdrant
      const point = {
        id,
        vector,
        payload: { content, type, app, tags },
      }
      await qdrant.upsert([point])

      embeddedCount += 1
      process.stdout.write(`\r   Embedded: ${embeddedCount}/${total}`)
    } catch (error) {
      console.error(`\n   Failed to embed ${id}:`, error)
    }
  }

  console.log('') // New line after progress
  return embeddedCount
}
@@ -1,53 +0,0 @@
1
- /**
2
- * Eval-pipeline CLI commands
3
- *
4
- * Run evals against individual pipeline steps or full e2e.
5
- * Uses actual pipeline implementations from @skillrecordings/core/pipeline.
6
- */
7
-
8
- import type { Command } from 'commander'
9
- import { run } from './run'
10
- import { seed } from './seed'
11
-
12
/**
 * Register the `eval-pipeline` command group on the given program.
 *
 * Adds two subcommands:
 * - `run`  — runs the eval suite against a pipeline step (handled by `run`)
 * - `seed` — seeds MySQL and Qdrant with test fixtures (handled by `seed`)
 *
 * @param program - the commander program to attach the commands to
 */
export function registerEvalPipelineCommands(program: Command): void {
  // Commander invokes option parsers as (value, previous). Passing `parseInt`
  // directly would turn `previous` (the default/earlier value) into the radix,
  // which only works while that value happens to be 10 or undefined.
  const parseDecimal = (value: string): number => Number.parseInt(value, 10)

  const evalPipeline = program
    .command('eval-pipeline')
    .description('Evaluate pipeline steps against labeled scenarios')

  // Run subcommand (main functionality)
  evalPipeline
    .command('run')
    .description('Run eval suite against pipeline steps')
    .option(
      '--step <step>',
      'Which step to test: classify | route | gather | draft | validate | e2e',
      'classify'
    )
    .option('--scenarios <glob>', 'Scenario files glob pattern')
    .option('--dataset <file>', 'Dataset JSON file (alternative to scenarios)')
    .option('--limit <n>', 'Max scenarios to run', parseDecimal)
    .option('--verbose', 'Show individual scenario results')
    .option('--json', 'JSON output for scripting')
    .option(
      '--model <model>',
      'Model for LLM steps',
      'anthropic/claude-haiku-4-5'
    )
    .option('--force-llm', 'Skip fast path, always use LLM (classify step)')
    .option('--real-tools', 'Use real Docker MySQL/Qdrant instead of mocks')
    .option('--parallel <n>', 'Run N scenarios concurrently', parseDecimal, 10)
    .option('--cache-classify', 'Cache classify results between runs')
    .option('--clear-cache', 'Clear cached classify results before run')
    .option('--fail-fast', 'Stop on first failure')
    .option('--quick', 'Run smoke test subset (~10 scenarios)')
    .action(run)

  // Seed subcommand
  evalPipeline
    .command('seed')
    .description('Seed MySQL and Qdrant with test fixtures')
    .option('--clean', 'Drop and recreate all data')
    .option('--fixtures <path>', 'Path to fixtures directory', 'fixtures')
    .option('--json', 'JSON output for scripting')
    .action(seed)
}