@skillrecordings/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.encrypted +0 -0
- package/CHANGELOG.md +35 -0
- package/README.md +214 -0
- package/bin/skill.ts +3 -0
- package/data/tt-archive-dataset.json +1 -0
- package/data/validate-test-dataset.json +97 -0
- package/docs/CLI-AUTH.md +504 -0
- package/package.json +38 -0
- package/preload.ts +18 -0
- package/src/__tests__/init.test.ts +74 -0
- package/src/alignment-test.ts +64 -0
- package/src/check-apps.ts +16 -0
- package/src/commands/auth/decrypt.ts +123 -0
- package/src/commands/auth/encrypt.ts +81 -0
- package/src/commands/auth/index.ts +50 -0
- package/src/commands/auth/keygen.ts +41 -0
- package/src/commands/auth/status.ts +164 -0
- package/src/commands/axiom/forensic.ts +868 -0
- package/src/commands/axiom/index.ts +697 -0
- package/src/commands/build-dataset.ts +311 -0
- package/src/commands/db-status.ts +47 -0
- package/src/commands/deploys.ts +219 -0
- package/src/commands/eval-local/compare.ts +171 -0
- package/src/commands/eval-local/health.ts +212 -0
- package/src/commands/eval-local/index.ts +76 -0
- package/src/commands/eval-local/real-tools.ts +416 -0
- package/src/commands/eval-local/run.ts +1168 -0
- package/src/commands/eval-local/score-production.ts +256 -0
- package/src/commands/eval-local/seed.ts +276 -0
- package/src/commands/eval-pipeline/index.ts +53 -0
- package/src/commands/eval-pipeline/real-tools.ts +492 -0
- package/src/commands/eval-pipeline/run.ts +1316 -0
- package/src/commands/eval-pipeline/seed.ts +395 -0
- package/src/commands/eval-prompt.ts +496 -0
- package/src/commands/eval.test.ts +253 -0
- package/src/commands/eval.ts +108 -0
- package/src/commands/faq-classify.ts +460 -0
- package/src/commands/faq-cluster.ts +135 -0
- package/src/commands/faq-extract.ts +249 -0
- package/src/commands/faq-mine.ts +432 -0
- package/src/commands/faq-review.ts +426 -0
- package/src/commands/front/index.ts +351 -0
- package/src/commands/front/pull-conversations.ts +275 -0
- package/src/commands/front/tags.ts +825 -0
- package/src/commands/front-cache.ts +1277 -0
- package/src/commands/front-stats.ts +75 -0
- package/src/commands/health.test.ts +82 -0
- package/src/commands/health.ts +362 -0
- package/src/commands/init.test.ts +89 -0
- package/src/commands/init.ts +106 -0
- package/src/commands/inngest/client.ts +294 -0
- package/src/commands/inngest/events.ts +296 -0
- package/src/commands/inngest/investigate.ts +382 -0
- package/src/commands/inngest/runs.ts +149 -0
- package/src/commands/inngest/signal.ts +143 -0
- package/src/commands/kb-sync.ts +498 -0
- package/src/commands/memory/find.ts +135 -0
- package/src/commands/memory/get.ts +87 -0
- package/src/commands/memory/index.ts +97 -0
- package/src/commands/memory/stats.ts +163 -0
- package/src/commands/memory/store.ts +49 -0
- package/src/commands/memory/vote.ts +159 -0
- package/src/commands/pipeline.ts +127 -0
- package/src/commands/responses.ts +856 -0
- package/src/commands/tools.ts +293 -0
- package/src/commands/wizard.ts +319 -0
- package/src/index.ts +172 -0
- package/src/lib/crypto.ts +56 -0
- package/src/lib/env-loader.ts +206 -0
- package/src/lib/onepassword.ts +137 -0
- package/src/test-agent-local.ts +115 -0
- package/tsconfig.json +11 -0
- package/vitest.config.ts +10 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real tool implementations that query Docker services
|
|
3
|
+
*
|
|
4
|
+
* Unlike the mock tools, these actually hit MySQL and Qdrant
|
|
5
|
+
* for production-like eval behavior.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
type QdrantClient,
|
|
10
|
+
createQdrantClient,
|
|
11
|
+
} from '@skillrecordings/core/adapters/qdrant'
|
|
12
|
+
import { tool } from 'ai'
|
|
13
|
+
import { type Pool, createPool } from 'mysql2/promise'
|
|
14
|
+
import { z } from 'zod'
|
|
15
|
+
|
|
16
|
+
/**
 * Module-level MySQL pool shared by every tool created in this file.
 * Stays `null` until `initRealTools` assigns it; `cleanupRealTools` resets it.
 */
let mysqlPool: Pool | null = null

/**
 * Module-level Qdrant client shared by the search tools.
 * Stays `null` until `initRealTools` assigns it; `cleanupRealTools` resets it.
 */
let qdrantClient: QdrantClient | null = null
|
|
18
|
+
|
|
19
|
+
/**
 * Connection settings accepted by `initRealTools`.
 *
 * Every section is optional: a section that is omitted means the matching
 * service is not initialised.
 */
export interface RealToolsConfig {
  /** MySQL settings; spread as-is into the `createPool` options. */
  mysql?: {
    host: string
    port: number
    user: string
    password: string
    database: string
  }
  /**
   * Qdrant settings. Its presence triggers creation of the Qdrant client.
   * NOTE(review): `url` and `collection` are not read anywhere in this file —
   * confirm how the adapter picks up its endpoint.
   */
  qdrant?: {
    url: string
    collection: string
  }
  /**
   * Text -> embedding vector function.
   * NOTE(review): not read from the config in this file; `createRealTools`
   * receives its own `embedFn` argument instead.
   */
  embedFn?: (text: string) => Promise<number[]>
}
|
|
33
|
+
|
|
34
|
+
/**
 * Configuration used when `initRealTools` is called without an argument:
 * a MySQL server on localhost:3306 and a Qdrant instance on localhost:6333.
 */
const DEFAULT_CONFIG: RealToolsConfig = {
  // Local eval database credentials.
  mysql: {
    host: 'localhost',
    port: 3306,
    user: 'eval_user',
    password: 'eval_pass',
    database: 'support_eval',
  },
  // Local vector store.
  qdrant: {
    url: 'http://localhost:6333',
    collection: 'support_knowledge',
  },
}
|
|
47
|
+
|
|
48
|
+
/**
 * Initialize connections to Docker services.
 *
 * For MySQL, a pool is created from `config.mysql` and verified with a single
 * `ping()`. The pool is only published to the module-level `mysqlPool` once
 * the ping succeeds; on failure the pool is closed again and the original
 * error is re-thrown, so no half-initialised pool is left behind.
 *
 * Calling this function again while a pool already exists closes the previous
 * pool before replacing it.
 *
 * @param config - Services to connect to. Defaults to `DEFAULT_CONFIG`.
 *   Sections that are omitted are skipped.
 * @throws Whatever `getConnection()` / `ping()` rejects with when MySQL is
 *   unreachable.
 */
export async function initRealTools(
  config: RealToolsConfig = DEFAULT_CONFIG
): Promise<void> {
  if (config.mysql) {
    // Re-initialisation: close the previous pool so its sockets are not leaked.
    if (mysqlPool) {
      const stalePool = mysqlPool
      mysqlPool = null
      try {
        await stalePool.end()
      } catch (closeError) {
        // A pool that fails to close must not block a fresh connection.
        console.error('initRealTools: failed to close previous pool:', closeError)
      }
    }

    const pool = createPool({
      ...config.mysql,
      waitForConnections: true,
      connectionLimit: 5,
    })

    try {
      // Test connection
      const conn = await pool.getConnection()
      try {
        await conn.ping()
      } finally {
        // Always hand the connection back, even when the ping fails.
        conn.release()
      }
    } catch (error) {
      // The pool is unusable: close it (best effort) and surface the
      // original connection error rather than any close error.
      try {
        await pool.end()
      } catch (closeError) {
        console.error('initRealTools: failed to close pool:', closeError)
      }
      throw error
    }

    mysqlPool = pool
  }

  if (config.qdrant) {
    // NOTE(review): `config.qdrant.url` / `collection` are not passed to the
    // factory here — presumably the adapter reads its own settings; confirm.
    qdrantClient = createQdrantClient()
  }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Clean up connections.
 *
 * Both module-level handles are cleared *before* the pool is closed, so the
 * module state is reset even when `pool.end()` rejects, and no tool can pick
 * up a pool that is in the middle of shutting down.
 *
 * @throws Whatever `pool.end()` rejects with; state is already reset by then.
 */
export async function cleanupRealTools(): Promise<void> {
  const pool = mysqlPool

  mysqlPool = null
  qdrantClient = null

  if (pool) {
    await pool.end()
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Create real tools that query Docker services.
 *
 * `lookupUser` and `getPaymentHistory` query the module-level MySQL pool;
 * `searchKnowledge` and `searchProductContent` query the module-level Qdrant
 * client using `embedFn` to vectorise the query. All remaining tools return
 * fixed or echo-style payloads and perform no I/O.
 *
 * Tools read the shared connections at call time, so they degrade to empty /
 * "not connected" results when `initRealTools` has not been run.
 *
 * @param scenario - Scenario metadata. Accepted for interface compatibility;
 *   no tool in this function currently reads it (each tool takes its own
 *   `appId` / email input).
 * @param embedFn - Turns a text query into an embedding vector. When omitted,
 *   the two search tools return empty results.
 * @returns An object mapping tool names to tool definitions.
 */
export function createRealTools(
  scenario: { appId?: string; customerEmail?: string },
  embedFn?: (text: string) => Promise<number[]>
) {
  const DAY_MS = 24 * 60 * 60 * 1000

  return {
    lookupUser: tool({
      description: 'Look up user by email in the product database',
      inputSchema: z.object({
        email: z.string().describe('Customer email address'),
        appId: z.string().describe('App/product identifier'),
      }),
      execute: async ({ email, appId: queryAppId }) => {
        if (!mysqlPool) {
          return { found: false, error: 'MySQL not connected' }
        }

        try {
          // Look up customer in conversations table (we store customer emails there)
          const [convRows] = (await mysqlPool.query(
            `SELECT DISTINCT customer_email, customer_name
             FROM SUPPORT_conversations
             WHERE customer_email = ? AND (app_id = ? OR app_id IS NULL)
             LIMIT 1`,
            [email, queryAppId]
          )) as any[]

          // For eval purposes, also check if there's fixture data
          // In real system this would call the integration client
          if (convRows.length > 0) {
            return {
              found: true,
              user: {
                id: `user_${email.split('@')[0]}`,
                email: convRows[0].customer_email,
                name: convRows[0].customer_name || 'Customer',
              },
              // Synthetic purchase: one active purchase dated 30 days ago.
              purchases: [
                {
                  id: `purch_${Date.now()}`,
                  product:
                    queryAppId === 'ai-hero'
                      ? 'AI Hero Workshop'
                      : 'Total TypeScript',
                  date: new Date(Date.now() - 30 * DAY_MS)
                    .toISOString()
                    .split('T')[0],
                  status: 'active',
                },
              ],
            }
          }

          return {
            found: false,
            user: null,
            purchases: [],
          }
        } catch (error) {
          console.error('lookupUser error:', error)
          return { found: false, error: String(error) }
        }
      },
    }),

    searchKnowledge: tool({
      description: 'Search the knowledge base for relevant information',
      inputSchema: z.object({
        query: z.string().describe('Search query'),
        appId: z.string().describe('App/product identifier'),
      }),
      execute: async ({ query, appId: queryAppId }) => {
        if (!qdrantClient || !embedFn) {
          return { similarTickets: [], knowledge: [], goodResponses: [] }
        }

        try {
          const queryVector = await embedFn(query)

          // Search the knowledge collection using our custom client,
          // matching points tagged with the requested app or with 'general'.
          const results = await qdrantClient.search(queryVector, 5, {
            should: [
              { key: 'app_id', match: { value: queryAppId } },
              { key: 'app_id', match: { value: 'general' } },
            ],
          })

          // Project the hits whose payload `type` matches into { data, score }.
          const pickByType = (payloadType: string) =>
            results
              .filter((r) => r.payload?.type === payloadType)
              .map((r) => ({
                data: r.payload?.content as string,
                score: r.score,
              }))

          return {
            similarTickets: pickByType('ticket'),
            knowledge: pickByType('knowledge'),
            goodResponses: pickByType('response'),
          }
        } catch (error) {
          console.error('searchKnowledge error:', error)
          return { similarTickets: [], knowledge: [], goodResponses: [] }
        }
      },
    }),

    searchProductContent: tool({
      description: 'Search product content (courses, tutorials, etc)',
      inputSchema: z.object({
        query: z.string().describe('Search query'),
      }),
      execute: async ({ query }) => {
        if (!qdrantClient || !embedFn) {
          return { results: [] }
        }

        try {
          const queryVector = await embedFn(query)

          // Use our custom Qdrant client - note: this searches the default collection
          // In practice, product content might be in a separate collection
          const results = await qdrantClient.search(queryVector, 3)

          return {
            results: results
              .filter((r) => r.payload?.type === 'content')
              .map((r) => ({
                title: r.payload?.title as string,
                type: (r.payload?.content_type as string) || 'course',
                url: r.payload?.url as string,
              })),
          }
        } catch (error) {
          // Collection might not exist: stay best-effort, but log the failure
          // like the other search tools so it is not invisible.
          console.error('searchProductContent error:', error)
          return { results: [] }
        }
      },
    }),

    draftResponse: tool({
      description: 'Draft a response to send to the customer',
      inputSchema: z.object({
        body: z.string().describe('The response body to draft'),
      }),
      execute: async ({ body }) => {
        return { drafted: true, body }
      },
    }),

    escalateToHuman: tool({
      description: 'Escalate the conversation to human support',
      inputSchema: z.object({
        reason: z.string().describe('Reason for escalation'),
        urgency: z.enum(['low', 'medium', 'high']).describe('Urgency level'),
      }),
      execute: async ({ reason, urgency }) => {
        return { escalated: true, reason, urgency }
      },
    }),

    assignToInstructor: tool({
      description:
        'Assign conversation to instructor for personal correspondence',
      inputSchema: z.object({
        conversationId: z.string(),
        reason: z.string(),
      }),
      execute: async ({ conversationId, reason }) => ({
        status: 'pending_approval',
        conversationId,
        reason,
        message: 'Instructor assignment submitted for approval',
      }),
    }),

    processRefund: tool({
      description: 'Process a refund for a purchase',
      inputSchema: z.object({
        purchaseId: z.string(),
        appId: z.string(),
        reason: z.string(),
      }),
      execute: async ({ purchaseId, reason }) => ({
        status: 'pending_approval',
        purchaseId,
        reason,
        message: 'Refund submitted for approval',
      }),
    }),

    transferPurchase: tool({
      description: 'Transfer purchase to another email',
      inputSchema: z.object({
        purchaseId: z.string(),
        appId: z.string(),
        fromUserId: z.string(),
        toEmail: z.string(),
        reason: z.string(),
      }),
      execute: async () => ({
        status: 'pending_approval',
        message: 'Transfer submitted for approval',
      }),
    }),

    getPaymentHistory: tool({
      description: 'Get payment history from Stripe',
      inputSchema: z.object({
        customerEmail: z.string(),
        // NOTE(review): `limit` is accepted but not applied below.
        limit: z.number().optional(),
      }),
      execute: async ({ customerEmail: email }) => {
        // In real system, this would call Stripe
        // For eval, return synthetic data based on whether user exists
        if (!mysqlPool) {
          return { charges: [] }
        }

        try {
          const [rows] = (await mysqlPool.query(
            `SELECT 1 FROM SUPPORT_conversations WHERE customer_email = ? LIMIT 1`,
            [email]
          )) as any[]

          if (rows.length > 0) {
            return {
              charges: [
                {
                  id: `ch_eval_${Date.now()}`,
                  amount: 24900,
                  status: 'succeeded',
                  // Millisecond timestamp, one week in the past.
                  created: Date.now() - 7 * DAY_MS,
                },
              ],
            }
          }

          return { charges: [] }
        } catch (error) {
          // Best-effort: report no charges, but leave a trace of the failure.
          console.error('getPaymentHistory error:', error)
          return { charges: [] }
        }
      },
    }),

    check_product_availability: tool({
      description: 'Check if product is available or sold out',
      inputSchema: z.object({
        productId: z.string().optional(),
        appId: z.string(),
      }),
      execute: async () => ({
        soldOut: false,
        quantityRemaining: -1,
        enrollmentOpen: true,
      }),
    }),

    memory_search: tool({
      description: 'Search semantic memory',
      inputSchema: z.object({ query: z.string() }),
      execute: async () => ({ results: [], total: 0 }),
    }),

    memory_store: tool({
      description: 'Store learning in memory',
      inputSchema: z.object({
        content: z.string(),
        tags: z.array(z.string()).optional(),
      }),
      execute: async () => ({ stored: true, id: 'mem_eval_1' }),
    }),

    memory_vote: tool({
      description: 'Vote on memory usefulness',
      inputSchema: z.object({
        memoryId: z.string(),
        vote: z.enum(['up', 'down']),
      }),
      execute: async () => ({ success: true }),
    }),

    memory_cite: tool({
      description: 'Cite a memory as used',
      inputSchema: z.object({ memoryId: z.string() }),
      execute: async () => ({ cited: true }),
    }),

    getSubscriptionStatus: tool({
      description: 'Get subscription status',
      inputSchema: z.object({
        customerId: z.string(),
        stripeAccountId: z.string(),
      }),
      execute: async () => ({ subscription: null }),
    }),

    lookupCharge: tool({
      description: 'Look up specific charge',
      inputSchema: z.object({ chargeId: z.string() }),
      execute: async ({ chargeId }) => ({
        charge: {
          id: chargeId,
          amount: 24900,
          status: 'succeeded',
          refunded: false,
        },
      }),
    }),

    verifyRefund: tool({
      description: 'Verify refund status',
      inputSchema: z.object({ refundId: z.string() }),
      execute: async ({ refundId }) => ({
        refund: {
          id: refundId,
          status: 'succeeded',
          amount: 24900,
        },
      }),
    }),
  }
}
|