@skillrecordings/cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skill.mjs +27 -0
- package/dist/chunk-2NCCVTEE.js +22342 -0
- package/dist/chunk-2NCCVTEE.js.map +1 -0
- package/dist/chunk-3E3GYSZR.js +7071 -0
- package/dist/chunk-3E3GYSZR.js.map +1 -0
- package/dist/chunk-F4EM72IH.js +86 -0
- package/dist/chunk-F4EM72IH.js.map +1 -0
- package/dist/chunk-FGP7KUQW.js +432 -0
- package/dist/chunk-FGP7KUQW.js.map +1 -0
- package/dist/chunk-H3D6VCME.js +55 -0
- package/dist/chunk-H3D6VCME.js.map +1 -0
- package/dist/chunk-HK3PEWFD.js +208 -0
- package/dist/chunk-HK3PEWFD.js.map +1 -0
- package/dist/chunk-KEV3QKXP.js +4495 -0
- package/dist/chunk-KEV3QKXP.js.map +1 -0
- package/dist/chunk-MG37YDAK.js +882 -0
- package/dist/chunk-MG37YDAK.js.map +1 -0
- package/dist/chunk-MLNDSBZ4.js +482 -0
- package/dist/chunk-MLNDSBZ4.js.map +1 -0
- package/dist/chunk-N2WIV2JV.js +22 -0
- package/dist/chunk-N2WIV2JV.js.map +1 -0
- package/dist/chunk-PWWRCN5W.js +2067 -0
- package/dist/chunk-PWWRCN5W.js.map +1 -0
- package/dist/chunk-SKHBM3XP.js +7746 -0
- package/dist/chunk-SKHBM3XP.js.map +1 -0
- package/dist/chunk-WFANXVQG.js +64 -0
- package/dist/chunk-WFANXVQG.js.map +1 -0
- package/dist/chunk-WYKL32C3.js +275 -0
- package/dist/chunk-WYKL32C3.js.map +1 -0
- package/dist/chunk-ZNF7XD2S.js +134 -0
- package/dist/chunk-ZNF7XD2S.js.map +1 -0
- package/dist/config-AUAIYDSI.js +20 -0
- package/dist/config-AUAIYDSI.js.map +1 -0
- package/dist/fileFromPath-XN7LXIBI.js +134 -0
- package/dist/fileFromPath-XN7LXIBI.js.map +1 -0
- package/dist/getMachineId-bsd-KW2E7VK3.js +42 -0
- package/dist/getMachineId-bsd-KW2E7VK3.js.map +1 -0
- package/dist/getMachineId-darwin-ROXJUJX5.js +42 -0
- package/dist/getMachineId-darwin-ROXJUJX5.js.map +1 -0
- package/dist/getMachineId-linux-KVZEHQSU.js +34 -0
- package/dist/getMachineId-linux-KVZEHQSU.js.map +1 -0
- package/dist/getMachineId-unsupported-PPRILPPA.js +25 -0
- package/dist/getMachineId-unsupported-PPRILPPA.js.map +1 -0
- package/dist/getMachineId-win-IIF36LEJ.js +44 -0
- package/dist/getMachineId-win-IIF36LEJ.js.map +1 -0
- package/dist/index.js +112703 -0
- package/dist/index.js.map +1 -0
- package/dist/lib-R6DEEJCP.js +7623 -0
- package/dist/lib-R6DEEJCP.js.map +1 -0
- package/dist/pipeline-IAVVAKTU.js +120 -0
- package/dist/pipeline-IAVVAKTU.js.map +1 -0
- package/dist/query-NTP5NVXN.js +25 -0
- package/dist/query-NTP5NVXN.js.map +1 -0
- package/dist/routing-BAEPFB7V.js +390 -0
- package/dist/routing-BAEPFB7V.js.map +1 -0
- package/dist/stripe-lookup-charge-EPRUMZDL.js +56 -0
- package/dist/stripe-lookup-charge-EPRUMZDL.js.map +1 -0
- package/dist/stripe-payment-history-SJPKA63N.js +67 -0
- package/dist/stripe-payment-history-SJPKA63N.js.map +1 -0
- package/dist/stripe-subscription-status-L4Z65GB3.js +58 -0
- package/dist/stripe-subscription-status-L4Z65GB3.js.map +1 -0
- package/dist/stripe-verify-refund-FZDKCIUQ.js +54 -0
- package/dist/stripe-verify-refund-FZDKCIUQ.js.map +1 -0
- package/dist/support-memory-WSG7SDKG.js +10 -0
- package/dist/support-memory-WSG7SDKG.js.map +1 -0
- package/package.json +10 -7
- package/.env.encrypted +0 -0
- package/CHANGELOG.md +0 -35
- package/data/tt-archive-dataset.json +0 -1
- package/data/validate-test-dataset.json +0 -97
- package/docs/CLI-AUTH.md +0 -504
- package/preload.ts +0 -18
- package/src/__tests__/init.test.ts +0 -74
- package/src/alignment-test.ts +0 -64
- package/src/check-apps.ts +0 -16
- package/src/commands/auth/decrypt.ts +0 -123
- package/src/commands/auth/encrypt.ts +0 -81
- package/src/commands/auth/index.ts +0 -50
- package/src/commands/auth/keygen.ts +0 -41
- package/src/commands/auth/status.ts +0 -164
- package/src/commands/axiom/forensic.ts +0 -868
- package/src/commands/axiom/index.ts +0 -697
- package/src/commands/build-dataset.ts +0 -311
- package/src/commands/db-status.ts +0 -47
- package/src/commands/deploys.ts +0 -219
- package/src/commands/eval-local/compare.ts +0 -171
- package/src/commands/eval-local/health.ts +0 -212
- package/src/commands/eval-local/index.ts +0 -76
- package/src/commands/eval-local/real-tools.ts +0 -416
- package/src/commands/eval-local/run.ts +0 -1168
- package/src/commands/eval-local/score-production.ts +0 -256
- package/src/commands/eval-local/seed.ts +0 -276
- package/src/commands/eval-pipeline/index.ts +0 -53
- package/src/commands/eval-pipeline/real-tools.ts +0 -492
- package/src/commands/eval-pipeline/run.ts +0 -1316
- package/src/commands/eval-pipeline/seed.ts +0 -395
- package/src/commands/eval-prompt.ts +0 -496
- package/src/commands/eval.test.ts +0 -253
- package/src/commands/eval.ts +0 -108
- package/src/commands/faq-classify.ts +0 -460
- package/src/commands/faq-cluster.ts +0 -135
- package/src/commands/faq-extract.ts +0 -249
- package/src/commands/faq-mine.ts +0 -432
- package/src/commands/faq-review.ts +0 -426
- package/src/commands/front/index.ts +0 -351
- package/src/commands/front/pull-conversations.ts +0 -275
- package/src/commands/front/tags.ts +0 -825
- package/src/commands/front-cache.ts +0 -1277
- package/src/commands/front-stats.ts +0 -75
- package/src/commands/health.test.ts +0 -82
- package/src/commands/health.ts +0 -362
- package/src/commands/init.test.ts +0 -89
- package/src/commands/init.ts +0 -106
- package/src/commands/inngest/client.ts +0 -294
- package/src/commands/inngest/events.ts +0 -296
- package/src/commands/inngest/investigate.ts +0 -382
- package/src/commands/inngest/runs.ts +0 -149
- package/src/commands/inngest/signal.ts +0 -143
- package/src/commands/kb-sync.ts +0 -498
- package/src/commands/memory/find.ts +0 -135
- package/src/commands/memory/get.ts +0 -87
- package/src/commands/memory/index.ts +0 -97
- package/src/commands/memory/stats.ts +0 -163
- package/src/commands/memory/store.ts +0 -49
- package/src/commands/memory/vote.ts +0 -159
- package/src/commands/pipeline.ts +0 -127
- package/src/commands/responses.ts +0 -856
- package/src/commands/tools.ts +0 -293
- package/src/commands/wizard.ts +0 -319
- package/src/index.ts +0 -172
- package/src/lib/crypto.ts +0 -56
- package/src/lib/env-loader.ts +0 -206
- package/src/lib/onepassword.ts +0 -137
- package/src/test-agent-local.ts +0 -115
- package/tsconfig.json +0 -11
- package/vitest.config.ts +0 -10
|
@@ -1,460 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FAQ Topic Classification CLI Command
|
|
3
|
-
*
|
|
4
|
-
* Classifies conversations from parquet into taxonomy topics using Claude Haiku.
|
|
5
|
-
* Resumable - appends to JSONL and skips already-classified conversations.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* bun src/index.ts faq-classify
|
|
9
|
-
* bun src/index.ts faq-classify --batch-size 50
|
|
10
|
-
* bun src/index.ts faq-classify --dry-run
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'fs'
|
|
14
|
-
import { dirname, join, resolve } from 'path'
|
|
15
|
-
import { generateObject } from 'ai'
|
|
16
|
-
import type { Command } from 'commander'
|
|
17
|
-
import { z } from 'zod'
|
|
18
|
-
|
|
19
|
-
/** Default locations, all resolved against the project root. */
const PROJECT_ROOT = resolve(__dirname, '..', '..', '..', '..')
const DEFAULT_PARQUET_PATH = join(
  PROJECT_ROOT,
  'artifacts',
  'phase-0',
  'embeddings',
  'v2',
  'conversations.parquet'
)
const DEFAULT_TAXONOMY_PATH = join(
  PROJECT_ROOT,
  'artifacts',
  'phase-1',
  'llm-topics',
  'taxonomy.json'
)
const DEFAULT_OUTPUT_PATH = join(
  PROJECT_ROOT,
  'artifacts',
  'phase-1',
  'llm-topics',
  'classifications.jsonl'
)

/**
 * Throttling knobs: conversations handed to one processBatch call, requests
 * issued concurrently inside it, and the pause between concurrent chunks.
 */
const DEFAULT_BATCH_SIZE = 100
const CONCURRENT_LIMIT = 10
const DELAY_BETWEEN_BATCHES_MS = 100

/** Model identifier passed to generateObject for classification. */
const MODEL = 'anthropic/claude-haiku-4-5'
|
|
41
|
-
|
|
42
|
-
// ============================================================================
|
|
43
|
-
// Types
|
|
44
|
-
// ============================================================================
|
|
45
|
-
|
|
46
|
-
/** One taxonomy entry a conversation can be classified into. */
interface Topic {
  /** Identifier the classifier must return; also listed in the prompt. */
  id: string
  /** Topic name (not used by the prompt builder in this file). */
  name: string
  /** Description shown next to the id in the classification prompt. */
  description: string
  /** Sample messages; the prompt quotes at most the first two. */
  examples: string[]
}
|
|
52
|
-
|
|
53
|
-
/** Shape expected of the taxonomy JSON file read by the classify command. */
interface Taxonomy {
  version: string
  generatedAt: string
  model: string
  /** Topics offered to the classifier; their ids form the valid-id set. */
  topics: Topic[]
}
|
|
59
|
-
|
|
60
|
-
/** A conversation row as loaded from the parquet file. */
interface Conversation {
  /** Key used for resume bookkeeping and in every output record. */
  conversation_id: string
  /** Text sent to the classifier (truncated by the caller). */
  first_message: string
  inbox_id?: string
  /** Declared for completeness; the loader's query does not select it. */
  tags?: string[]
  token_count?: number
}
|
|
67
|
-
|
|
68
|
-
/** One line of the output JSONL file. */
interface Classification {
  conversationId: string
  /** A taxonomy topic id, 'unknown', or 'error' for a failed request. */
  topicId: string
  /** Confidence reported by the model; 0 for 'error' records. */
  confidence: number
  /** ISO-8601 time at which the record was created. */
  timestamp: string
}
|
|
74
|
-
|
|
75
|
-
// ============================================================================
|
|
76
|
-
// DuckDB Loader (via CLI for reliability)
|
|
77
|
-
// ============================================================================
|
|
78
|
-
|
|
79
|
-
/**
 * Load every conversation with a non-null first message from a parquet file
 * by invoking the DuckDB CLI with JSON output.
 *
 * @param parquetPath - Path of the parquet file to read.
 * @returns Rows ordered by conversation_id.
 * @throws If the `duckdb` executable cannot be run, exits non-zero, or prints
 *   output that is not valid JSON.
 */
async function loadConversationsFromParquet(
  parquetPath: string
): Promise<Conversation[]> {
  const { execFileSync } = await import('child_process')

  // Double any single quote so the path stays a valid SQL string literal.
  const sqlPath = parquetPath.replace(/'/g, "''")

  const query = `
    SELECT
      conversation_id,
      first_message,
      inbox_id,
      token_count
    FROM read_parquet('${sqlPath}')
    WHERE first_message IS NOT NULL
    ORDER BY conversation_id
  `

  // Pass the query as a single argv entry instead of building a shell
  // command line, so `$`, backticks and quotes in the path are never
  // interpreted by a shell.
  const result = execFileSync('duckdb', ['-json', '-c', query], {
    encoding: 'utf-8',
    maxBuffer: 100 * 1024 * 1024, // 100MB buffer for large datasets
  })

  // Empty output cannot be parsed as JSON; treat it as "no rows".
  if (result.trim() === '') {
    return []
  }

  const rows = JSON.parse(result) as Conversation[]
  return rows
}
|
|
104
|
-
|
|
105
|
-
// ============================================================================
|
|
106
|
-
// JSONL Resume Support
|
|
107
|
-
// ============================================================================
|
|
108
|
-
|
|
109
|
-
/**
 * Collect the conversation ids already present in the output JSONL file so a
 * rerun can skip them.
 *
 * Blank lines, lines that are not valid JSON, and JSON values without a
 * string `conversationId` are ignored (best-effort resume).
 *
 * @param outputPath - Path of the JSONL file; it may not exist yet.
 * @returns The set of ids found (empty when the file is missing).
 */
function loadExistingClassifications(outputPath: string): Set<string> {
  const classifiedIds = new Set<string>()

  if (!existsSync(outputPath)) {
    return classifiedIds
  }

  const content = readFileSync(outputPath, 'utf-8')

  for (const line of content.split('\n')) {
    if (!line.trim()) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      // Skip malformed lines (e.g. a partially written final line)
      continue
    }

    // Only records that actually carry a string id count as classified;
    // this keeps `undefined` out of the set for lines such as `{}`.
    if (typeof parsed !== 'object' || parsed === null) continue
    const id = (parsed as { conversationId?: unknown }).conversationId
    if (typeof id === 'string') {
      classifiedIds.add(id)
    }
  }

  return classifiedIds
}
|
|
130
|
-
|
|
131
|
-
/**
 * Append one classification to the output file as a single JSON line.
 *
 * @param outputPath - JSONL file to append to.
 * @param classification - Record to serialize.
 */
function appendClassification(
  outputPath: string,
  classification: Classification
): void {
  const jsonLine = `${JSON.stringify(classification)}\n`
  appendFileSync(outputPath, jsonLine)
}
|
|
137
|
-
|
|
138
|
-
// ============================================================================
|
|
139
|
-
// LLM Classification
|
|
140
|
-
// ============================================================================
|
|
141
|
-
|
|
142
|
-
/**
 * Structured output requested from the model: a topic id plus a confidence
 * constrained to the inclusive range 0..1.
 */
const classifySchema = z.object({
  topicId: z.string(),
  confidence: z.number().gte(0).lte(1),
})
|
|
146
|
-
|
|
147
|
-
/**
 * Build the system prompt that lists every taxonomy topic and the
 * classification rules.
 *
 * Each topic is rendered as `- <id>: <description>` followed by up to two
 * quoted examples. The example clause is omitted when a topic has no
 * examples, and a missing `examples` array is tolerated because the taxonomy
 * is loaded from an unvalidated JSON file.
 *
 * @param taxonomy - Taxonomy whose topics are offered to the classifier.
 * @returns The complete system prompt text.
 */
function buildClassifyPrompt(taxonomy: Taxonomy): string {
  const topicList = taxonomy.topics
    .map((t) => {
      const exampleText = (t.examples ?? []).slice(0, 2).join('; ')
      const exampleClause = exampleText ? ` (e.g., "${exampleText}")` : ''
      return `- ${t.id}: ${t.description}${exampleClause}`
    })
    .join('\n')

  return [
    "You are a support ticket classifier. Classify the customer's message into exactly ONE of these topics:",
    '',
    topicList,
    '',
    'Rules:',
    '- Choose the MOST specific matching topic',
    '- If the message fits multiple topics, pick the primary intent',
    '- Use "unknown" only if genuinely ambiguous (set topicId to "unknown")',
    '- Confidence should be 0.5-1.0 based on how clear the match is',
    '',
    'Output the topic ID and confidence.',
  ].join('\n')
}
|
|
167
|
-
|
|
168
|
-
/**
 * Ask the model to classify a single conversation.
 *
 * @param conversation - Conversation whose first message is classified.
 * @param systemPrompt - Prompt produced by buildClassifyPrompt.
 * @param validTopicIds - Ids accepted as-is; anything else becomes 'unknown'.
 * @returns A classification record stamped with the current time.
 * @throws Whatever generateObject rejects with.
 */
async function classifyConversation(
  conversation: Conversation,
  systemPrompt: string,
  validTopicIds: Set<string>
): Promise<Classification> {
  // Only the first 2000 characters of the message are sent to the model.
  const truncatedMessage = conversation.first_message.slice(0, 2000)

  const response = await generateObject({
    model: MODEL,
    schema: classifySchema,
    system: systemPrompt,
    prompt: truncatedMessage,
  })
  const { topicId: proposedTopicId, confidence } = response.object

  // Ids outside the taxonomy are folded into 'unknown'.
  let topicId = 'unknown'
  if (validTopicIds.has(proposedTopicId)) {
    topicId = proposedTopicId
  }

  return {
    conversationId: conversation.conversation_id,
    topicId,
    confidence,
    timestamp: new Date().toISOString(),
  }
}
|
|
190
|
-
|
|
191
|
-
// ============================================================================
|
|
192
|
-
// Batch Processing with Rate Limiting
|
|
193
|
-
// ============================================================================
|
|
194
|
-
|
|
195
|
-
/**
 * Classify a batch of conversations, CONCURRENT_LIMIT at a time, appending
 * one output record per conversation.
 *
 * A rejected request is recorded with topicId 'error' and confidence 0 and
 * logged to stderr; it does not abort the batch.
 *
 * @param conversations - Conversations to classify.
 * @param systemPrompt - Prompt shared by every request.
 * @param validTopicIds - Topic ids accepted from the model.
 * @param outputPath - JSONL file that receives the records.
 * @param onProgress - Called after each record with the number handled so
 *   far within this batch.
 * @returns Counts of successful and failed classifications.
 */
async function processBatch(
  conversations: Conversation[],
  systemPrompt: string,
  validTopicIds: Set<string>,
  outputPath: string,
  onProgress: (completed: number) => void
): Promise<{ success: number; failed: number }> {
  const tally = { success: 0, failed: 0 }

  let offset = 0
  while (offset < conversations.length) {
    const nextOffset = offset + CONCURRENT_LIMIT
    const group = conversations.slice(offset, nextOffset)

    const outcomes = await Promise.allSettled(
      group.map((conv) =>
        classifyConversation(conv, systemPrompt, validTopicIds)
      )
    )

    outcomes.forEach((outcome, idx) => {
      const conv = group[idx]!

      if (outcome.status === 'rejected') {
        // Record the failure as an 'error' classification
        appendClassification(outputPath, {
          conversationId: conv.conversation_id,
          topicId: 'error',
          confidence: 0,
          timestamp: new Date().toISOString(),
        })
        tally.failed++
        console.error(
          `\n โ Failed: ${conv.conversation_id}: ${outcome.reason}`
        )
      } else {
        appendClassification(outputPath, outcome.value)
        tally.success++
      }

      onProgress(tally.success + tally.failed)
    })

    // Pause before the next group, but not after the last one
    if (nextOffset < conversations.length) {
      await new Promise((r) => setTimeout(r, DELAY_BETWEEN_BATCHES_MS))
    }
    offset = nextOffset
  }

  return tally
}
|
|
247
|
-
|
|
248
|
-
// ============================================================================
|
|
249
|
-
// Progress Display
|
|
250
|
-
// ============================================================================
|
|
251
|
-
|
|
252
|
-
/**
 * Render a duration in milliseconds as `Xh Ym` (one hour or more) or
 * `Xm Ys` (under an hour).
 *
 * @param remainingMs - Duration in milliseconds.
 * @returns The formatted duration, or '--:--' for negative, NaN or infinite
 *   input.
 */
function formatETA(remainingMs: number): string {
  const isUsable = remainingMs >= 0 && isFinite(remainingMs)
  if (!isUsable) return '--:--'

  const totalSeconds = Math.floor(remainingMs / 1000)
  const totalMinutes = Math.floor(totalSeconds / 60)
  const hours = Math.floor(totalMinutes / 60)

  return hours > 0
    ? `${hours}h ${totalMinutes % 60}m`
    : `${totalMinutes}m ${totalSeconds % 60}s`
}
|
|
262
|
-
|
|
263
|
-
/**
 * Create a single-line terminal progress bar.
 *
 * `update` redraws the line in place (carriage return, no newline) with the
 * bar, counts, percentage, throughput and ETA. `done` prints a final summary
 * line using the last value passed to `update`.
 *
 * The bar and percentage are clamped to 0..100% so an out-of-range
 * `completed` cannot make `String.prototype.repeat` throw, and a zero total
 * or zero elapsed time never produces NaN/Infinity in the output.
 *
 * @param total - Number of items expected overall.
 * @returns The `update` and `done` callbacks.
 */
function createProgressBar(total: number): {
  update: (completed: number) => void
  done: () => void
} {
  const barWidth = 30
  const startTime = Date.now()
  let lastCompleted = 0

  // Items per second; 0 while no time has elapsed yet.
  const ratePerSecond = (count: number, elapsedMs: number): number =>
    elapsedMs > 0 ? count / (elapsedMs / 1000) : 0

  return {
    update(completed: number) {
      lastCompleted = completed

      // Fraction done, clamped; an empty workload counts as complete.
      const fraction =
        total > 0 ? Math.min(Math.max(completed / total, 0), 1) : 1
      const percent = Math.round(fraction * 100)

      const rate = ratePerSecond(completed, Date.now() - startTime)
      // Without a measurable rate the ETA is unknown; formatETA renders
      // a non-finite value as '--:--'.
      const remainingMs =
        rate > 0 ? ((total - completed) / rate) * 1000 : Infinity
      const eta = formatETA(remainingMs)

      const filledWidth = Math.round(fraction * barWidth)
      const bar = '█'.repeat(filledWidth) + '░'.repeat(barWidth - filledWidth)

      process.stdout.write(
        `\r [${bar}] ${completed}/${total} (${percent}%) | ${rate.toFixed(1)}/s | ETA: ${eta} `
      )
    },
    done() {
      const elapsed = Date.now() - startTime
      const rate = ratePerSecond(lastCompleted, elapsed)
      console.log(
        `\n ✅ Completed ${lastCompleted} classifications in ${formatETA(elapsed)} (${rate.toFixed(1)}/s)`
      )
    },
  }
}
|
|
296
|
-
|
|
297
|
-
// ============================================================================
|
|
298
|
-
// Main Command Handler
|
|
299
|
-
// ============================================================================
|
|
300
|
-
|
|
301
|
-
/**
 * Run the FAQ topic classification pipeline.
 *
 * Steps: print the configuration, validate the inputs, load the taxonomy and
 * the conversations, skip conversations already present in the output file,
 * then either classify three samples without saving (dry run) or classify
 * everything that remains in batches, appending results to the output file.
 *
 * Exits the process with code 1 when the batch size is not a positive
 * integer, an input file is missing, or the taxonomy has no `topics` array.
 *
 * @param options - Optional path overrides, batch size and dry-run flag;
 *   unset values fall back to the module defaults.
 */
async function faqClassify(options: {
  parquetPath?: string
  taxonomyPath?: string
  outputPath?: string
  batchSize?: number
  dryRun?: boolean
}): Promise<void> {
  const parquetPath = options.parquetPath ?? DEFAULT_PARQUET_PATH
  const taxonomyPath = options.taxonomyPath ?? DEFAULT_TAXONOMY_PATH
  const outputPath = options.outputPath ?? DEFAULT_OUTPUT_PATH
  const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE

  console.log('๐ท๏ธ FAQ Topic Classification Pipeline')
  console.log('='.repeat(60))
  console.log(` Parquet source: ${parquetPath}`)
  console.log(` Taxonomy: ${taxonomyPath}`)
  console.log(` Output: ${outputPath}`)
  console.log(` Batch size: ${batchSize}`)
  console.log(` Concurrency: ${CONCURRENT_LIMIT}`)
  console.log(` Dry run: ${options.dryRun ?? false}`)
  console.log('')

  // A batch size of 0 would never advance the batch loop and NaN would end
  // it after one empty batch, so reject anything but a positive integer.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    console.error(
      `Invalid batch size: ${batchSize} (expected a positive integer)`
    )
    process.exit(1)
  }

  // Validate inputs exist
  if (!existsSync(parquetPath)) {
    console.error(`โ Parquet file not found: ${parquetPath}`)
    process.exit(1)
  }
  if (!existsSync(taxonomyPath)) {
    console.error(`โ Taxonomy file not found: ${taxonomyPath}`)
    process.exit(1)
  }

  // Ensure output directory exists
  const outputDir = dirname(outputPath)
  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true })
  }

  // Load taxonomy
  console.log('๐ Loading taxonomy...')
  const taxonomy: Taxonomy = JSON.parse(readFileSync(taxonomyPath, 'utf-8'))
  // The file is external input: make sure the one field used below exists.
  if (!Array.isArray(taxonomy?.topics)) {
    console.error(`Taxonomy file has no "topics" array: ${taxonomyPath}`)
    process.exit(1)
  }
  const validTopicIds = new Set(taxonomy.topics.map((t) => t.id))
  validTopicIds.add('unknown')
  console.log(` Found ${taxonomy.topics.length} topics`)

  // Load conversations from parquet
  console.log('\n๐ฆ Loading conversations from parquet...')
  const allConversations = await loadConversationsFromParquet(parquetPath)
  console.log(` Found ${allConversations.length} conversations`)

  // Load existing classifications for resume
  console.log('\n๐ Checking for existing classifications...')
  const classifiedIds = loadExistingClassifications(outputPath)
  console.log(` Already classified: ${classifiedIds.size}`)

  // Filter to unclassified conversations
  const remaining = allConversations.filter(
    (c) => !classifiedIds.has(c.conversation_id)
  )
  console.log(` Remaining to classify: ${remaining.length}`)

  if (remaining.length === 0) {
    console.log('\n✅ All conversations already classified!')
    return
  }

  // Build prompt once; both the dry run and the real run use it
  const systemPrompt = buildClassifyPrompt(taxonomy)

  if (options.dryRun) {
    console.log('\n๐งช Dry run - showing sample classifications:')
    for (const conv of remaining.slice(0, 3)) {
      try {
        const result = await classifyConversation(
          conv,
          systemPrompt,
          validTopicIds
        )
        console.log(`\n ${conv.conversation_id}:`)
        console.log(
          ` Topic: ${result.topicId} (${(result.confidence * 100).toFixed(0)}%)`
        )
        console.log(` Message: "${conv.first_message.slice(0, 100)}..."`)
      } catch (error) {
        console.log(` โ ${conv.conversation_id}: ${error}`)
      }
    }
    console.log('\n๐งช Dry run complete - no classifications saved')
    return
  }

  // Process in batches
  console.log('\n๐ Starting classification...')
  const progress = createProgressBar(remaining.length)
  let totalSuccess = 0
  let totalFailed = 0

  for (let i = 0; i < remaining.length; i += batchSize) {
    const batch = remaining.slice(i, i + batchSize)
    const { success, failed } = await processBatch(
      batch,
      systemPrompt,
      validTopicIds,
      outputPath,
      // processBatch reports counts relative to the batch; offset them
      (completed) => progress.update(i + completed)
    )
    totalSuccess += success
    totalFailed += failed

    // Update progress after batch
    progress.update(i + batch.length)
  }

  progress.done()

  // Summary
  console.log('\n๐ Classification Summary:')
  console.log(` ✅ Successful: ${totalSuccess}`)
  console.log(` โ Failed: ${totalFailed}`)
  console.log(` ๐ Output: ${outputPath}`)
}
|
|
424
|
-
|
|
425
|
-
// ============================================================================
|
|
426
|
-
// Command Registration
|
|
427
|
-
// ============================================================================
|
|
428
|
-
|
|
429
|
-
/**
 * Register the `faq-classify` command on the given Commander program.
 *
 * The action returns the promise from faqClassify instead of dropping it, so
 * a caller that awaits the parse can observe completion and failures. This
 * matches how the faq-cluster command passes its async handler directly.
 *
 * @param program - Commander program (or parent command) to extend.
 */
export function registerFaqClassifyCommands(program: Command): void {
  program
    .command('faq-classify')
    .description('Classify conversations into FAQ topics using LLM')
    .option(
      '--parquet-path <path>',
      'Path to conversations parquet file',
      DEFAULT_PARQUET_PATH
    )
    .option(
      '--taxonomy-path <path>',
      'Path to taxonomy JSON file',
      DEFAULT_TAXONOMY_PATH
    )
    .option(
      '--output-path <path>',
      'Path to output JSONL file',
      DEFAULT_OUTPUT_PATH
    )
    .option(
      '--batch-size <number>',
      'Conversations per batch',
      String(DEFAULT_BATCH_SIZE)
    )
    .option('-d, --dry-run', 'Show sample classifications without saving')
    .action((opts) =>
      faqClassify({
        ...opts,
        // Option values arrive as strings; convert before handing over
        batchSize: opts.batchSize ? parseInt(opts.batchSize, 10) : undefined,
      })
    )
}
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FAQ Clustering CLI Command
|
|
3
|
-
*
|
|
4
|
-
* Generates production clustering from Phase 0 artifacts.
|
|
5
|
-
*
|
|
6
|
-
* Usage:
|
|
7
|
-
* bun src/index.ts faq-cluster
|
|
8
|
-
* bun src/index.ts faq-cluster --version v2
|
|
9
|
-
* bun src/index.ts faq-cluster --dry-run
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { existsSync } from 'fs'
|
|
13
|
-
import { join, resolve } from 'path'
|
|
14
|
-
import {
|
|
15
|
-
displayClusteringSummary,
|
|
16
|
-
generateProductionClustering,
|
|
17
|
-
writeProductionArtifacts,
|
|
18
|
-
} from '@skillrecordings/core/faq/production-clusterer'
|
|
19
|
-
import type { Command } from 'commander'
|
|
20
|
-
|
|
21
|
-
/** Default locations, all resolved against the project root. */
const PROJECT_ROOT = resolve(__dirname, '..', '..', '..', '..')
const DEFAULT_PHASE0_PATH = join(PROJECT_ROOT, 'artifacts', 'phase-0')
const DEFAULT_OUTPUT_PATH = join(
  PROJECT_ROOT,
  'artifacts',
  'phase-1',
  'clustering'
)
|
|
25
|
-
|
|
26
|
-
/**
 * Check that the three Phase 0 cluster files exist under
 * `<phase0Path>/clusters/v1`.
 *
 * Files are checked in the order assignments, labels, metrics, and the first
 * missing one determines the error.
 *
 * @param phase0Path - Root directory of the Phase 0 artifacts.
 * @throws Error naming the first file that is missing.
 */
function validatePaths(phase0Path: string): void {
  const requiredFiles: Array<{
    file: string
    describeMissing: (path: string) => string
  }> = [
    {
      file: 'clusters/v1/assignments.json',
      describeMissing: (path) =>
        `Phase 0 assignments not found at ${path}\n` +
        'Run Phase 0 clustering first or specify correct --phase0-path',
    },
    {
      file: 'clusters/v1/labels.json',
      describeMissing: (path) => `Phase 0 labels not found at ${path}`,
    },
    {
      file: 'clusters/v1/metrics.json',
      describeMissing: (path) => `Phase 0 metrics not found at ${path}`,
    },
  ]

  for (const { file, describeMissing } of requiredFiles) {
    const fullPath = join(phase0Path, file)
    if (!existsSync(fullPath)) {
      throw new Error(describeMissing(fullPath))
    }
  }
}
|
|
47
|
-
|
|
48
|
-
/**
 * Main command handler for `faq-cluster`.
 *
 * Prints the configuration, verifies the Phase 0 artifacts, generates the
 * production clustering, displays its summary, writes the artifacts unless
 * this is a dry run, and optionally prints the stats as JSON. Any error is
 * reported on stderr and ends the process with exit code 1.
 *
 * @param options - Optional path overrides, version tag (default 'v1'),
 *   dry-run flag and JSON-output flag.
 */
async function faqCluster(options: {
  phase0Path?: string
  outputPath?: string
  version?: string
  dryRun?: boolean
  json?: boolean
}): Promise<void> {
  const phase0Path = options.phase0Path ?? DEFAULT_PHASE0_PATH
  const outputPath = options.outputPath ?? DEFAULT_OUTPUT_PATH
  const version = options.version ?? 'v1'

  console.log('๐ฌ Production Clustering Pipeline')
  console.log('='.repeat(60))
  console.log(` Phase 0 artifacts: ${phase0Path}`)
  console.log(` Output path: ${outputPath}`)
  console.log(` Version: ${version}`)
  console.log(` Dry run: ${options.dryRun ?? false}`)
  console.log('')

  try {
    // Validate Phase 0 artifacts exist
    validatePaths(phase0Path)
    console.log('✅ Phase 0 artifacts found')

    // Generate production clustering
    console.log('\n๐ Generating production clustering...')
    const result = await generateProductionClustering({
      phase0Path,
      outputPath,
      version,
    })

    // Display summary
    displayClusteringSummary(result)

    // Write artifacts (unless dry run)
    if (!options.dryRun) {
      console.log('\n๐ Writing artifacts...')
      writeProductionArtifacts(result, outputPath)
      console.log('\n✅ Production clustering complete!')
      console.log(` Artifacts written to: ${join(outputPath, version)}`)
    } else {
      console.log('\n๐งช Dry run - no artifacts written')
    }

    // JSON output if requested
    if (options.json) {
      console.log('\n๐ JSON Output:')
      console.log(JSON.stringify(result.stats, null, 2))
    }
  } catch (error) {
    console.error(
      '\nโ Error:',
      error instanceof Error ? error.message : String(error)
    )
    process.exit(1)
  }
}
|
|
109
|
-
|
|
110
|
-
/**
 * Attach the `faq-cluster` command, its options and its handler to the given
 * Commander program.
 *
 * @param program - Commander program (or parent command) to extend.
 */
export function registerFaqClusterCommands(program: Command): void {
  const clusterCommand = program.command('faq-cluster')

  clusterCommand.description(
    'Generate production clustering from Phase 0 artifacts'
  )

  // Path and version options, each with a default
  clusterCommand.option(
    '--phase0-path <path>',
    'Path to Phase 0 artifacts',
    DEFAULT_PHASE0_PATH
  )
  clusterCommand.option(
    '--output-path <path>',
    'Path to write production artifacts',
    DEFAULT_OUTPUT_PATH
  )
  clusterCommand.option(
    '--version <version>',
    'Version tag for output (e.g., v1, v2)',
    'v1'
  )

  // Boolean flags
  clusterCommand.option(
    '-d, --dry-run',
    'Show summary without writing artifacts'
  )
  clusterCommand.option('--json', 'Output stats as JSON')

  clusterCommand.action(faqCluster)
}
|