@skillrecordings/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/.env.encrypted +0 -0
  2. package/CHANGELOG.md +35 -0
  3. package/README.md +214 -0
  4. package/bin/skill.ts +3 -0
  5. package/data/tt-archive-dataset.json +1 -0
  6. package/data/validate-test-dataset.json +97 -0
  7. package/docs/CLI-AUTH.md +504 -0
  8. package/package.json +38 -0
  9. package/preload.ts +18 -0
  10. package/src/__tests__/init.test.ts +74 -0
  11. package/src/alignment-test.ts +64 -0
  12. package/src/check-apps.ts +16 -0
  13. package/src/commands/auth/decrypt.ts +123 -0
  14. package/src/commands/auth/encrypt.ts +81 -0
  15. package/src/commands/auth/index.ts +50 -0
  16. package/src/commands/auth/keygen.ts +41 -0
  17. package/src/commands/auth/status.ts +164 -0
  18. package/src/commands/axiom/forensic.ts +868 -0
  19. package/src/commands/axiom/index.ts +697 -0
  20. package/src/commands/build-dataset.ts +311 -0
  21. package/src/commands/db-status.ts +47 -0
  22. package/src/commands/deploys.ts +219 -0
  23. package/src/commands/eval-local/compare.ts +171 -0
  24. package/src/commands/eval-local/health.ts +212 -0
  25. package/src/commands/eval-local/index.ts +76 -0
  26. package/src/commands/eval-local/real-tools.ts +416 -0
  27. package/src/commands/eval-local/run.ts +1168 -0
  28. package/src/commands/eval-local/score-production.ts +256 -0
  29. package/src/commands/eval-local/seed.ts +276 -0
  30. package/src/commands/eval-pipeline/index.ts +53 -0
  31. package/src/commands/eval-pipeline/real-tools.ts +492 -0
  32. package/src/commands/eval-pipeline/run.ts +1316 -0
  33. package/src/commands/eval-pipeline/seed.ts +395 -0
  34. package/src/commands/eval-prompt.ts +496 -0
  35. package/src/commands/eval.test.ts +253 -0
  36. package/src/commands/eval.ts +108 -0
  37. package/src/commands/faq-classify.ts +460 -0
  38. package/src/commands/faq-cluster.ts +135 -0
  39. package/src/commands/faq-extract.ts +249 -0
  40. package/src/commands/faq-mine.ts +432 -0
  41. package/src/commands/faq-review.ts +426 -0
  42. package/src/commands/front/index.ts +351 -0
  43. package/src/commands/front/pull-conversations.ts +275 -0
  44. package/src/commands/front/tags.ts +825 -0
  45. package/src/commands/front-cache.ts +1277 -0
  46. package/src/commands/front-stats.ts +75 -0
  47. package/src/commands/health.test.ts +82 -0
  48. package/src/commands/health.ts +362 -0
  49. package/src/commands/init.test.ts +89 -0
  50. package/src/commands/init.ts +106 -0
  51. package/src/commands/inngest/client.ts +294 -0
  52. package/src/commands/inngest/events.ts +296 -0
  53. package/src/commands/inngest/investigate.ts +382 -0
  54. package/src/commands/inngest/runs.ts +149 -0
  55. package/src/commands/inngest/signal.ts +143 -0
  56. package/src/commands/kb-sync.ts +498 -0
  57. package/src/commands/memory/find.ts +135 -0
  58. package/src/commands/memory/get.ts +87 -0
  59. package/src/commands/memory/index.ts +97 -0
  60. package/src/commands/memory/stats.ts +163 -0
  61. package/src/commands/memory/store.ts +49 -0
  62. package/src/commands/memory/vote.ts +159 -0
  63. package/src/commands/pipeline.ts +127 -0
  64. package/src/commands/responses.ts +856 -0
  65. package/src/commands/tools.ts +293 -0
  66. package/src/commands/wizard.ts +319 -0
  67. package/src/index.ts +172 -0
  68. package/src/lib/crypto.ts +56 -0
  69. package/src/lib/env-loader.ts +206 -0
  70. package/src/lib/onepassword.ts +137 -0
  71. package/src/test-agent-local.ts +115 -0
  72. package/tsconfig.json +11 -0
  73. package/vitest.config.ts +10 -0
package/.env.encrypted ADDED
Binary file
package/CHANGELOG.md ADDED
@@ -0,0 +1,35 @@
1
+ # @skillrecordings/cli
2
+
3
+ ## 0.1.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 5734b19: Make CLI public for npm publishing
8
+
9
+ ## 0.0.4
10
+
11
+ ### Patch Changes
12
+
13
+ - Updated dependencies [7ae3e99]
14
+ - @skillrecordings/sdk@0.6.0
15
+
16
+ ## 0.0.3
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [36efccf]
21
+ - @skillrecordings/sdk@0.5.0
22
+
23
+ ## 0.0.2
24
+
25
+ ### Patch Changes
26
+
27
+ - Updated dependencies [7c5c5d8]
28
+ - @skillrecordings/sdk@0.4.0
29
+
30
+ ## 0.0.1
31
+
32
+ ### Patch Changes
33
+
34
+ - Updated dependencies [2820cb9]
35
+ - @skillrecordings/core@0.0.1
package/README.md ADDED
@@ -0,0 +1,214 @@
1
+ # @skillrecordings/cli
2
+
3
+ CLI for the support platform. Agent-friendly with non-interactive defaults.
4
+
5
+ ## Usage
6
+
7
+ ```bash
8
+ bunx @skillrecordings/cli <command> [options]
9
+
10
+ # or, with the package installed, run the binary directly
11
+ skill <command> [options]
12
+ ```
13
+
14
+ All commands support `--json` for machine-readable output and reliable exit
15
+ codes.
16
+
17
+ ## Commands
18
+
19
+ ### `skill init [name]`
20
+
21
+ Initialize a new app integration with webhook secret.
22
+
23
+ ```bash
24
+ # Interactive (terminal only)
25
+ skill init
26
+
27
+ # Non-interactive (required for agents/scripts)
28
+ skill init my-app
29
+
30
+ # JSON output
31
+ skill init my-app --json
32
+ ```
33
+
34
+ **Options:**
35
+ - `--json` - Output result as JSON (machine-readable)
36
+
37
+ **Exit codes:**
38
+ - `0` - Success
39
+ - `1` - Error (name required in non-interactive mode, etc.)
40
+
41
+ ### `skill health <slug|url>`
42
+
43
+ Test integration endpoint health.
44
+
45
+ ```bash
46
+ # Using database lookup (recommended)
47
+ skill health total-typescript
48
+
49
+ # Direct URL mode
50
+ skill health https://example.com --secret whsec_xxx
51
+
52
+ # List registered apps
53
+ skill health --list
54
+
55
+ # JSON output (for agents)
56
+ skill health total-typescript --json
57
+ ```
58
+
59
+ **Options:**
60
+ - `-s, --secret <secret>` - Webhook secret (required for direct URL mode)
61
+ - `-l, --list` - List all registered apps
62
+ - `--json` - Output result as JSON (machine-readable)
63
+
64
+ **Exit codes:**
65
+ - `0` - Health check passed
66
+ - `1` - Health check failed or error
67
+
68
+ **JSON output structure:**
69
+ ```json
70
+ {
71
+ "success": true,
72
+ "endpoint": "https://...",
73
+ "status": "ok",
74
+ "responseTime": 730,
75
+ "actions": [
76
+ { "name": "lookupUser", "status": "ok" },
77
+ { "name": "getPurchases", "status": "ok" }
78
+ ],
79
+ "summary": { "ok": 4, "notImplemented": 1, "errors": 0 }
80
+ }
81
+ ```
82
+
83
+ ### `skill eval <type> <dataset>`
84
+
85
+ Run evals against a dataset (e.g., routing classifier, canned response
86
+ matcher).
87
+
88
+ ```bash
89
+ # Run routing eval with defaults
90
+ skill eval routing path/to/dataset.json
91
+
92
+ # With strict thresholds
93
+ skill eval routing dataset.json --min-precision 0.95 --min-recall 0.97
94
+
95
+ # JSON output for automation
96
+ skill eval routing dataset.json --json
97
+
98
+ # Custom thresholds
99
+ skill eval routing dataset.json \
100
+ --min-precision 0.92 \
101
+ --min-recall 0.95 \
102
+ --max-fp-rate 0.03 \
103
+ --max-fn-rate 0.02
104
+ ```
105
+
106
+ **Arguments:**
107
+ - `type` - Eval type (e.g., `routing`)
108
+ - `dataset` - Path to JSON dataset file
109
+
110
+ **Options:**
111
+ - `--json` - Output result as JSON (machine-readable)
112
+ - `--min-precision <number>` - Minimum precision threshold (default: 0.92)
113
+ - `--min-recall <number>` - Minimum recall threshold (default: 0.95)
114
+ - `--max-fp-rate <number>` - Maximum false positive rate (default: 0.03)
115
+ - `--max-fn-rate <number>` - Maximum false negative rate (default: 0.02)
116
+
117
+ **Exit codes:**
118
+ - `0` - All metrics passed thresholds
119
+ - `1` - One or more metrics below threshold or error
120
+
121
+ **Output includes:**
122
+ - Precision, recall, false positive/negative rates
123
+ - Latency percentiles (p50, p95, p99)
124
+ - Token usage and estimated cost
125
+ - Category-level breakdown (if applicable)
126
+
127
+ ## App Onboarding Workflow
128
+
129
+ Typical flow for adding a new app integration:
130
+
131
+ ```bash
132
+ # 1. Initialize with app name
133
+ skill init my-app --json
134
+ # Returns: { "success": true, "appName": "my-app",
135
+ # "webhookSecret": "whsec_xxx" }
136
+
137
+ # 2. Register webhook endpoint in your app
138
+ # Save the webhook secret and configure your endpoint to:
139
+ # POST /api/support-webhooks with Authorization: Bearer whsec_xxx
140
+
141
+ # 3. Test health before going live
142
+ skill health my-app
143
+ # Verifies: endpoint reachable, signature verification works,
144
+ # actions implemented
145
+
146
+ # 4. Run evals (optional, for routing/matching logic)
147
+ skill eval routing path/to/labeled-dataset.json --json
148
+
149
+ # 5. Deploy and monitor
150
+ # Check logs via Axiom/Langfuse for inbound messages
151
+ ```
152
+
153
+ All commands work non-interactively and report errors with exit codes
154
+ (0=success, 1=error).
155
+
156
+ ## Agent Usage
157
+
158
+ All commands support `--json` for machine-readable output and
159
+ non-interactive operation:
160
+
161
+ **init command:**
162
+ - Requires `name` argument (non-interactive mode)
163
+ - Returns JSON: `{ "success": true, "appName": "...",
164
+ "webhookSecret": "whsec_..." }`
165
+ - Use `--json` for reliable parsing
166
+
167
+ **health command:**
168
+ - Use `--json` for JSON output (structured for parsing)
169
+ - Use `--list` to discover all registered apps
170
+ - Returns exit code 0 if healthy, 1 if any check fails
171
+
172
+ **eval command:**
173
+ - Requires `type` and `dataset` arguments
174
+ - Accepts custom threshold gates (precision, recall, false
175
+ positive/negative rates)
176
+ - Returns exit code 0 if all metrics pass, 1 otherwise
177
+ - Use `--json` for machine-readable report
178
+
179
+ **Error handling:**
180
+ - All commands output `{ "success": false, "error": "message" }` in
181
+ JSON mode
182
+ - Check exit codes: 0 = success, 1 = error
183
+ - Never interactive in non-TTY environments (CI/CD safe)
184
+
185
+ ## Auth (Encrypted Secrets)
186
+
187
+ Distribute CLI secrets to team members via age encryption + 1Password.
188
+
189
+ ```bash
190
+ # Generate keypair (admin)
191
+ skill auth keygen
192
+
193
+ # Encrypt secrets (admin)
194
+ skill auth encrypt .env.local
195
+
196
+ # Decrypt secrets (team)
197
+ skill auth decrypt .env.local.age --output .env.local
198
+ ```
199
+
200
+ See [docs/CLI-AUTH.md](./docs/CLI-AUTH.md) for complete setup guide.
201
+
202
+ ## Implementation
203
+
204
+ - `packages/cli/src/commands/` - Command implementations
205
+ - `packages/cli/src/index.ts` - CLI entry point
206
+ - Entry-point shebang: `#!/usr/bin/env bun` (runs with Bun directly)
207
+
208
+ ## Do / Don't
209
+
210
+ - Do use `--json` flag for automation/agents/scripts
211
+ - Do check exit codes in shell scripts
212
+ - Do pass `name` argument to `init` in CI/CD (non-interactive required)
213
+ - Don't rely on interactive prompts outside terminal
214
+ - Don't parse human-readable output (use `--json` for structured output)
package/bin/skill.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env bun
2
+ import '../preload'
3
+ import '../src/index'
package/data/tt-archive-dataset.json ADDED
@@ -0,0 +1 @@
1
+ []
package/data/validate-test-dataset.json ADDED
@@ -0,0 +1,97 @@
1
+ [
2
+ {
3
+ "id": "validate_clean_response",
4
+ "name": "Clean response - should pass validation",
5
+ "draft": "Here's a login link for your account: https://example.com/login?token=abc123\n\nYou should have full access to the course now.",
6
+ "assertions": {
7
+ "noFabrication": true,
8
+ "noMetaCommentary": true,
9
+ "noInternalLeak": true,
10
+ "noBannedPhrases": true
11
+ },
12
+ "context": {
13
+ "customer": "recent-purchase"
14
+ }
15
+ },
16
+ {
17
+ "id": "validate_internal_leak",
18
+ "name": "Internal leak - exposes routing info",
19
+ "draft": "No instructor routing configured for this app. You'll want to reach out through another channel.",
20
+ "assertions": {
21
+ "noInternalLeak": true
22
+ },
23
+ "context": {
24
+ "customer": "no-purchase"
25
+ }
26
+ },
27
+ {
28
+ "id": "validate_meta_commentary",
29
+ "name": "Meta commentary - agent explains itself",
30
+ "draft": "This is clearly a vendor outreach email, not a support request. I won't draft a response here since it falls outside customer support scope.",
31
+ "assertions": {
32
+ "noMetaCommentary": true
33
+ },
34
+ "context": {
35
+ "customer": "no-purchase"
36
+ }
37
+ },
38
+ {
39
+ "id": "validate_banned_phrases",
40
+ "name": "Banned phrases - corporate speak",
41
+ "draft": "Great! I'd recommend starting with the basics. Let me know if you have any other questions! I hope this helps!",
42
+ "assertions": {
43
+ "noBannedPhrases": true
44
+ },
45
+ "context": {
46
+ "customer": "recent-purchase"
47
+ }
48
+ },
49
+ {
50
+ "id": "validate_fabrication",
51
+ "name": "Fabrication - made up course content",
52
+ "draft": "Start with the fundamentals section in Module 1. It covers core concepts like typing basics, generics, and utility types.",
53
+ "assertions": {
54
+ "noFabrication": true,
55
+ "mustNotContain": ["fundamentals section", "Module 1"]
56
+ },
57
+ "context": {
58
+ "customer": "no-purchase"
59
+ }
60
+ },
61
+ {
62
+ "id": "validate_em_dash",
63
+ "name": "Em dash - banned character",
64
+ "draft": "TypeScript — like JavaScript — is a powerful language for building web applications.",
65
+ "assertions": {
66
+ "noBannedPhrases": true
67
+ },
68
+ "context": {
69
+ "customer": "recent-purchase"
70
+ }
71
+ },
72
+ {
73
+ "id": "validate_multiple_issues",
74
+ "name": "Multiple issues - several patterns detected",
75
+ "draft": "Great! I understand your frustration. The routing failed so I can't process this. Start with the fundamentals section — it covers everything.",
76
+ "assertions": {
77
+ "noFabrication": true,
78
+ "noInternalLeak": true,
79
+ "noBannedPhrases": true
80
+ },
81
+ "context": {
82
+ "customer": "no-purchase"
83
+ }
84
+ },
85
+ {
86
+ "id": "validate_outside_scope",
87
+ "name": "Outside scope leak",
88
+ "draft": "This falls outside my scope as a support agent. You should contact the instructor directly.",
89
+ "assertions": {
90
+ "noInternalLeak": true,
91
+ "mustNotContain": ["outside", "scope"]
92
+ },
93
+ "context": {
94
+ "customer": "no-purchase"
95
+ }
96
+ }
97
+ ]