@masslessai/push-todo 4.5.0 → 4.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -433,6 +433,12 @@ The `push-todo` CLI supports these commands:
433
433
  | `push-todo --status` | Show connection status |
434
434
  | `push-todo --mark-completed <uuid>` | Mark task as completed |
435
435
  | `push-todo --json` | Output as JSON |
436
+ | `push-todo create <title>` | Create a todo from CLI |
437
+ | `push-todo create <title> --remind <text>` | Create with reminder (e.g., "tomorrow night", "in 2 hours") |
438
+ | `push-todo create <title> --remind-at <iso>` | Create with exact reminder date (ISO8601) |
439
+ | `push-todo create <title> --alarm` | Mark reminder as urgent (bypasses Focus) |
440
+ | `push-todo create <title> --content <text>` | Create with detailed content |
441
+ | `push-todo create <title> --backlog` | Create as backlog item |
436
442
  | `push-todo schedule add` | Create a remote schedule (Supabase-backed) |
437
443
  | `push-todo schedule list` | List all remote schedules |
438
444
  | `push-todo schedule remove <id>` | Remove a schedule |
package/lib/daemon.js CHANGED
@@ -2564,16 +2564,30 @@ async function handleTaskCompletion(displayNumber, exitCode) {
2564
2564
  executionSummary += ` PR: ${prUrl}`;
2565
2565
  }
2566
2566
 
2567
+ // Build structured recap for briefing visualization (JSONB column)
2568
+ const prNumber = prUrl ? parseInt((prUrl.match(/\/(\d+)$/) || [])[1] || '0', 10) || null : null;
2569
+ const executionRecap = {
2570
+ durationStr,
2571
+ durationSeconds: duration,
2572
+ diagram: visualArtifact || null,
2573
+ machineName,
2574
+ outcome: {
2575
+ prUrl: prUrl || null,
2576
+ prNumber,
2577
+ },
2578
+ };
2579
+
2567
2580
  const statusUpdated = await updateTaskStatus(displayNumber, 'session_finished', {
2568
2581
  duration,
2569
2582
  sessionId,
2570
- summary: executionSummary
2583
+ summary: executionSummary,
2584
+ executionRecap,
2571
2585
  }, info.taskId);
2572
2586
  if (!statusUpdated) {
2573
2587
  logError(`Task #${displayNumber}: Failed to update status to session_finished — will retry`);
2574
2588
  // Retry once
2575
2589
  await updateTaskStatus(displayNumber, 'session_finished', {
2576
- duration, sessionId, summary: executionSummary
2590
+ duration, sessionId, summary: executionSummary, executionRecap,
2577
2591
  }, info.taskId);
2578
2592
  }
2579
2593
 
@@ -2692,7 +2706,13 @@ async function handleTaskCompletion(displayNumber, exitCode) {
2692
2706
  ? `${failureSummary}\nExit code ${exitCode}. Ran for ${durationStr} on ${machineName}.`
2693
2707
  : `Exit code ${exitCode}: ${stderr.slice(0, 200)}`;
2694
2708
 
2695
- await updateTaskStatus(displayNumber, 'failed', { error: errorMsg, sessionId }, info.taskId);
2709
+ const failedRecap = {
2710
+ durationStr,
2711
+ durationSeconds: duration,
2712
+ machineName,
2713
+ exitCode,
2714
+ };
2715
+ await updateTaskStatus(displayNumber, 'failed', { error: errorMsg, sessionId, executionRecap: failedRecap }, info.taskId);
2696
2716
 
2697
2717
  if (NOTIFY_ON_FAILURE) {
2698
2718
  sendMacNotification(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@masslessai/push-todo",
3
- "version": "4.5.0",
3
+ "version": "4.5.2",
4
4
  "description": "Voice tasks from Push iOS app for Claude Code",
5
5
  "type": "module",
6
6
  "bin": {
@@ -19,6 +19,7 @@
19
19
  "hooks/",
20
20
  "natives/",
21
21
  "scripts/",
22
+ "skills/",
22
23
  "SKILL.md",
23
24
  "LICENSE"
24
25
  ],
@@ -6,10 +6,11 @@
6
6
  * 1. Claude Code - symlink to ~/.claude/skills/ (gives clean /push-todo command)
7
7
  * 2. OpenAI Codex - AGENTS.md in ~/.codex/
8
8
  * 3. OpenClaw - SKILL.md in ~/.openclaw/skills/ (legacy: ~/.clawdbot/)
9
- * 4. Downloads native keychain helper binary (macOS)
9
+ * 4. Bundled skills - auto-discovers bundled skills and creates per-skill symlinks
10
+ * 5. Downloads native keychain helper binary (macOS)
10
11
  */
11
12
 
12
- import { createWriteStream, existsSync, mkdirSync, unlinkSync, readFileSync, writeFileSync, symlinkSync, lstatSync, readlinkSync, rmSync, appendFileSync } from 'fs';
13
+ import { createWriteStream, existsSync, mkdirSync, unlinkSync, readFileSync, writeFileSync, symlinkSync, lstatSync, readlinkSync, rmSync, appendFileSync, readdirSync } from 'fs';
13
14
  import { chmod, stat } from 'fs/promises';
14
15
  import { pipeline } from 'stream/promises';
15
16
  import { join, dirname } from 'path';
@@ -301,6 +302,68 @@ function setupOpenClaw() {
301
302
  }
302
303
  }
303
304
 
305
+ /**
306
+ * Auto-discover bundled skills and create per-skill symlinks.
307
+ * Each bundled skill gets: <targetDir>/push-<name> -> PACKAGE_ROOT/skills/<name>
308
+ *
309
+ * Claude Code scans one level deep (~/.claude/skills/X/SKILL.md), but bundled
310
+ * skills are two levels deep from the root push-todo symlink. Per-skill symlinks
311
+ * give each bundled skill its own top-level entry for independent discovery.
312
+ *
313
+ * @param {string} targetSkillsDir - e.g. ~/.claude/skills/
314
+ * @param {string} clientLabel - e.g. "Claude Code"
315
+ * @returns {string[]} names of skills that were symlinked
316
+ */
317
+ function setupBundledSkills(targetSkillsDir, clientLabel) {
318
+ const bundledDir = join(PACKAGE_ROOT, 'skills');
319
+ if (!existsSync(bundledDir)) return [];
320
+
321
+ let entries;
322
+ try {
323
+ entries = readdirSync(bundledDir);
324
+ } catch {
325
+ return [];
326
+ }
327
+
328
+ const installed = [];
329
+
330
+ for (const name of entries) {
331
+ const skillSource = join(bundledDir, name);
332
+ const skillMd = join(skillSource, 'SKILL.md');
333
+
334
+ // Only process directories containing SKILL.md
335
+ if (!existsSync(skillMd)) continue;
336
+
337
+ // Namespace with push- prefix (skip if already prefixed)
338
+ const linkName = name.startsWith('push-') ? name : `push-${name}`;
339
+ const linkPath = join(targetSkillsDir, linkName);
340
+
341
+ try {
342
+ if (existsSync(linkPath)) {
343
+ const stats = lstatSync(linkPath);
344
+ if (stats.isSymbolicLink()) {
345
+ const currentTarget = readlinkSync(linkPath);
346
+ if (currentTarget === skillSource) {
347
+ installed.push(linkName);
348
+ continue; // Already correct
349
+ }
350
+ unlinkSync(linkPath);
351
+ } else {
352
+ rmSync(linkPath, { recursive: true });
353
+ }
354
+ }
355
+
356
+ symlinkSync(skillSource, linkPath);
357
+ installed.push(linkName);
358
+ console.log(`[push-todo] ${clientLabel}: Bundled skill installed: ${linkName}`);
359
+ } catch (err) {
360
+ console.log(`[push-todo] ${clientLabel}: Failed to install bundled skill ${linkName}: ${err.message}`);
361
+ }
362
+ }
363
+
364
+ return installed;
365
+ }
366
+
304
367
  /**
305
368
  * Download a file from URL to destination.
306
369
  *
@@ -356,15 +419,36 @@ async function main() {
356
419
  // Step 3: Set up Claude Code skill symlink
357
420
  console.log('[push-todo] Setting up Claude Code skill...');
358
421
  const claudeSuccess = setupClaudeSkill();
422
+ if (claudeSuccess) {
423
+ const bundled = setupBundledSkills(SKILL_DIR, 'Claude Code');
424
+ if (bundled.length > 0) {
425
+ console.log(`[push-todo] Claude Code: ${bundled.length} bundled skill(s) installed`);
426
+ }
427
+ }
359
428
  console.log('');
360
429
 
361
430
  // Step 4: Set up OpenAI Codex (if installed)
362
431
  const codexSuccess = setupCodex();
363
- if (codexSuccess) console.log('');
432
+ if (codexSuccess) {
433
+ const codexSkillsDir = join(CODEX_DIR, 'skills');
434
+ const bundled = setupBundledSkills(codexSkillsDir, 'Codex');
435
+ if (bundled.length > 0) {
436
+ console.log(`[push-todo] Codex: ${bundled.length} bundled skill(s) installed`);
437
+ }
438
+ console.log('');
439
+ }
364
440
 
365
441
  // Step 5: Set up OpenClaw (if installed — formerly Clawdbot)
366
442
  const openclawSuccess = setupOpenClaw();
367
- if (openclawSuccess) console.log('');
443
+ if (openclawSuccess) {
444
+ const clawDir = existsSync(OPENCLAW_DIR) ? OPENCLAW_DIR : OPENCLAW_LEGACY_DIR;
445
+ const clawSkillsDir = join(clawDir, 'skills');
446
+ const bundled = setupBundledSkills(clawSkillsDir, 'OpenClaw');
447
+ if (bundled.length > 0) {
448
+ console.log(`[push-todo] OpenClaw: ${bundled.length} bundled skill(s) installed`);
449
+ }
450
+ console.log('');
451
+ }
368
452
 
369
453
  // Track which clients were set up
370
454
  const clients = [];
@@ -0,0 +1,313 @@
1
+ ---
2
+ name: skill-builder
3
+ description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, update or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, optimize a skill's description for better triggering, or improve an existing skill's quality. This skill should be used whenever the user mentions "create a skill", "build a skill", "improve skill", "test skill", "eval skill", "benchmark skill", "skill description", "skill triggering", or asks to turn a workflow into a reusable skill.
4
+ version: 1.0.0
5
+ ---
6
+
7
+ # Skill Builder
8
+
9
+ This skill guides the complete lifecycle of building, testing, and improving Claude Code skills — from initial idea through quantitative benchmarking and iterative refinement.
10
+
11
+ ## The Core Loop
12
+
13
+ Skill development follows an iterative cycle:
14
+
15
+ 1. **Capture Intent** — Understand what the skill should do and when it should trigger
16
+ 2. **Interview & Research** — Gather details about edge cases, formats, and dependencies
17
+ 3. **Write SKILL.md** — Create the skill with metadata and instructions
18
+ 4. **Test** — Run eval cases to measure skill performance
19
+ 5. **Evaluate** — Grade outputs, aggregate benchmarks, review with user
20
+ 6. **Improve** — Refine based on feedback and data, then loop back to step 4
21
+
22
+ When using this skill, first determine where in this loop the user currently is, then help them move forward. Not every skill needs the full eval pipeline — a simple domain knowledge skill might only need steps 1-3, while a complex workflow skill benefits from the complete cycle.
23
+
24
+ ## Communicating with the User
25
+
26
+ Adapt communication level to the user's context. If they're casually asking "help me make a skill," keep things simple. If they're asking about "eval assertions" and "benchmark variance," they want the full technical depth.
27
+
28
+ Brief explanations of technical terms are always appropriate:
29
+ - "assertions" = specific pass/fail checks on skill output
30
+ - "trigger eval" = test whether a description causes the skill to activate for the right queries
31
+ - "benchmark" = statistical comparison across multiple test runs
32
+
33
+ ## About Skills
34
+
35
+ Skills are modular, self-contained packages that extend Claude's capabilities with specialized knowledge, workflows, and tools. They transform Claude from a general-purpose agent into a specialized agent equipped with procedural knowledge that no model can fully possess.
36
+
37
+ ### What Skills Provide
38
+
39
+ 1. **Specialized workflows** — Multi-step procedures for specific domains
40
+ 2. **Tool integrations** — Instructions for working with specific file formats or APIs
41
+ 3. **Domain expertise** — Company-specific knowledge, schemas, business logic
42
+ 4. **Bundled resources** — Scripts, references, and assets for complex and repetitive tasks
43
+
44
+ ### Anatomy of a Skill
45
+
46
+ ```
47
+ skill-name/
48
+ ├── SKILL.md (required)
49
+ │ ├── YAML frontmatter (name, description required)
50
+ │ └── Markdown instructions
51
+ └── Bundled Resources (optional)
52
+ ├── scripts/ - Executable code (Python/Bash/etc.)
53
+ ├── references/ - Documentation loaded as needed
54
+ └── assets/ - Files used in output (templates, icons, fonts)
55
+ ```
56
+
57
+ ### Progressive Disclosure
58
+
59
+ Skills use a three-level loading system to manage context efficiently:
60
+
61
+ 1. **Metadata (name + description)** — Always in context (~100 words)
62
+ 2. **SKILL.md body** — When skill triggers (<500 lines ideal)
63
+ 3. **Bundled resources** — As needed by Claude (unlimited)
64
+
65
+ This matters because the context window is a shared resource. Keep SKILL.md lean (under 500 lines) and use references/ for detailed content.
66
+
67
+ ## Creating a Skill
68
+
69
+ ### Step 1: Capture Intent
70
+
71
+ Start by understanding concrete examples of how the skill will be used. Good questions:
72
+
73
+ - "What functionality should the skill support?"
74
+ - "Can you give examples of how this skill would be used?"
75
+ - "What would a user say that should trigger this skill?"
76
+
77
+ Avoid asking too many questions at once. Start with the most important ones and follow up as needed.
78
+
79
+ ### Step 2: Interview & Research
80
+
81
+ Analyze each example to identify reusable resources:
82
+
83
+ - Does the task require **rewriting the same code** each time? → Bundle a `scripts/` utility
84
+ - Does the task need **domain knowledge** to reference? → Create a `references/` file
85
+ - Does the task use **templates or assets** in its output? → Add to `assets/`
86
+
87
+ ### Step 3: Write the SKILL.md
88
+
89
+ Initialize the skill directory:
90
+
91
+ ```bash
92
+ mkdir -p skill-name/{references,scripts}
93
+ touch skill-name/SKILL.md
94
+ ```
95
+
96
+ #### Frontmatter
97
+
98
+ The description is the most important part — it determines when Claude will use the skill:
99
+
100
+ ```yaml
101
+ ---
102
+ name: skill-name
103
+ description: Create new X, modify existing X, and optimize X performance. This skill should be used when the user asks to "specific phrase 1", "specific phrase 2", or mentions related concepts. Use whenever the user wants to...
104
+ version: 0.1.0
105
+ ---
106
+ ```
107
+
108
+ Make descriptions **"pushy"** — explicitly state when to use the skill, including near-miss scenarios:
109
+
110
+ > "Make sure to use this skill whenever the user mentions dashboards, data visualization, internal metrics, or wants to display any kind of company data, even if they don't explicitly ask for a 'dashboard.'"
111
+
112
+ This combats undertriggering, which is the more common failure mode.
113
+
114
+ #### Body
115
+
116
+ Write using **imperative form** (verb-first instructions). Use objective, instructional language:
117
+
118
+ ```markdown
119
+ # Good
120
+ Parse the frontmatter using sed.
121
+ To accomplish X, do Y.
122
+
123
+ # Bad
124
+ You should parse the frontmatter...
125
+ If you need to do X, you can...
126
+ ```
127
+
128
+ For detailed writing guidance including progressive disclosure patterns, degrees of freedom, and common mistakes, read `references/writing-guide.md`.
129
+
130
+ For plugin-specific skill patterns (auto-discovery, testing with `--plugin-dir`, packaging), read `references/plugin-skill-patterns.md`.
131
+
132
+ ### Step 4: Create Test Cases
133
+
134
+ Draft 2-3 realistic test prompts that exercise the skill's core functionality. Save them in a workspace:
135
+
136
+ ```json
137
+ {
138
+ "skill_name": "my-skill",
139
+ "evals": [
140
+ {
141
+ "id": 0,
142
+ "prompt": "Realistic user request...",
143
+ "expected_output": "Description of expected result",
144
+ "assertions": ["Output contains X", "Format is correct", "Edge case Y handled"]
145
+ }
146
+ ]
147
+ }
148
+ ```
149
+
150
+ ## Running and Evaluating Test Cases
151
+
152
+ This is the heart of skill improvement. For the full eval framework details, read `references/eval-framework.md`.
153
+
154
+ ### Overview
155
+
156
+ For each test case, spawn **two subagent runs simultaneously**:
157
+
158
+ - **With-skill run**: Uses the skill being developed
159
+ - **Baseline run**: Either no skill (new skills) or previous version (iterations)
160
+
161
+ While runs execute, draft assertions — specific pass/fail criteria for each test case.
162
+
163
+ After runs complete:
164
+
165
+ 1. **Grade** outputs using the grader agent (`agents/grader.md`)
166
+ 2. **Aggregate** results into benchmark statistics
167
+ 3. **Review** with user — present findings conversationally
168
+ 4. **Collect feedback** for the next iteration
169
+
170
+ ### Workspace Structure
171
+
172
+ ```
173
+ <skill-name>-workspace/
174
+ ├── iteration-1/
175
+ │ ├── eval-0-name/
176
+ │ │ ├── with_skill/outputs/
177
+ │ │ ├── baseline/outputs/
178
+ │ │ └── eval_metadata.json
179
+ │ ├── benchmark.json
180
+ │ └── feedback.json
181
+ ├── iteration-2/
182
+ │ └── [same structure]
183
+ └── skill-snapshot/ (previous version when improving)
184
+ ```
185
+
186
+ ## Improving the Skill
187
+
188
+ After reviewing eval results, apply improvements following these principles:
189
+
190
+ 1. **Generalize from feedback** — Don't overfit to specific test cases. The skill must generalize to future, unseen uses.
191
+ 2. **Keep the prompt lean** — Remove instructions that don't pull their weight.
192
+ 3. **Explain the WHY** — Help Claude understand reasoning rather than following rigid rules. Prefer "This format matters because X" over "MUST use this format."
193
+ 4. **Look for repeated work** — If all test runs independently wrote the same helper script, bundle it in `scripts/`.
194
+ 5. **Set appropriate degrees of freedom** — High-stakes formatting? Lock it down. Creative decisions? Leave room for judgment.
195
+
196
+ ### The Iteration Loop
197
+
198
+ 1. Apply improvements to the skill
199
+ 2. Rerun all test cases into `iteration-<N+1>/`
200
+ 3. Compare against previous iteration
201
+ 4. Read feedback and repeat until satisfied
202
+
203
+ For advanced comparison, use the **blind comparator** (`agents/comparator.md`) to eliminate bias — it judges two outputs without knowing which version produced them. Then use the **analyzer** (`agents/analyzer.md`) to explain why the winner won and generate actionable improvement suggestions.
204
+
205
+ ## Description Optimization
206
+
207
+ After the skill is working well, optimize the description for accurate triggering.
208
+
209
+ ### Quick Method
210
+
211
+ Generate 20 realistic eval queries:
212
+ - 8-10 that **should** trigger the skill
213
+ - 8-10 that **should not** (near-misses from adjacent domains)
214
+
215
+ Review with the user, then iteratively improve the description. For the full automated optimization pipeline using `scripts/run_loop.py`, read `references/description-optimization.md`.
216
+
217
+ ### How Triggering Works
218
+
219
+ Claude decides whether to invoke a skill based on the skill's name and description (always in context) matched against the user's request. The SKILL.md body is never read during the triggering decision — only the frontmatter description matters.
220
+
221
+ ## Validation
222
+
223
+ Before finalizing, validate the skill:
224
+
225
+ ```bash
226
+ python3 scripts/quick_validate.py <path/to/skill>
227
+ ```
228
+
229
+ This checks frontmatter format, required fields, description quality, and file organization.
230
+
231
+ ### Manual Checklist
232
+
233
+ **Structure:**
234
+ - [ ] SKILL.md exists with valid YAML frontmatter
235
+ - [ ] `name` and `description` fields present
236
+ - [ ] Referenced files actually exist
237
+
238
+ **Description:**
239
+ - [ ] Includes specific trigger phrases users would say
240
+ - [ ] Lists concrete scenarios
241
+ - [ ] "Pushy" enough to combat undertriggering
242
+
243
+ **Content:**
244
+ - [ ] Body under 500 lines (use references/ for details)
245
+ - [ ] Imperative form, objective language
246
+ - [ ] Progressive disclosure applied
247
+
248
+ **Resources:**
249
+ - [ ] Scripts are executable and documented
250
+ - [ ] References don't duplicate SKILL.md content
251
+ - [ ] Examples are complete and working
252
+
253
+ ## Bundled Scripts
254
+
255
+ This skill includes Python scripts for automated evaluation:
256
+
257
+ | Script | Purpose | Dependencies |
258
+ |--------|---------|-------------|
259
+ | `quick_validate.py` | Validate skill structure | `pyyaml` |
260
+ | `run_eval.py` | Test description triggering | `anthropic` |
261
+ | `improve_description.py` | Improve descriptions with Claude | `anthropic` |
262
+ | `run_loop.py` | Iterative eval + improve loop | `anthropic` |
263
+ | `generate_report.py` | HTML report generation | stdlib |
264
+ | `aggregate_benchmark.py` | Benchmark statistics | stdlib |
265
+
266
+ **Requirements:** Python 3.10+, `pip install anthropic pyyaml`
267
+
268
+ ## Reference Files
269
+
270
+ For detailed documentation on specific topics:
271
+
272
+ | File | When to Read |
273
+ |------|-------------|
274
+ | `references/eval-framework.md` | Setting up and running the full eval pipeline |
275
+ | `references/description-optimization.md` | Automated description improvement workflow |
276
+ | `references/json-schemas.md` | JSON format specifications for all tracking files |
277
+ | `references/writing-guide.md` | Skill writing style, philosophy, and common mistakes |
278
+ | `references/plugin-skill-patterns.md` | Building skills within Claude Code plugins |
279
+
280
+ ## Push Integration
281
+
282
+ This skill is bundled with the `push-todo` CLI. When working on a Push voice task (invoked via `/push-todo`), skills can report progress back to the Push iOS app using CLI commands.
283
+
284
+ ### Available Callbacks
285
+
286
+ | Command | Purpose |
287
+ |---------|---------|
288
+ | `push-todo --mark-completed <uuid>` | Mark a task as done |
289
+ | `push-todo --mark-completed <uuid> --completion-comment "..."` | Mark done with summary |
290
+ | `push-todo --learn-vocabulary <uuid> --keywords 'term1,term2'` | Teach routing keywords |
291
+
292
+ ### When to Use
293
+
294
+ These callbacks are relevant when:
295
+ - The user invoked `/push-todo` and selected a task to work on
296
+ - A skill completes work that resolves a Push task
297
+ - The daemon is executing a task autonomously
298
+
299
+ For skill-builder specifically: after creating or improving a skill as part of a Push task, mark it complete and contribute vocabulary terms (e.g., `skill,SKILL.md,eval,benchmark,frontmatter`).
300
+
301
+ ### Not Always Relevant
302
+
303
+ Many skill-builder invocations have nothing to do with Push — the user just wants to create a skill. The Push callbacks only matter when there's an active Push task context. Don't call them unprompted.
304
+
305
+ ## Agent Files
306
+
307
+ Specialized subagent instructions (read when spawning subagents for evaluation):
308
+
309
+ | File | Purpose |
310
+ |------|---------|
311
+ | `agents/grader.md` | Assertion-based output grading (8-step process) |
312
+ | `agents/comparator.md` | Blind A/B comparison between skill versions |
313
+ | `agents/analyzer.md` | Post-hoc analysis and improvement suggestions |