npm - jettypod - Versions diffs - 4.4.115 → 4.4.118 - Mend

jettypod 4.4.115 → 4.4.118

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/.env +7 -0
package/apps/dashboard/app/api/claude/[workItemId]/message/route.ts +25 -9
package/apps/dashboard/app/api/claude/sessions/[sessionId]/message/route.ts +7 -3
package/apps/dashboard/app/api/tests/run/stream/route.ts +13 -1
package/apps/dashboard/app/api/usage/route.ts +17 -0
package/apps/dashboard/app/connect-claude/page.tsx +24 -0
package/apps/dashboard/app/install-claude/page.tsx +8 -6
package/apps/dashboard/app/login/page.tsx +229 -0
package/apps/dashboard/app/page.tsx +5 -3
package/apps/dashboard/app/settings/page.tsx +2 -0
package/apps/dashboard/app/subscribe/page.tsx +11 -0
package/apps/dashboard/app/welcome/page.tsx +23 -0
package/apps/dashboard/components/AppShell.tsx +51 -9
package/apps/dashboard/components/CardMenu.tsx +14 -5
package/apps/dashboard/components/ClaudePanel.tsx +65 -9
package/apps/dashboard/components/ConnectClaudeScreen.tsx +223 -0
package/apps/dashboard/components/DragContext.tsx +73 -64
package/apps/dashboard/components/DraggableCard.tsx +6 -46
package/apps/dashboard/components/GateCard.tsx +21 -0
package/apps/dashboard/components/InstallClaudeScreen.tsx +132 -30
package/apps/dashboard/components/KanbanBoard.tsx +173 -56
package/apps/dashboard/components/PlaceholderCard.tsx +9 -19
package/apps/dashboard/components/ProjectSwitcher.tsx +28 -0
package/apps/dashboard/components/RealTimeKanbanWrapper.tsx +34 -3
package/apps/dashboard/components/RealTimeTestsWrapper.tsx +30 -2
package/apps/dashboard/components/SubscribeContent.tsx +191 -0
package/apps/dashboard/components/TipCard.tsx +176 -0
package/apps/dashboard/components/UpgradeBanner.tsx +29 -0
package/apps/dashboard/components/WelcomeScreen.tsx +14 -4
package/apps/dashboard/components/settings/AccountSection.tsx +163 -0
package/apps/dashboard/contexts/ClaudeSessionContext.tsx +292 -29
package/apps/dashboard/contexts/UsageContext.tsx +131 -0
package/apps/dashboard/contexts/usageHelpers.js +9 -0
package/apps/dashboard/electron/ipc-handlers.js +220 -114
package/apps/dashboard/electron/main.js +415 -37
package/apps/dashboard/electron/preload.js +23 -4
package/apps/dashboard/electron/session-manager.js +141 -0
package/apps/dashboard/electron-builder.config.js +3 -5
package/apps/dashboard/lib/claude-process-manager.ts +6 -4
package/apps/dashboard/lib/db-bridge.ts +32 -0
package/apps/dashboard/lib/db.ts +159 -13
package/apps/dashboard/lib/session-state-machine.ts +3 -0
package/apps/dashboard/lib/session-stream-manager.ts +76 -13
package/apps/dashboard/lib/tests.ts +3 -1
package/apps/dashboard/next.config.js +19 -14
package/apps/dashboard/package.json +3 -1
package/apps/dashboard/scripts/upload-to-r2.js +89 -0
package/apps/dashboard/tsconfig.tsbuildinfo +1 -0
package/apps/update-server/package.json +16 -0
package/apps/update-server/schema.sql +31 -0
package/apps/update-server/src/index.ts +1074 -0
package/apps/update-server/tsconfig.json +16 -0
package/apps/update-server/wrangler.toml +35 -0
package/docs/bdd-guidance.md +390 -0
package/jettypod.js +5 -4
package/lib/migrations/027-plan-at-creation-column.js +31 -0
package/lib/migrations/028-ready-for-review-column.js +27 -0
package/lib/schema.js +3 -1
package/lib/seed-onboarding.js +100 -68
package/lib/work-commands/index.js +43 -13
package/lib/work-tracking/index.js +46 -27
package/package.json +1 -1
package/skills-templates/bug-mode/SKILL.md +5 -11
package/skills-templates/request-routing/SKILL.md +24 -11
package/skills-templates/simple-improvement/SKILL.md +35 -19
package/skills-templates/stable-mode/SKILL.md +5 -6
package/templates/bdd-guidance.md +139 -0
package/templates/bdd-scaffolding/wait.js +18 -0
package/templates/bdd-scaffolding/world.js +19 -0
package/.jettypod-backup/work.db +0 -0
package/apps/dashboard/app/access-code/page.tsx +0 -110
package/lib/discovery-checkpoint.js +0 -123
package/skills-templates/project-discovery/SKILL.md +0 -372

package/apps/update-server/tsconfig.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "lib": ["ESNext"],
+    "types": ["@cloudflare/workers-types"],
+    "strict": true,
+    "noEmit": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true
+  },
+  "include": ["src/**/*.ts"]
+}

package/apps/update-server/wrangler.toml ADDED Viewed

@@ -0,0 +1,35 @@
+name = "jettypod-update-server"
+main = "src/index.ts"
+compatibility_date = "2025-02-01"
+# R2 bucket for release artifacts (DMG, ZIP, latest-mac.yml)
+[[r2_buckets]]
+binding = "RELEASE_ARTIFACTS"
+bucket_name = "jettypod-releases"
+# D1 database for user accounts + usage tracking
+[[d1_databases]]
+binding = "AUTH_DB"
+database_name = "jettypod-auth"
+database_id = "80205398-8f56-4e04-a612-4a348458098f"
+# KV namespace for OTP codes (with TTL)
+[[kv_namespaces]]
+binding = "AUTH_KV"
+id = "91b43f72715e4c26a15b1dc0371af225"
+# Stripe secrets - set via wrangler:
+#   wrangler secret put STRIPE_SECRET_KEY
+#   wrangler secret put STRIPE_WEBHOOK_SECRET
+#   wrangler secret put STRIPE_MONTHLY_PRICE_ID
+#   wrangler secret put STRIPE_LIFETIME_PRICE_ID
+# Auth secrets - set via wrangler:
+#   wrangler secret put GOOGLE_CLIENT_ID
+#   wrangler secret put GOOGLE_CLIENT_SECRET
+#   wrangler secret put JWT_SECRET
+#   wrangler secret put RESEND_API_KEY
+# Do NOT put actual keys here. Use wrangler secrets for sensitive values.
+# Environment variables (non-secret)
+[vars]
+ENVIRONMENT = "production"

package/docs/bdd-guidance.md ADDED Viewed

@@ -0,0 +1,390 @@
+What BDD actually is (and isn’t)
+BDD is a collaboration + specification technique that uses concrete examples to describe behavior in a shared language.
+The “unit tests” are not the point. The examples are the point.
+BDD tests should validate behavior that matters to users/business—without leaking implementation details.
+BDD ≠ “write all tests in Gherkin.” BDD can be done with plain unit/integration tests too. Gherkin is just a common interface for readability and stakeholder alignment.
+A good mental model:
+Feature files describe “what” and “why.”
+Step definitions implement “how,” but only at a high level.
+Lower-level details live in helper layers (Page Objects, API clients, domain helpers).
+The BDD flow (what “good” looks like)
+Discovery (3 Amigos: product + dev + QA)
+Agree on behavior via examples: happy path + edge cases.
+Formulation
+Turn examples into scenarios (often in Gherkin).
+Automation
+Implement step definitions that call into a small, reusable automation layer.
+Living documentation
+Keep scenarios accurate and stable; prune duplicates; version behavior over time.
+Gherkin, done well (the style rules that save you later)
+Core primitives
+Feature: coherent behavior area
+Scenario: one concrete example
+Given/When/Then:
+Given: preconditions / state
+When: action
+Then: observable outcomes
+Good scenario traits
+Small: one behavior, one reason to fail
+Declarative: describes intent, not UI clicks
+Stable: avoids brittle details (pixel-level UI, timing hacks)
+Deterministic: no reliance on “whatever data happens to exist”
+Example (good)
+Scenario: User can retry a failed payment
+  Given a user with an unpaid invoice
+  And the payment processor returns "insufficient_funds"
+  When the user retries payment with a different card
+  Then the invoice is marked as paid
+  And the user sees a receipt
+Example (brittle / not great)
+Scenario: Pay invoice
+  Given I click the "Billing" tab
+  And I wait 2 seconds
+  And I click the third button on the page
+  When I type "4111111111111111" into the card field
+  Then I should see "Success"
+This is more of a UI macro recorder than a behavioral spec.
+Step definitions: the most common place things go off the rails
+The biggest rule
+Step definitions should be thin.
+They should:
+parse parameters
+call a helper/API/page-object method
+assert outcomes at the correct level
+They should not:
+contain lots of branching logic
+do complex loops
+embed SQL queries
+“know” too much about UI selectors
+implement multi-step workflows inline
+The “thin step” pattern
+Step def → calls one intentful function (e.g., billing.retryPaymentWith(card)), rather than doing click/type/wait directly.
+Example structure:
+// step definition
+When('the user retries payment with a different card', async () => {
+  await billing.retryPaymentWith(validCard2);
+});
+// helper layer (page object / service client)
+async function retryPaymentWith(card) {
+  await openBilling();
+  await selectInvoice(...);
+  await enterCard(card);
+  await submit();
+  await waitForReceipt(); // smart wait, not sleep(2000)
+}
+Handling “complex things” in BDD tests (the hard parts)
+1) Asynchrony and eventual consistency
+Problem: background jobs, queues, delayed writes, distributed systems.
+Best practices
+Prefer event-based or state-based polling with timeouts over fixed sleeps.
+Assert intermediate states if meaningful (“processing” → “completed”).
+If possible, expose a test-only hook (e.g., “job runner runs immediately” in test env).
+What to do:
+await waitFor(() => order.status === 'COMPLETED', { timeout: 10_000 })
+avoid: sleep(5000)
+2) External dependencies (payment providers, email/SMS, maps)
+Problem: flaky tests, slow runs, rate limits.
+Best practices
+For most BDD runs: stub at the boundary (in-process fake server, contract stub).
+Have a smaller set of true end-to-end smoke tests that hit real external services (maybe nightly).
+3) Authentication flows (OAuth, magic links)
+Best practices
+Prefer test auth shortcuts:
+a test-only endpoint to mint tokens
+bypass UI login with session injection
+Keep one or two UI-login scenarios if you must, but don’t make every scenario pay the “login tax.”
+4) Data setup that is “realistic” but not fragile
+Problem: complicated prerequisites create scenario bloat.
+Best practices
+Use factories/fixtures with names that encode intent:
+givenUserWithUnpaidInvoice()
+givenWorkspaceWith3MembersAndNoAdmin()
+Avoid “Given the database has…” in feature files. That’s implementation leakage.
+5) Time, randomness, and IDs
+Best practices
+Freeze time (clock.set("2026-02-10T10:00:00Z")) or inject time providers.
+Seed randomness.
+Don’t assert on raw IDs; assert on meaning (“receipt exists”, “email sent to user”).
+6) UI interactions that are inherently finicky
+Best practices
+Use stable locators (data-testid, ARIA roles) rather than CSS chains.
+Use smart waits (element visible/enabled, network idle) not sleeps.
+Put selectors in one place (page objects / screen model).
+7) Distributed workflows (webhook in, job runs, UI updates)
+Best practices
+Split assertions by layer:
+API-level scenario verifies webhook → status update
+UI-level scenario verifies status display
+Don’t force one scenario to validate every link in the chain unless it’s explicitly a top-level acceptance test.
+The test pyramid in BDD terms (where each kind of test belongs)
+A very effective setup:
+Many unit tests (fast, deterministic): pure logic
+Many integration/contract tests: service boundaries, DB, message bus (still fast-ish)
+Some BDD scenarios: critical user journeys and key edge cases
+Very few UI E2E: smoke and “are we totally broken?” checks
+BDD scenarios can exist at multiple levels (API-level BDD is often a sweet spot).
+Mocks, stubs, fakes: what they are (and why people argue about them)
+Definitions (practical, not academic)
+Mock: a test double you can verify interactions with
+(“Was chargeCard() called with amount=4999?”)
+Stub: a test double that returns predetermined responses
+(“When /payments is called, return 402 insufficient_funds”)
+Fake: a lightweight working implementation
+(in-memory DB, fake email inbox, fake queue)
+Spy: like a mock, but wraps a real object and records calls
+When to use what
+Use stubs/fakes for most BDD scenarios because they support behavior assertions (“user sees receipt”) without coupling to call patterns.
+Use mocks sparingly, mostly in unit tests or when verifying a critical side effect is the purpose of the scenario.
+The big danger of mocks in BDD
+Mocks push you toward testing implementation details:
+“did we call X?” rather than “did the user get the outcome?”
+Sometimes verifying calls is legitimate (e.g., “audit event emitted”), but generally:
+BDD asserts outcomes, not internal choreography.
+Step definition best practices checklist (great for “is my AI behaving?”)
+✅ Green flags
+Steps are short (often 1–5 lines)
+Steps call named helper methods (domain language)
+Assertions are in Then steps (or helper assertions)
+Givens set up intentful state, not low-level DB edits
+Reuse happens through helper methods, not giant shared step defs
+Steps avoid sleeps; use smart waits
+Scenario language avoids UI specifics unless truly necessary
+🚩 Red flags (AI assistants love these)
+Step defs contain:
+loops, conditionals, try/catch gymnastics
+direct SQL / direct ORM writes sprinkled everywhere
+lots of selectors + click/type chains inline
+random sleeps/timeouts to “make it pass”
+Steps are overly generic:
+“When I do the thing”
+“Then it works”
+Heavy parameterization:
+Steps with 6–10 parameters usually mean you’re encoding a DSL no one can read
+Shared state is global and leaky across scenarios
+One scenario validates 12 different outcomes (“kitchen sink test”)
+A strict architecture that keeps BDD clean
+If you want your AI to stay disciplined, give it a structure it can’t easily “freestyle” out of:
+Recommended layers
+Feature files (behavior)
+Step definitions (glue)
+Domain tasks / Screenplay actions (intentful operations)
+Drivers
+UI driver (page objects / screen model)
+API client
+DB helper (sparingly)
+Message bus helper
+Test fixtures/factories
+Rule of thumb:
+Step defs may depend on domain tasks
+Domain tasks may depend on drivers
+Feature files know nothing about drivers
+This prevents selector soup from infecting Gherkin.
+Making your AI assistant “strict” (practical constraints you can enforce)
+Here are concrete constraints you can put in your prompt / code review rubric:
+Step definition max complexity
+No loops
+No conditionals except trivial parameter mapping
+No sleeps
+Selectors forbidden in steps
+Must live in page objects/screen models only
+One intentful call per step
+Steps call one task method
+Outcome assertions only in Then
+No shared global mutable state
+Use scenario context object only
+Deterministic data
+Factories generate known entities; tests never depend on prod-like ambient data
+If you tell the AI “follow best practices,” it’ll nod vigorously and then sleep(2000) anyway. If you tell it “sleep is banned,” it suddenly remembers how to wait for elements like an adult.
+Quick example: translating complex behavior into clean steps
+Complex behavior: “User triggers export; job runs async; user is notified; file is downloadable.”
+Good BDD split:
+Scenario asserts user-level behavior
+Implementation uses polling and test doubles
+Scenario: User can download a completed export
+  Given a user with 3 projects
+  When the user requests a project export
+  Then the export eventually completes
+  And the user can download the export file
+Implementation strategy:
+“requests a project export” → calls the export API
+“eventually completes” → polls the status endpoint with a timeout
+“can download” → checks that the signed URL returns 200 and the file has the expected headers
+No sleeps, no digging into job queue internals (unless you’re specifically testing that).

package/jettypod.js CHANGED Viewed

@@ -877,6 +877,11 @@ async function initializeProject() {
         }
       });
       if (result) {
+        // Mark onboarding chores as conversational (no worktree, no 5s delay)
+        const db = getDb();
+        for (const choreId of result.choreIds) {
+          db.run('UPDATE work_items SET conversational = 1 WHERE id = ?', [choreId]);
+        }
         console.log(`📋 Created onboarding epic with ${result.choreIds.length} chores`);
         console.log(`   Start with: jettypod work start ${result.choreIds[0]}`);
       }
@@ -2200,10 +2205,6 @@ switch (command) {
             }
           });
-          // Clear checkpoint
-          const checkpoint = require('./lib/discovery-checkpoint');
-          checkpoint.clearCheckpoint();
           await generateClaude({ autoCommit: true });
           console.log('✅ Project discovery complete!');

package/lib/migrations/027-plan-at-creation-column.js ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * Migration: Add plan_at_creation column to work_items
+ *
+ * Purpose: Track which plan the user was on when creating each work item.
+ * Used for local usage tracking — only work items created on the 'free'
+ * plan count toward the weekly limit.
+ *
+ * up(db):   adds the nullable TEXT column, then backfills every existing row
+ *           to 'free'. The returned promise resolves once both statements
+ *           have run and rejects with the error of the statement that failed.
+ * down(db): intentionally a no-op; the column is left in place.
+ *
+ * NOTE(review): lib/schema.js also appears to declare plan_at_creation in its
+ * CREATE TABLE statement. If this migration is applied to a database created
+ * from that schema, the ALTER TABLE would presumably fail with a
+ * duplicate-column error — confirm how the migration runner handles that.
+ */
+module.exports = {
+  id: '027-plan-at-creation-column',
+  description: 'Add plan_at_creation column to work_items',
+  async up(db) {
+    return new Promise((resolve, reject) => { // wraps the callback-style db.run calls in a promise
+      db.run(`ALTER TABLE work_items ADD COLUMN plan_at_creation TEXT DEFAULT NULL`, (err) => {
+        if (err) return reject(err);
+        // Backfill existing work items — runs only after the ALTER succeeded; every row is still NULL here, so all existing items are assumed to be free-plan items
+        db.run(`UPDATE work_items SET plan_at_creation = 'free' WHERE plan_at_creation IS NULL`, (err2) => {
+          if (err2) return reject(err2);
+          resolve();
+        });
+      });
+    });
+  },
+  async down(db) {
+    // SQLite doesn't support DROP COLUMN before 3.35.0, so no rollback is attempted.
+    // The column is left in place and will just be ignored if not used.
+    return Promise.resolve();
+  }
+};

package/lib/migrations/028-ready-for-review-column.js ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Migration: Add ready_for_review column to work_items
+ *
+ * Purpose: Gate accept/reject button visibility on kanban cards.
+ * When ready_for_review = 1, the card shows accept/reject buttons.
+ * Auto-set when all child chores complete; cleared on rejection.
+ * (The code that sets and clears the flag is not part of this migration.)
+ *
+ * up(db):   adds the INTEGER column with DEFAULT 0, so no backfill statement
+ *           is issued. The returned promise rejects with the ALTER TABLE
+ *           error, if any, and resolves otherwise.
+ * down(db): intentionally a no-op; the column is left in place.
+ */
+module.exports = {
+  id: '028-ready-for-review-column',
+  description: 'Add ready_for_review column to work_items',
+  async up(db) {
+    return new Promise((resolve, reject) => { // wraps the callback-style db.run call in a promise
+      db.run(`ALTER TABLE work_items ADD COLUMN ready_for_review INTEGER DEFAULT 0`, (err) => {
+        if (err) return reject(err);
+        resolve();
+      });
+    });
+  },
+  async down(db) {
+    // SQLite doesn't support DROP COLUMN before 3.35.0, so no rollback is attempted.
+    // The column is left in place and will just be ignored if not used.
+    return Promise.resolve();
+  }
+};

package/lib/schema.js CHANGED Viewed

@@ -30,7 +30,9 @@ const SCHEMA_SQL = `
     scenario_file TEXT,
     completed_at TEXT,
     created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
-    display_order INTEGER DEFAULT NULL
+    display_order INTEGER DEFAULT NULL,
+    conversational INTEGER DEFAULT 0,
+    plan_at_creation TEXT DEFAULT NULL
   );
   CREATE TABLE IF NOT EXISTS project_config (