ohwow 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/index.js +2801 -1675
  2. package/dist/mcp-server/index.js +12 -12
  3. package/dist/migrations/107-code-skills.sql +20 -0
  4. package/dist/migrations/108-archive-procedure-skills.sql +29 -0
  5. package/dist/migrations/109-workspace-default-fs-paths.sql +19 -0
  6. package/dist/migrations/110-task-state-ttl.sql +28 -0
  7. package/dist/migrations/111-conversation-status.sql +25 -0
  8. package/dist/migrations/112-deliverables-created-at-iso.sql +40 -0
  9. package/dist/migrations/113-permission-requests.sql +36 -0
  10. package/dist/migrations/114-llm-calls-tool-telemetry.sql +32 -0
  11. package/dist/migrations/115-trigger-watchdog.sql +46 -0
  12. package/dist/migrations/116-self-findings.sql +46 -0
  13. package/dist/migrations/117-experiment-validations.sql +64 -0
  14. package/dist/migrations/118-validation-rollback.sql +33 -0
  15. package/dist/migrations/119-runtime-config-overrides.sql +44 -0
  16. package/dist/migrations/120-business-vitals.sql +44 -0
  17. package/dist/migrations/121-x-contact-events.sql +42 -0
  18. package/dist/migrations/122-video-jobs.sql +52 -0
  19. package/dist/migrations/123-insight-distiller.sql +68 -0
  20. package/dist/migrations/124-x-dm-messages.sql +55 -0
  21. package/dist/migrations/125-x-dm-messages-bodies.sql +40 -0
  22. package/dist/migrations/126-x-dm-signals.sql +52 -0
  23. package/dist/migrations/127-x-dm-contact-linking.sql +36 -0
  24. package/dist/migrations/128-attribution-view.sql +59 -0
  25. package/dist/migrations/129-x-posted-log.sql +36 -0
  26. package/dist/migrations/130-patches-attempted-log.sql +44 -0
  27. package/dist/web/assets/index-Bp9CoQ8c.css +1 -0
  28. package/dist/web/assets/index-C5xtuLcg.js +102 -0
  29. package/dist/web/index.html +2 -2
  30. package/package.json +5 -1
  31. package/dist/web/assets/index-Bgm-uSeA.js +0 -100
  32. package/dist/web/assets/index-DZAi92e-.css +0 -1
@@ -0,0 +1,46 @@
1
+ -- =====================================================================
2
+ -- Migration 116: self_findings — structured ledger for self-experimentation
3
+ --
4
+ -- Phase 1 of the self-improvement loop. Every experiment run by the
5
+ -- ExperimentRunner writes a row here: what was tested, what the verdict
6
+ -- was, what intervention (if any) was applied, and what the evidence
7
+ -- looked like. This becomes:
8
+ -- 1. The ground-truth record the next experiment reads before running
9
+ -- so the system doesn't re-investigate things it already knows.
10
+ -- 2. The feedback substrate: E1's demotion cache, E2's trigger
11
+ -- watchdog, the upcoming canary suite, etc. all write findings so
12
+ -- every future Claude session (and every agent's own planning) can
13
+ -- query a uniform "what do we know about ourselves?" surface.
14
+ -- 3. The input for the eventual meta-loop that picks the next
15
+ -- experiment to run based on what's unknown or drifting.
16
+ --
17
+ -- Nothing writes here yet as of this migration — the writers land in
18
+ -- commit Phase1-B as part of the ExperimentRunner and its wrapper
19
+ -- experiments around E1/E2. This migration is the shape-only slice.
20
+ -- =====================================================================
21
+
22
+ -- @statement
23
+ CREATE TABLE IF NOT EXISTS self_findings ( -- one row per experiment run (see header above)
24
+ id TEXT PRIMARY KEY, -- no DEFAULT: the writer must supply the id
25
+ experiment_id TEXT NOT NULL, -- experiment that produced this finding
26
+ category TEXT NOT NULL, -- free-form (no CHECK); migration 117 uses 'validation' for validation outcomes
27
+ subject TEXT, -- nullable; idx_findings_subject only indexes rows where this is set
28
+ hypothesis TEXT, -- nullable; presumably "what was tested" — TODO confirm against the writer
29
+ verdict TEXT NOT NULL CHECK (verdict IN ('pass', 'warning', 'fail', 'error')), -- closed set enforced by the CHECK
30
+ summary TEXT NOT NULL,
31
+ evidence TEXT NOT NULL DEFAULT '{}', -- stored as TEXT; the '{}' default suggests a JSON object — confirm with the writer
32
+ intervention_applied TEXT, -- the intervention that was applied, if any; NULL otherwise
33
+ ran_at TEXT NOT NULL, -- no DEFAULT: the writer must supply it; every index below sorts on this column
34
+ duration_ms INTEGER NOT NULL DEFAULT 0,
35
+ status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'superseded', 'revoked')), -- idx_findings_verdict covers only 'active' rows
36
+ superseded_by TEXT, -- presumably the id of the replacing self_findings row; no FK is declared — TODO confirm
37
+ created_at TEXT NOT NULL DEFAULT (datetime('now')) -- insert time; datetime('now') yields UTC 'YYYY-MM-DD HH:MM:SS'
38
+ );
39
+ -- @statement
40
+ CREATE INDEX IF NOT EXISTS idx_findings_experiment ON self_findings(experiment_id, ran_at DESC);
41
+ -- @statement
42
+ CREATE INDEX IF NOT EXISTS idx_findings_category ON self_findings(category, ran_at DESC);
43
+ -- @statement
44
+ CREATE INDEX IF NOT EXISTS idx_findings_verdict ON self_findings(verdict, ran_at DESC) WHERE status = 'active';
45
+ -- @statement
46
+ CREATE INDEX IF NOT EXISTS idx_findings_subject ON self_findings(subject, ran_at DESC) WHERE subject IS NOT NULL;
@@ -0,0 +1,64 @@
1
+ -- =====================================================================
2
+ -- Migration 117: experiment_validations — accountability for interventions
3
+ --
4
+ -- Phase 3 of the self-improvement loop. Every time an Experiment's
5
+ -- intervene() mutates system state, the runner enqueues a validation
6
+ -- row to be processed ~15 minutes later. At validation time the
7
+ -- experiment's validate() hook reads the stored baseline, measures
8
+ -- current state, and returns held | failed | inconclusive. The outcome
9
+ -- lands as a self_findings row with category='validation' so queries
10
+ -- can trace "what the system decided and whether it was right."
11
+ --
12
+ -- Without this table, an intervention vanishes into history the moment
13
+ -- it's applied — there's no way to tell tomorrow whether yesterday's
14
+ -- stale-task-cleanup actually unblocked the queue or the queue filled
15
+ -- up again with new zombies. The validation step is the feedback loop
16
+ -- that makes every intervention a measurable claim instead of a
17
+ -- fire-and-forget side effect.
18
+ --
19
+ -- Columns:
20
+ -- intervention_finding_id — the self_findings row that carried the
21
+ -- original intervention_applied blob.
22
+ -- experiment_id — the experiment that owns the validate()
23
+ -- hook. The runner looks it up in the live
24
+ -- registry at validation time.
25
+ -- baseline — JSON snapshot captured from the
26
+ -- intervention's details. This is what the
27
+ -- validate() function gets as its first
28
+ -- argument.
29
+ -- validate_at — ISO timestamp when the runner should fire
30
+ -- the validation. Indexed so the due-query
31
+ -- stays cheap.
32
+ -- status — pending | completed | skipped | error
33
+ -- ('skipped' = experiment no longer has
34
+ -- validate() by the time the row is due)
35
+ -- outcome — held | failed | inconclusive — null until
36
+ -- validation fires.
37
+ -- outcome_finding_id — self_findings row the validation wrote.
38
+ -- =====================================================================
39
+
40
+ -- @statement
41
+ CREATE TABLE IF NOT EXISTS experiment_validations (
42
+ id TEXT PRIMARY KEY,
43
+ intervention_finding_id TEXT NOT NULL,
44
+ experiment_id TEXT NOT NULL,
45
+ baseline TEXT NOT NULL DEFAULT '{}',
46
+ validate_at TEXT NOT NULL,
47
+ status TEXT NOT NULL DEFAULT 'pending'
48
+ CHECK (status IN ('pending', 'completed', 'skipped', 'error')),
49
+ outcome TEXT CHECK (outcome IS NULL OR outcome IN ('held', 'failed', 'inconclusive')),
50
+ outcome_finding_id TEXT,
51
+ error_message TEXT,
52
+ completed_at TEXT,
53
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
54
+ );
55
+ -- @statement
56
+ CREATE INDEX IF NOT EXISTS idx_validations_due
57
+ ON experiment_validations(validate_at)
58
+ WHERE status = 'pending';
59
+ -- @statement
60
+ CREATE INDEX IF NOT EXISTS idx_validations_experiment
61
+ ON experiment_validations(experiment_id, created_at DESC);
62
+ -- @statement
63
+ CREATE INDEX IF NOT EXISTS idx_validations_intervention
64
+ ON experiment_validations(intervention_finding_id);
@@ -0,0 +1,33 @@
1
+ -- =====================================================================
2
+ -- Migration 118: Rollback tracking on experiment_validations
3
+ --
4
+ -- Phase 5-A: close the validation feedback loop by letting the runner
5
+ -- auto-revert failed interventions. When validate() returns
6
+ -- outcome='failed' and the experiment exposes a rollback() hook, the
7
+ -- runner calls it, writes a rollback finding, and stamps the
8
+ -- validation row so queries can distinguish "failed but reverted"
9
+ -- from "failed and still bad."
10
+ --
11
+ -- Columns:
12
+ -- rolled_back — 1 when a rollback ran successfully, else 0.
13
+ -- Default 0 so legacy rows show as not-rolled-back.
14
+ -- rollback_finding_id — id of the self_findings row the runner wrote
15
+ -- for the rollback (plain TEXT; no FK constraint is declared). Pair with outcome_finding_id
16
+ -- to get the full "validation said fail,
17
+ -- rollback said X" trail.
18
+ -- rolled_back_at — ISO timestamp when the rollback ran.
19
+ --
20
+ -- No schema change for Experiment implementations that don't need
21
+ -- rollback — the hook is optional on the interface.
22
+ -- =====================================================================
23
+
24
+ -- @statement
25
+ ALTER TABLE experiment_validations ADD COLUMN rolled_back INTEGER NOT NULL DEFAULT 0; -- 0/1 flag; NOT NULL so `rolled_back = 0` filters never miss a row, legacy rows backfill to 0
26
+ -- @statement
27
+ ALTER TABLE experiment_validations ADD COLUMN rollback_finding_id TEXT;
28
+ -- @statement
29
+ ALTER TABLE experiment_validations ADD COLUMN rolled_back_at TEXT;
30
+ -- @statement
31
+ CREATE INDEX IF NOT EXISTS idx_validations_rolled_back
32
+ ON experiment_validations(rolled_back, validate_at DESC)
33
+ WHERE rolled_back = 1;
@@ -0,0 +1,44 @@
1
+ -- =====================================================================
2
+ -- Migration 119: runtime_config_overrides — reversible config at runtime
3
+ --
4
+ -- Phase 5-B: key-value store for config values that an experiment
5
+ -- can change at runtime and roll back if validation fails. Used by
6
+ -- the upcoming tuner experiments (Phase 5-C) that adjust thresholds
7
+ -- like STALE_THRESHOLD_MS based on observed ledger patterns.
8
+ --
9
+ -- Design
10
+ -- ------
11
+ -- Every entry has:
12
+ -- - key — opaque string, by convention namespaced with a
13
+ -- dot (e.g. "stale_task_cleanup.threshold_ms")
14
+ -- - value — JSON-serialized value, parsed by the consumer
15
+ -- - set_by — experiment_id that wrote this entry (for audit)
16
+ -- - finding_id — the finding row that captured the decision,
17
+ -- so rollbacks can link back to the original
18
+ -- intervention
19
+ -- - set_at — ISO timestamp
20
+ --
21
+ -- Consumers pattern:
22
+ -- const threshold = await getRuntimeConfig(db, 'stale_task_cleanup.threshold_ms', DEFAULT);
23
+ -- Writers pattern (inside intervene):
24
+ -- await setRuntimeConfig(db, 'key', newValue, { setBy: exp.id, findingId });
25
+ -- Rollback pattern (inside rollback):
26
+ -- await deleteRuntimeConfig(db, 'key'); // reverts to caller's default
27
+ --
28
+ -- A module-level cache mirrors the table so hot-path reads don't hit
29
+ -- SQLite. Cache is refreshed on daemon boot + every 60s + on every
30
+ -- set/delete (local invalidation).
31
+ -- =====================================================================
32
+
33
+ -- @statement
34
+ CREATE TABLE IF NOT EXISTS runtime_config_overrides (
35
+ key TEXT PRIMARY KEY,
36
+ value TEXT NOT NULL,
37
+ set_by TEXT,
38
+ finding_id TEXT,
39
+ set_at TEXT NOT NULL DEFAULT (datetime('now')),
40
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
41
+ );
42
+ -- @statement
43
+ CREATE INDEX IF NOT EXISTS idx_runtime_config_set_by
44
+ ON runtime_config_overrides(set_by, set_at DESC);
@@ -0,0 +1,44 @@
1
+ -- =====================================================================
2
+ -- Migration 120: business_vitals — time-series of operator business signals
3
+ --
4
+ -- Week-1 "Heart": give the homeostasis controller something to read when
5
+ -- deciding whether the runtime is producing more value than it costs.
6
+ -- Each row is one snapshot of the operator's business at time ts.
7
+ --
8
+ -- Metric columns (mrr, arr, active_users, daily_cost_cents, runway_days) are all nullable so partial snapshots
9
+ -- land cleanly: a workspace with no Stripe key still accumulates
10
+ -- daily_cost_cents rows; a workspace with Stripe but no active-user
11
+ -- tracker still gets MRR.
12
+ --
13
+ -- Units:
14
+ -- mrr — monthly recurring revenue, cents
15
+ -- arr — annualized recurring revenue, cents (= mrr * 12 when
16
+ -- not derived from a separate feed)
17
+ -- active_users — count of distinct users active in the trailing window
18
+ -- daily_cost_cents — sum of agent_workforce_tasks.cost_cents for the
19
+ -- calendar day of ts (UTC)
20
+ -- runway_days — cash-on-hand / burn_rate when both are known
21
+ -- source — producer of this row: "stripe", "manual", "import",
22
+ -- "tasks_aggregate", etc. Never a business-specific
23
+ -- name. New producers just add their own string.
24
+ -- =====================================================================
25
+
26
+ -- @statement
27
+ CREATE TABLE IF NOT EXISTS business_vitals (
28
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
29
+ workspace_id TEXT NOT NULL,
30
+ ts TEXT NOT NULL DEFAULT (datetime('now')),
31
+ mrr INTEGER,
32
+ arr INTEGER,
33
+ active_users INTEGER,
34
+ daily_cost_cents INTEGER,
35
+ runway_days REAL,
36
+ source TEXT NOT NULL,
37
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
38
+ );
39
+ -- @statement
40
+ CREATE INDEX IF NOT EXISTS idx_business_vitals_workspace_ts
41
+ ON business_vitals(workspace_id, ts DESC);
42
+ -- @statement
43
+ CREATE INDEX IF NOT EXISTS idx_business_vitals_source
44
+ ON business_vitals(source, ts DESC);
@@ -0,0 +1,42 @@
1
+ -- 121-x-contact-events.sql
2
+ -- Layer-4 sales loop: substrate for attributing X signal → contact → revenue.
3
+ --
4
+ -- The contact_events table was introduced in 001 with columns tuned for
5
+ -- free-text CRM notes (event_type, title, description, agent_id, task_id,
6
+ -- metadata, created_at). The sales-loop flow needs a structured-event
7
+ -- shape (kind, source, payload JSON, occurred_at) that can encode the
8
+ -- funnel: x:seen, x:reached, x:replied, x:qualified, dm:received,
9
+ -- demo:booked, plan:paid. Rather than rename or migrate data, we grow
10
+ -- the new columns alongside the legacy ones so both consumer styles
11
+ -- coexist. Legacy CRM tools keep writing event_type/metadata; the
12
+ -- sales-loop path writes kind/payload/occurred_at.
13
+ --
14
+ -- All statements are safe to re-run: init.ts splits on `-- @statement` and
15
+ -- swallows "duplicate column" errors so re-runs against a partially
16
+ -- applied DB are safe.
17
+ --
18
+ -- Adds:
19
+ -- - outreach_token + never_sync on contacts (privacy + attribution).
20
+ -- - kind/source/payload/occurred_at on contact_events (funnel shape).
21
+ -- - contact_id + source_event_id on revenue_entries (attribution join).
22
+
23
+ ALTER TABLE agent_workforce_contacts ADD COLUMN outreach_token TEXT;
24
+ -- @statement
25
+ ALTER TABLE agent_workforce_contacts ADD COLUMN never_sync INTEGER NOT NULL DEFAULT 0;
26
+ -- @statement
27
+ ALTER TABLE agent_workforce_contact_events ADD COLUMN kind TEXT;
28
+ -- @statement
29
+ ALTER TABLE agent_workforce_contact_events ADD COLUMN source TEXT;
30
+ -- @statement
31
+ ALTER TABLE agent_workforce_contact_events ADD COLUMN payload TEXT DEFAULT '{}';
32
+ -- @statement
33
+ ALTER TABLE agent_workforce_contact_events ADD COLUMN occurred_at TEXT;
34
+ -- @statement
35
+ CREATE INDEX IF NOT EXISTS idx_contact_events_workspace_kind
36
+ ON agent_workforce_contact_events(workspace_id, kind, occurred_at);
37
+ -- @statement
38
+ ALTER TABLE agent_workforce_revenue_entries ADD COLUMN contact_id TEXT REFERENCES agent_workforce_contacts(id);
39
+ -- @statement
40
+ ALTER TABLE agent_workforce_revenue_entries ADD COLUMN source_event_id TEXT REFERENCES agent_workforce_contact_events(id);
41
+ -- @statement
42
+ CREATE INDEX IF NOT EXISTS idx_revenue_contact ON agent_workforce_revenue_entries(contact_id);
@@ -0,0 +1,52 @@
1
+ -- 122-video-jobs.sql
2
+ -- Tracking table for deterministic video renders driven by the
3
+ -- video_generation skill (src/execution/skills/video_generation.ts).
4
+ --
5
+ -- Each job corresponds to one VideoSpec → MP4 pipeline invocation. The
6
+ -- spec_hash column lets us dedupe: if a prior job with the same spec
7
+ -- hash is already 'done', callers can short-circuit and reuse that MP4
8
+ -- instead of re-rendering.
9
+ --
10
+ -- Checkpoints live in a child table so a crashed daemon can resume from
11
+ -- the last completed stage without losing earlier work (rendered voice,
12
+ -- generated music, timing solver output).
13
+
14
+ CREATE TABLE IF NOT EXISTS video_jobs ( -- one row per VideoSpec → MP4 pipeline invocation
15
+ id TEXT PRIMARY KEY, -- no DEFAULT: the writer must supply the id
16
+ workspace_id TEXT NOT NULL,
17
+ spec_hash TEXT NOT NULL, -- dedupe key: a prior 'done' job with the same hash can be reused; deliberately not UNIQUE
18
+ spec_path TEXT NOT NULL,
19
+ status TEXT NOT NULL CHECK (status IN ( -- no DEFAULT: the writer must set the initial status explicitly
20
+ 'pending','preparing','resolving','rendering','storing',
21
+ 'done','failed','canceled'
22
+ )),
23
+ progress REAL NOT NULL DEFAULT 0, -- NOTE(review): scale (0–1 vs 0–100) is not visible here — confirm with the writer
24
+ stage TEXT, -- free-form (no CHECK), unlike status
25
+ error TEXT, -- nullable; presumably set only for failed jobs — TODO confirm
26
+ output_path TEXT, -- nullable; presumably set once the MP4 is stored — TODO confirm
27
+ size_bytes INTEGER,
28
+ duration_frames INTEGER,
29
+ duration_ms INTEGER,
30
+ created_at TEXT NOT NULL DEFAULT (datetime('now')), -- insert time; datetime('now') yields UTC 'YYYY-MM-DD HH:MM:SS'
31
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')) -- default only fires on INSERT; no trigger here, so writers must bump it on UPDATE
32
+ );
33
+ -- @statement
34
+ CREATE INDEX IF NOT EXISTS idx_video_jobs_workspace_status
35
+ ON video_jobs(workspace_id, status);
36
+ -- @statement
37
+ CREATE INDEX IF NOT EXISTS idx_video_jobs_spec_hash
38
+ ON video_jobs(spec_hash);
39
+ -- @statement
40
+ CREATE INDEX IF NOT EXISTS idx_video_jobs_created
41
+ ON video_jobs(created_at DESC);
42
+ -- @statement
43
+ CREATE TABLE IF NOT EXISTS video_job_checkpoints (
44
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
45
+ job_id TEXT NOT NULL REFERENCES video_jobs(id) ON DELETE CASCADE,
46
+ stage TEXT NOT NULL,
47
+ payload TEXT NOT NULL DEFAULT '{}',
48
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
49
+ );
50
+ -- @statement
51
+ CREATE INDEX IF NOT EXISTS idx_vjc_job_stage
52
+ ON video_job_checkpoints(job_id, stage);
@@ -0,0 +1,68 @@
1
+ -- =====================================================================
2
+ -- Migration 123: insight distiller — novelty baselines + feedback ledger
3
+ --
4
+ -- Piece 1 of the "surprise-first self-observation" bundle. Raw findings
5
+ -- keep flowing into self_findings on the 5s reactive reschedule; this
6
+ -- migration adds the two tables that let the system tell "the 500th
7
+ -- identical repetition" apart from "an unusual thing just happened":
8
+ --
9
+ -- self_observation_baselines
10
+ -- One row per (experiment_id, subject) that accumulates a
11
+ -- running mean + stddev (kept as the M2 accumulator) over an optional numeric evidence field
12
+ -- (`tracked_field`) via Welford's algorithm. Also tracks first-seen
13
+ -- timestamp, sample count, last verdict, and consecutive fail
14
+ -- count — enough to answer "have we seen this before?" and "has
15
+ -- the verdict been stuck for a while?" without scanning the
16
+ -- ledger. Findings-store writes this row alongside every insert
17
+ -- and mixes the resulting novelty score into the finding's
18
+ -- evidence.__novelty so the distiller can rank by surprise.
19
+ --
20
+ -- self_insight_feedback
21
+ -- Operator / agent feedback ledger: accepted / rejected /
22
+ -- deferred / applied actions taken on a specific finding, keyed
23
+ -- by finding_id. Closes the loop so the strategist and
24
+ -- experiment-author can eventually learn which suggestions
25
+ -- actually landed well. Nothing writes here yet — the REST +
26
+ -- MCP surfaces for recording feedback come in a later piece;
27
+ -- this migration is the shape-only slice.
28
+ -- =====================================================================
29
+
30
+ -- @statement
31
+ CREATE TABLE IF NOT EXISTS self_observation_baselines (
32
+ experiment_id TEXT NOT NULL,
33
+ subject TEXT NOT NULL,
34
+ first_seen_at TEXT NOT NULL,
35
+ last_seen_at TEXT NOT NULL,
36
+ sample_count INTEGER NOT NULL DEFAULT 0,
37
+ tracked_field TEXT,
38
+ running_mean REAL,
39
+ running_m2 REAL,
40
+ last_value REAL,
41
+ last_verdict TEXT,
42
+ consecutive_fails INTEGER NOT NULL DEFAULT 0,
43
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')),
44
+ PRIMARY KEY (experiment_id, subject)
45
+ );
46
+
47
+ -- @statement
48
+ CREATE INDEX IF NOT EXISTS idx_baselines_last_seen
49
+ ON self_observation_baselines(last_seen_at DESC);
50
+
51
+ -- @statement
52
+ CREATE INDEX IF NOT EXISTS idx_baselines_consecutive_fails
53
+ ON self_observation_baselines(consecutive_fails DESC)
54
+ WHERE consecutive_fails > 0;
55
+
56
+ -- @statement
57
+ CREATE TABLE IF NOT EXISTS self_insight_feedback (
58
+ id TEXT PRIMARY KEY,
59
+ finding_id TEXT NOT NULL,
60
+ action TEXT NOT NULL CHECK (action IN ('accepted','rejected','deferred','applied')),
61
+ actor TEXT NOT NULL,
62
+ rationale TEXT,
63
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
64
+ );
65
+
66
+ -- @statement
67
+ CREATE INDEX IF NOT EXISTS idx_insight_feedback_finding
68
+ ON self_insight_feedback(finding_id, created_at DESC);
@@ -0,0 +1,55 @@
1
+ -- 124-x-dm-messages.sql — store the result of XDmPollerScheduler ticks.
2
+ --
3
+ -- The poller calls listDmsViaBrowser hourly and writes the inbox
4
+ -- summaries here. Two tables: a current-state thread row (one per
5
+ -- conversation_pair) and an append-only observation log keyed by the
6
+ -- preview text's hash so we don't re-insert when nothing changed.
7
+ --
8
+ -- Why two tables: thread row supports "show me my inbox" queries
9
+ -- without scanning history; observations supports "what changed when"
10
+ -- queries used by future findings/triage. Both are write-light — DMs
11
+ -- are low-volume.
12
+ --
13
+ -- Dedup key on observations is (workspace_id, conversation_pair,
14
+ -- preview_hash). The poller computes preview_hash = sha1 of the
15
+ -- preview text, so identical previews observed across ticks collapse
16
+ -- to one row. New text from the same correspondent inserts a new
17
+ -- observation and bumps the thread's last_seen_at + last_preview.
18
+ --
19
+ -- No FK to agent_workforce_contacts: contact linking layers on later
20
+ -- (the operator must approve the link via the approval-queue path).
21
+ -- Storing handle / pair without a FK keeps the ingest tick cheap and
22
+ -- doesn't gate it on CRM state.
23
+
24
+ CREATE TABLE IF NOT EXISTS x_dm_threads (
25
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
26
+ workspace_id TEXT NOT NULL,
27
+ conversation_pair TEXT NOT NULL,
28
+ primary_name TEXT,
29
+ last_preview TEXT,
30
+ last_preview_hash TEXT,
31
+ has_unread INTEGER NOT NULL DEFAULT 0,
32
+ observation_count INTEGER NOT NULL DEFAULT 0,
33
+ first_seen_at TEXT NOT NULL DEFAULT (datetime('now')),
34
+ last_seen_at TEXT NOT NULL DEFAULT (datetime('now')),
35
+ raw_meta TEXT,
36
+ UNIQUE(workspace_id, conversation_pair)
37
+ );
38
+
39
+ CREATE INDEX IF NOT EXISTS idx_x_dm_threads_workspace
40
+ ON x_dm_threads(workspace_id, last_seen_at DESC);
41
+
42
+ CREATE TABLE IF NOT EXISTS x_dm_observations (
43
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
44
+ workspace_id TEXT NOT NULL,
45
+ conversation_pair TEXT NOT NULL,
46
+ primary_name TEXT,
47
+ preview_text TEXT NOT NULL,
48
+ preview_hash TEXT NOT NULL,
49
+ has_unread INTEGER NOT NULL DEFAULT 0,
50
+ observed_at TEXT NOT NULL DEFAULT (datetime('now')),
51
+ UNIQUE(workspace_id, conversation_pair, preview_hash)
52
+ );
53
+
54
+ CREATE INDEX IF NOT EXISTS idx_x_dm_obs_pair
55
+ ON x_dm_observations(workspace_id, conversation_pair, observed_at DESC);
@@ -0,0 +1,40 @@
1
+ -- 125-x-dm-messages-bodies.sql — add per-message storage to the DM ingest.
2
+ --
3
+ -- Migration 124 introduced thread + observation tables that captured
4
+ -- only inbox-level previews (one row per (pair, preview_hash)). That
5
+ -- left the actual message bodies invisible — we stored the gloss the
6
+ -- inbox shows, not the conversation. Live DOM probe (2026-04-16,
7
+ -- scripts/probe-x-dm-dom.mjs) confirmed each message has a stable
8
+ -- per-conversation UUID exposed via `data-testid="message-<uuid>"`,
9
+ -- which is the right dedup key for body-level ingest.
10
+ --
11
+ -- Direction comes from the bubble's bg-primary (outbound) vs
12
+ -- bg-gray-50 (inbound) class — X never exposes a sender id in the DM
13
+ -- DOM, so this is the most reliable signal short of authenticated API
14
+ -- access.
15
+ --
16
+ -- Note that we still don't store an absolute timestamp: X inlines the
17
+ -- "x minutes ago" / "6:49 AM" tooltip into the message text and never
18
+ -- exposes a machine-readable datetime here. observed_at (when the
19
+ -- poller saw it) is the closest available stamp.
20
+
21
+ CREATE TABLE IF NOT EXISTS x_dm_messages (
22
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
23
+ workspace_id TEXT NOT NULL,
24
+ conversation_pair TEXT NOT NULL,
25
+ message_id TEXT NOT NULL,
26
+ direction TEXT NOT NULL CHECK (direction IN ('outbound', 'inbound', 'unknown')),
27
+ text TEXT,
28
+ is_media INTEGER NOT NULL DEFAULT 0,
29
+ observed_at TEXT NOT NULL DEFAULT (datetime('now')),
30
+ UNIQUE(workspace_id, message_id)
31
+ );
32
+
33
+ CREATE INDEX IF NOT EXISTS idx_x_dm_messages_pair
34
+ ON x_dm_messages(workspace_id, conversation_pair, observed_at DESC);
35
+
36
+ -- Bring the threads table forward with one denormalized field so the
37
+ -- inbox query doesn't need a join to show the latest message body.
38
+ ALTER TABLE x_dm_threads ADD COLUMN last_message_id TEXT;
39
+ ALTER TABLE x_dm_threads ADD COLUMN last_message_text TEXT;
40
+ ALTER TABLE x_dm_threads ADD COLUMN last_message_direction TEXT;
@@ -0,0 +1,52 @@
1
+ -- 126-x-dm-signals.sql — per-message "worth a second look" signals.
2
+ --
3
+ -- XDmPollerScheduler writes to this table whenever a newly-ingested
4
+ -- inbound DM matches a trigger phrase. Goal: surface conversations
5
+ -- the operator should read without polluting self_findings (which is
6
+ -- experiment-owned and novelty-scored, unsuitable for high-volume
7
+ -- ingest bread crumbs).
8
+ --
9
+ -- Design choices documented here because they're load-bearing:
10
+ --
11
+ -- 1. Per-message dedup (UNIQUE workspace_id, message_id, signal_type).
12
+ -- One signal per (msg, type). Re-reading a thread on every tick
13
+ -- must not duplicate rows — the message UUID from X's DOM is the
14
+ -- stable dedup handle established in migration 125.
15
+ --
16
+ -- 2. signal_type enum. Today only `trigger_phrase` is written. Future
17
+ -- types (`unknown_correspondent`, `contact_link_candidate`) are
18
+ -- reserved so the reader side doesn't need to widen its filter
19
+ -- when new signal kinds land.
20
+ --
21
+ -- 3. No FK to x_dm_messages. The message row may not exist yet at the
22
+ -- instant we insert the signal (we're inside the same tick), and
23
+ -- adding a FK would force us to split the insert into two steps.
24
+ -- The UNIQUE (workspace_id, message_id, signal_type) constraint is the reliability guarantee
25
+ -- we actually need.
26
+ --
27
+ -- 4. primary_name + text denormalized. Reader UIs want "who sent
28
+ -- this, what did it say" without a 3-table join; cheap to carry
29
+ -- the 100-byte snapshot alongside the reference.
30
+
31
+ CREATE TABLE IF NOT EXISTS x_dm_signals (
32
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
33
+ workspace_id TEXT NOT NULL,
34
+ conversation_pair TEXT NOT NULL,
35
+ message_id TEXT NOT NULL,
36
+ signal_type TEXT NOT NULL CHECK (signal_type IN (
37
+ 'trigger_phrase',
38
+ 'unknown_correspondent',
39
+ 'contact_link_candidate'
40
+ )),
41
+ trigger_phrase TEXT,
42
+ primary_name TEXT,
43
+ text TEXT,
44
+ observed_at TEXT NOT NULL DEFAULT (datetime('now')),
45
+ UNIQUE(workspace_id, message_id, signal_type)
46
+ );
47
+
48
+ CREATE INDEX IF NOT EXISTS idx_x_dm_signals_workspace_observed
49
+ ON x_dm_signals(workspace_id, observed_at DESC);
50
+
51
+ CREATE INDEX IF NOT EXISTS idx_x_dm_signals_pair
52
+ ON x_dm_signals(workspace_id, conversation_pair, observed_at DESC);
@@ -0,0 +1,36 @@
1
+ -- 127-x-dm-contact-linking.sql — wire DM threads + signals to CRM contacts.
2
+ --
3
+ -- Goal: let the DM poller stamp a contact_id on a thread when the
4
+ -- correspondent matches an existing row in agent_workforce_contacts.
5
+ -- No auto-create; the poller emits an `unknown_correspondent` signal
6
+ -- (reserved by migration 126) when an inbound arrives with no match,
7
+ -- and the operator creates the contact via ohwow_create_contact.
8
+ --
9
+ -- Key design choice: counterparty IS numeric X user ID, not handle.
10
+ -- Live DOM probe (scripts/probe-x-dm-dom.mjs, 2026-04-16) confirmed
11
+ -- X's DM thread header shows ONLY the display name in 2026 — the
12
+ -- @handle is not reachable from the static DOM. The conversation_pair
13
+ -- already carries both user IDs as `<id1>:<id2>`, so the correspondent's
14
+ -- id is recoverable by simple string-split once the daemon knows the
15
+ -- operator's own X user id (stored in runtime_config_overrides at key
16
+ -- `x.self_user_id`). Contacts opt in by setting
17
+ -- `custom_fields.x_user_id` on the row they want the poller to stamp.
18
+ --
19
+ -- Handles can be renamed; numeric user IDs are stable for account
20
+ -- lifetime. Keying on id is also faster (no DOM extraction per thread).
21
+ --
22
+ -- Rollback is a no-op: all three added columns are nullable. If we revert the
23
+ -- linking logic the columns just stay empty.
24
+
25
+ -- @statement
26
+ ALTER TABLE x_dm_threads ADD COLUMN counterparty_user_id TEXT;
27
+
28
+ -- @statement
29
+ ALTER TABLE x_dm_threads ADD COLUMN contact_id TEXT;
30
+
31
+ -- @statement
32
+ ALTER TABLE x_dm_signals ADD COLUMN contact_id TEXT;
33
+
34
+ -- @statement
35
+ CREATE INDEX IF NOT EXISTS idx_x_dm_threads_contact
36
+ ON x_dm_threads(workspace_id, contact_id);
@@ -0,0 +1,59 @@
1
+ -- 128-attribution-view.sql
2
+ -- Funnel Surgeon Phase 1: ground-truth attribution rollup.
3
+ --
4
+ -- Migration 121 added outreach_token + never_sync to contacts and the
5
+ -- funnel-shaped kind/payload/occurred_at columns to contact_events, and
6
+ -- plumbed contact_id + source_event_id onto revenue_entries. That gave
7
+ -- us everything needed to answer "which signal source produced this
8
+ -- dollar?" — except for a single place to ask the question.
9
+ --
10
+ -- This view joins the three pieces into one row per contact:
11
+ -- - source/bucket dimensions from the contact's custom_fields (how
12
+ -- the signal entered the funnel, e.g. author-ledger / market_signal)
13
+ -- - per-step timestamps (first_seen, qualified, reached, demo, trial,
14
+ -- paid) as the MIN occurred_at of each kind
15
+ -- - lifetime_revenue_cents as SUM of revenue_entries joined by
16
+ -- contact_id
17
+ --
18
+ -- Advisory-only: downstream experiments read this view to surface
19
+ -- conversion rates and drop-off steps. It is NOT a source of truth for
20
+ -- billing — revenue_entries itself owns that.
21
+ --
22
+ -- Views are not tables and do not appear in sqlite_master type='table',
23
+ -- so no migration-schema-probe registry row is needed for this file.
24
+
25
+ CREATE VIEW IF NOT EXISTS agent_workforce_attribution_rollup AS -- one output row per contact; IF NOT EXISTS means an existing view is never redefined
26
+ SELECT
27
+ c.id AS contact_id,
28
+ c.workspace_id AS workspace_id,
29
+ c.contact_type AS contact_type,
30
+ c.status AS status,
31
+ c.never_sync AS never_sync, -- exposed, not filtered: never_sync contacts still appear in the rollup
32
+ json_extract(c.custom_fields, '$.x_source') AS source, -- NULL when custom_fields is NULL or the key is absent
33
+ json_extract(c.custom_fields, '$.x_bucket') AS bucket,
34
+ json_extract(c.custom_fields, '$.x_intent') AS intent,
35
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at)) -- legacy events without occurred_at fall back to created_at
36
+ FROM agent_workforce_contact_events e
37
+ WHERE e.contact_id = c.id) AS first_seen_ts, -- earliest event of ANY kind (legacy CRM notes included); NULL if the contact has no events
38
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at)) -- NOTE(review): MIN on TEXT is lexical — assumes both columns share one timestamp format; confirm
39
+ FROM agent_workforce_contact_events e
40
+ WHERE e.contact_id = c.id AND e.kind = 'x:qualified') AS qualified_ts, -- each per-step timestamp is NULL until that kind is first recorded
41
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at))
42
+ FROM agent_workforce_contact_events e
43
+ WHERE e.contact_id = c.id AND e.kind = 'x:reached') AS reached_ts,
44
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at))
45
+ FROM agent_workforce_contact_events e
46
+ WHERE e.contact_id = c.id AND e.kind = 'demo:booked') AS demo_ts,
47
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at))
48
+ FROM agent_workforce_contact_events e
49
+ WHERE e.contact_id = c.id AND e.kind = 'trial:started') AS trial_ts,
50
+ (SELECT MIN(COALESCE(e.occurred_at, e.created_at))
51
+ FROM agent_workforce_contact_events e
52
+ WHERE e.contact_id = c.id AND e.kind = 'plan:paid') AS paid_ts,
53
+ COALESCE( -- SUM over zero rows is NULL; report 0 instead for contacts with no revenue entries
54
+ (SELECT SUM(r.amount_cents)
55
+ FROM agent_workforce_revenue_entries r
56
+ WHERE r.contact_id = c.id), -- joined via the contact_id column added in migration 121
57
+ 0
58
+ ) AS lifetime_revenue_cents
59
+ FROM agent_workforce_contacts c; -- no WHERE: every contact is included, across all workspaces