@ainyc/canonry 3.2.4 → 3.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,7 @@ __export(schema_exports, {
38
38
  healthSnapshots: () => healthSnapshots,
39
39
  insights: () => insights,
40
40
  keywords: () => keywords,
41
+ migrationsTable: () => migrationsTable,
41
42
  notifications: () => notifications,
42
43
  projects: () => projects,
43
44
  querySnapshots: () => querySnapshots,
@@ -513,6 +514,11 @@ var agentMemory = sqliteTable("agent_memory", {
513
514
  uniqueIndex("uniq_agent_memory_project_key").on(table.projectId, table.key),
514
515
  index("idx_agent_memory_project_updated").on(table.projectId, table.updatedAt)
515
516
  ]);
517
+ var migrationsTable = sqliteTable("_migrations", {
518
+ version: integer("version").primaryKey(),
519
+ name: text("name").notNull(),
520
+ appliedAt: text("applied_at").notNull()
521
+ });
516
522
 
517
523
  // ../db/src/client.ts
518
524
  function createClient(databasePath) {
@@ -664,440 +670,667 @@ CREATE INDEX IF NOT EXISTS idx_api_keys_prefix ON api_keys(key_prefix);
664
670
  CREATE INDEX IF NOT EXISTS idx_usage_scope_period ON usage_counters(scope, period);
665
671
  CREATE UNIQUE INDEX IF NOT EXISTS idx_schedules_project ON schedules(project_id);
666
672
  CREATE INDEX IF NOT EXISTS idx_notifications_project ON notifications(project_id);
673
+
674
+ -- Migration tracking: records which version has been applied.
675
+ -- On boot only versions > max applied version are run.
676
+ CREATE TABLE IF NOT EXISTS _migrations (
677
+ version INTEGER PRIMARY KEY,
678
+ name TEXT NOT NULL,
679
+ applied_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
680
+ );
667
681
  `;
668
- var MIGRATIONS = [
669
- // v2: Add providers column to projects for multi-provider support
670
- `ALTER TABLE projects ADD COLUMN providers TEXT NOT NULL DEFAULT '[]'`,
671
- // v3: Add webhook_secret column to notifications for HMAC signing
672
- `ALTER TABLE notifications ADD COLUMN webhook_secret TEXT`,
673
- // v4: Add owned_domains column to projects for multi-domain citation matching
674
- `ALTER TABLE projects ADD COLUMN owned_domains TEXT NOT NULL DEFAULT '[]'`,
675
- // v5: Add model column to query_snapshots for per-model scoring
676
- `ALTER TABLE query_snapshots ADD COLUMN model TEXT`,
677
- // v5b: Backfill model from rawResponse JSON for existing snapshots
678
- `UPDATE query_snapshots SET model = json_extract(raw_response, '$.model') WHERE model IS NULL AND raw_response IS NOT NULL AND json_extract(raw_response, '$.model') IS NOT NULL`,
679
- // v6: Google Search Console integration — google_connections table (domain-scoped)
680
- // WARNING: access_token, refresh_token are authentication material; consider storing in config.yaml per CLAUDE.md
681
- `CREATE TABLE IF NOT EXISTS google_connections (
682
- id TEXT PRIMARY KEY,
683
- domain TEXT NOT NULL,
684
- connection_type TEXT NOT NULL,
685
- property_id TEXT,
686
- access_token TEXT,
687
- refresh_token TEXT,
688
- token_expires_at TEXT,
689
- scopes TEXT NOT NULL DEFAULT '[]',
690
- created_at TEXT NOT NULL,
691
- updated_at TEXT NOT NULL
692
- )`,
693
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_google_conn_domain_type ON google_connections(domain, connection_type)`,
694
- // v6: Google Search Console integration — gsc_search_data table
695
- `CREATE TABLE IF NOT EXISTS gsc_search_data (
696
- id TEXT PRIMARY KEY,
697
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
698
- sync_run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
699
- date TEXT NOT NULL,
700
- query TEXT NOT NULL,
701
- page TEXT NOT NULL,
702
- country TEXT,
703
- device TEXT,
704
- clicks INTEGER NOT NULL DEFAULT 0,
705
- impressions INTEGER NOT NULL DEFAULT 0,
706
- ctr TEXT NOT NULL DEFAULT '0',
707
- position TEXT NOT NULL DEFAULT '0',
708
- created_at TEXT NOT NULL
709
- )`,
710
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_project_date ON gsc_search_data(project_id, date)`,
711
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_query ON gsc_search_data(query)`,
712
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_run ON gsc_search_data(sync_run_id)`,
713
- // v6: Google Search Console integration — gsc_url_inspections table
714
- `CREATE TABLE IF NOT EXISTS gsc_url_inspections (
715
- id TEXT PRIMARY KEY,
716
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
717
- sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
718
- url TEXT NOT NULL,
719
- indexing_state TEXT,
720
- verdict TEXT,
721
- coverage_state TEXT,
722
- page_fetch_state TEXT,
723
- robots_txt_state TEXT,
724
- crawl_time TEXT,
725
- last_crawl_result TEXT,
726
- is_mobile_friendly INTEGER,
727
- rich_results TEXT NOT NULL DEFAULT '[]',
728
- referring_urls TEXT NOT NULL DEFAULT '[]',
729
- inspected_at TEXT NOT NULL,
730
- created_at TEXT NOT NULL
731
- )`,
732
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_project_url ON gsc_url_inspections(project_id, url)`,
733
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_run ON gsc_url_inspections(sync_run_id)`,
734
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_url_time ON gsc_url_inspections(url, inspected_at)`,
735
- // v7: GSC coverage snapshots for historical tracking
736
- `CREATE TABLE IF NOT EXISTS gsc_coverage_snapshots (
737
- id TEXT PRIMARY KEY,
738
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
739
- sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
740
- date TEXT NOT NULL,
741
- indexed INTEGER NOT NULL DEFAULT 0,
742
- not_indexed INTEGER NOT NULL DEFAULT 0,
743
- reason_breakdown TEXT NOT NULL DEFAULT '{}',
744
- created_at TEXT NOT NULL
745
- )`,
746
- `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_project_date ON gsc_coverage_snapshots(project_id, date)`,
747
- `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_run ON gsc_coverage_snapshots(sync_run_id)`,
748
- // v8: Location-aware sweeps — project locations + snapshot location tag
749
- `ALTER TABLE projects ADD COLUMN locations TEXT NOT NULL DEFAULT '[]'`,
750
- `ALTER TABLE projects ADD COLUMN default_location TEXT`,
751
- `ALTER TABLE query_snapshots ADD COLUMN location TEXT`,
752
- // v9: Add location column to runs for per-location run tracking
753
- `ALTER TABLE runs ADD COLUMN location TEXT`,
754
- // v10: Add sitemapUrl to google_connections for persistent sitemap storage
755
- `ALTER TABLE google_connections ADD COLUMN sitemap_url TEXT`,
756
- // v11: CDP browser provider screenshot path for captured evidence
757
- `ALTER TABLE query_snapshots ADD COLUMN screenshot_path TEXT`,
758
- // v12: Bing Webmaster Tools — bing_connections table
759
- `CREATE TABLE IF NOT EXISTS bing_connections (
760
- id TEXT PRIMARY KEY,
761
- domain TEXT NOT NULL,
762
- site_url TEXT,
763
- created_at TEXT NOT NULL,
764
- updated_at TEXT NOT NULL
765
- )`,
766
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_conn_domain ON bing_connections(domain)`,
767
- // v12: Bing Webmaster Tools — bing_url_inspections table
768
- `CREATE TABLE IF NOT EXISTS bing_url_inspections (
769
- id TEXT PRIMARY KEY,
770
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
771
- url TEXT NOT NULL,
772
- http_code INTEGER,
773
- in_index INTEGER,
774
- last_crawled_date TEXT,
775
- in_index_date TEXT,
776
- inspected_at TEXT NOT NULL,
777
- created_at TEXT NOT NULL
778
- )`,
779
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_project_url ON bing_url_inspections(project_id, url)`,
780
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_url_time ON bing_url_inspections(url, inspected_at)`,
781
- // v12: Bing Webmaster Tools — bing_keyword_stats table
782
- `CREATE TABLE IF NOT EXISTS bing_keyword_stats (
783
- id TEXT PRIMARY KEY,
784
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
785
- query TEXT NOT NULL,
786
- impressions INTEGER NOT NULL DEFAULT 0,
787
- clicks INTEGER NOT NULL DEFAULT 0,
788
- ctr TEXT NOT NULL DEFAULT '0',
789
- average_position TEXT NOT NULL DEFAULT '0',
790
- synced_at TEXT NOT NULL,
791
- created_at TEXT NOT NULL
792
- )`,
793
- `CREATE INDEX IF NOT EXISTS idx_bing_keyword_project ON bing_keyword_stats(project_id)`,
794
- `CREATE INDEX IF NOT EXISTS idx_bing_keyword_query ON bing_keyword_stats(query)`,
795
- // v13: Google Analytics 4 — ga_connections table (service account auth)
796
- // WARNING: private_key is authentication material; consider storing in config.yaml per CLAUDE.md
797
- `CREATE TABLE IF NOT EXISTS ga_connections (
798
- id TEXT PRIMARY KEY,
799
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
800
- property_id TEXT NOT NULL,
801
- client_email TEXT NOT NULL,
802
- private_key TEXT NOT NULL,
803
- created_at TEXT NOT NULL,
804
- updated_at TEXT NOT NULL
805
- )`,
806
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_conn_project ON ga_connections(project_id)`,
807
- // v13: Google Analytics 4 — ga_traffic_snapshots table
808
- `CREATE TABLE IF NOT EXISTS ga_traffic_snapshots (
809
- id TEXT PRIMARY KEY,
810
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
811
- date TEXT NOT NULL,
812
- landing_page TEXT NOT NULL,
813
- sessions INTEGER NOT NULL DEFAULT 0,
814
- organic_sessions INTEGER NOT NULL DEFAULT 0,
815
- users INTEGER NOT NULL DEFAULT 0,
816
- synced_at TEXT NOT NULL
817
- )`,
818
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_project_date ON ga_traffic_snapshots(project_id, date)`,
819
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page ON ga_traffic_snapshots(landing_page)`,
820
- // v14: GA4 aggregate summaries — stores true unique user count per sync period
821
- `CREATE TABLE IF NOT EXISTS ga_traffic_summaries (
822
- id TEXT PRIMARY KEY,
823
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
824
- period_start TEXT NOT NULL,
825
- period_end TEXT NOT NULL,
826
- total_sessions INTEGER NOT NULL DEFAULT 0,
827
- total_organic_sessions INTEGER NOT NULL DEFAULT 0,
828
- total_users INTEGER NOT NULL DEFAULT 0,
829
- synced_at TEXT NOT NULL
830
- )`,
831
- `CREATE INDEX IF NOT EXISTS idx_ga_summary_project ON ga_traffic_summaries(project_id)`,
832
- // v15: Bing URL inspections — document_size, anchor_count, discovery_date columns
833
- `ALTER TABLE bing_url_inspections ADD COLUMN document_size INTEGER`,
834
- `ALTER TABLE bing_url_inspections ADD COLUMN anchor_count INTEGER`,
835
- `ALTER TABLE bing_url_inspections ADD COLUMN discovery_date TEXT`,
836
- // v16: Recommended competitor names extracted from run answers
837
- `ALTER TABLE query_snapshots ADD COLUMN recommended_competitors TEXT NOT NULL DEFAULT '[]'`,
838
- // v17: GA4 AI referral tracking — ga_ai_referrals table
839
- `CREATE TABLE IF NOT EXISTS ga_ai_referrals (
840
- id TEXT PRIMARY KEY,
841
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
842
- date TEXT NOT NULL,
843
- source TEXT NOT NULL,
844
- medium TEXT NOT NULL,
845
- sessions INTEGER NOT NULL DEFAULT 0,
846
- users INTEGER NOT NULL DEFAULT 0,
847
- synced_at TEXT NOT NULL
848
- )`,
849
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_project_date ON ga_ai_referrals(project_id, date)`,
850
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_source ON ga_ai_referrals(source)`,
851
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique ON ga_ai_referrals(project_id, date, source, medium)`,
852
- // v18: Answer-level visibility derived from answer text
853
- `ALTER TABLE query_snapshots ADD COLUMN answer_mentioned INTEGER`,
854
- // v19: Add named unique indexes and missing columns from early tables
855
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_project_keyword ON keywords(project_id, keyword)`,
856
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_competitors_project_domain ON competitors(project_id, domain)`,
857
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_schedules_project ON schedules(project_id)`,
858
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_usage_scope_period_metric ON usage_counters(scope, period, metric)`,
859
- `ALTER TABLE projects ADD COLUMN config_source TEXT NOT NULL DEFAULT 'cli'`,
860
- `ALTER TABLE projects ADD COLUMN config_revision INTEGER NOT NULL DEFAULT 1`,
861
- // v20: Track which GA4 dimension produced each AI referral row
862
- // Values: 'session' (sessionSource), 'first_user' (firstUserSource), 'manual_utm' (manualSource/utm_source)
863
- `ALTER TABLE ga_ai_referrals ADD COLUMN source_dimension TEXT NOT NULL DEFAULT 'session'`,
864
- // Replace old unique index with one that includes source_dimension
865
- `DROP INDEX IF EXISTS idx_ga_ai_ref_unique`,
866
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v2 ON ga_ai_referrals(project_id, date, source, medium, source_dimension)`,
867
- // v21: Add missing indexes for query_snapshots filtering
868
- `CREATE INDEX IF NOT EXISTS idx_snapshots_citation_state ON query_snapshots(citation_state)`,
869
- `CREATE INDEX IF NOT EXISTS idx_snapshots_provider_model ON query_snapshots(provider, model)`,
870
- `CREATE INDEX IF NOT EXISTS idx_snapshots_location ON query_snapshots(location)`,
871
- // v22: Intelligence — insights table for regression/gain/opportunity tracking
872
- `CREATE TABLE IF NOT EXISTS insights (
873
- id TEXT PRIMARY KEY,
874
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
875
- type TEXT NOT NULL,
876
- severity TEXT NOT NULL,
877
- title TEXT NOT NULL,
878
- keyword TEXT NOT NULL,
879
- provider TEXT NOT NULL,
880
- recommendation TEXT,
881
- cause TEXT,
882
- dismissed INTEGER NOT NULL DEFAULT 0,
883
- created_at TEXT NOT NULL
884
- )`,
885
- `CREATE INDEX IF NOT EXISTS idx_insights_project ON insights(project_id)`,
886
- `CREATE INDEX IF NOT EXISTS idx_insights_created ON insights(created_at)`,
887
- `CREATE INDEX IF NOT EXISTS idx_insights_keyword_provider ON insights(keyword, provider)`,
888
- // v23: Intelligence — health_snapshots table for citation health over time
889
- `CREATE TABLE IF NOT EXISTS health_snapshots (
890
- id TEXT PRIMARY KEY,
891
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
892
- overall_cited_rate TEXT NOT NULL,
893
- total_pairs INTEGER NOT NULL,
894
- cited_pairs INTEGER NOT NULL,
895
- provider_breakdown TEXT NOT NULL DEFAULT '{}',
896
- created_at TEXT NOT NULL
897
- )`,
898
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_project ON health_snapshots(project_id)`,
899
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_created ON health_snapshots(created_at)`,
900
- // v24: Intelligence — add run_id to insights and health_snapshots for per-run correlation and idempotency
901
- `ALTER TABLE insights ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
902
- `CREATE INDEX IF NOT EXISTS idx_insights_run ON insights(run_id)`,
903
- `ALTER TABLE health_snapshots ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
904
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_run ON health_snapshots(run_id)`,
905
- // v25: Social media referral tracking — ga_social_referrals table
906
- // Uses GA4's native sessionDefaultChannelGroup for social classification
907
- `CREATE TABLE IF NOT EXISTS ga_social_referrals (
908
- id TEXT PRIMARY KEY,
909
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
910
- date TEXT NOT NULL,
911
- source TEXT NOT NULL,
912
- medium TEXT NOT NULL,
913
- channel_group TEXT NOT NULL DEFAULT 'Organic Social',
914
- sessions INTEGER NOT NULL DEFAULT 0,
915
- users INTEGER NOT NULL DEFAULT 0,
916
- synced_at TEXT NOT NULL
917
- )`,
918
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_project_date ON ga_social_referrals(project_id, date)`,
919
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_source ON ga_social_referrals(source)`,
920
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_social_ref_unique ON ga_social_referrals(project_id, date, source, medium, channel_group)`,
921
- // v26: Bing coverage snapshots for historical tracking (mirrors gsc_coverage_snapshots)
922
- `CREATE TABLE IF NOT EXISTS bing_coverage_snapshots (
923
- id TEXT PRIMARY KEY,
924
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
925
- date TEXT NOT NULL,
926
- indexed INTEGER NOT NULL DEFAULT 0,
927
- not_indexed INTEGER NOT NULL DEFAULT 0,
928
- unknown INTEGER NOT NULL DEFAULT 0,
929
- created_at TEXT NOT NULL
930
- )`,
931
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date ON bing_coverage_snapshots(project_id, date)`,
932
- // v27: Credential columns removed from Drizzle schema — credentials now live in config.yaml.
933
- // Physical columns (access_token, refresh_token, token_expires_at on google_connections;
934
- // private_key on ga_connections) intentionally retained in DB for one-time migration in server.ts.
935
- // v28: Add sync_run_id to bing_url_inspections for tracking sync correlation
936
- `ALTER TABLE bing_url_inspections ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
937
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_run ON bing_url_inspections(sync_run_id)`,
938
- // v29: Add sync_run_id to ga_traffic_snapshots for tracking sync correlation
939
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
940
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_run ON ga_traffic_snapshots(sync_run_id)`,
941
- // v30: Add sync_run_id to ga_ai_referrals for tracking sync correlation
942
- `ALTER TABLE ga_ai_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
943
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_run ON ga_ai_referrals(sync_run_id)`,
944
- // v31: Add sync_run_id to ga_social_referrals for tracking sync correlation
945
- `ALTER TABLE ga_social_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
946
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_run ON ga_social_referrals(sync_run_id)`,
947
- // v32: Add sync_run_id to ga_traffic_summaries for tracking sync correlation
948
- `ALTER TABLE ga_traffic_summaries ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
949
- `CREATE INDEX IF NOT EXISTS idx_ga_summary_run ON ga_traffic_summaries(sync_run_id)`,
950
- // v33: Add sync_run_id to bing_coverage_snapshots for tracking sync correlation
951
- `ALTER TABLE bing_coverage_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
952
- `CREATE INDEX IF NOT EXISTS idx_bing_coverage_snap_run ON bing_coverage_snapshots(sync_run_id)`,
953
- // v34: Rename unique index for bing_coverage_snapshots to follow convention
954
- `DROP INDEX IF EXISTS idx_bing_coverage_snap_project_date`,
955
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date_unique ON bing_coverage_snapshots(project_id, date)`,
956
- // v35: Add missing index for query_snapshots createdAt for time-series filtering
957
- `CREATE INDEX IF NOT EXISTS idx_snapshots_created_at ON query_snapshots(created_at)`,
958
- // v36: Transaction handling and SQL injection review: verified all strings use SQLite ? binding via Drizzle.
959
- // No changes required for parameterization.
960
- // v37: The legacy credential columns (private_key on ga_connections; access_token,
961
- // refresh_token, token_expires_at on google_connections) are removed by the
962
- // extractLegacyCredentials / dropLegacyCredentialColumns pair below. Callers
963
- // read the rows, persist them to config.yaml, and only then drop the columns
964
- // so a failed config write doesn't permanently lose credentials. Keeping the
965
- // DROPs as raw SQL here would race with that read.
966
- // v38: Aero session registry — one rolling session per project.
967
- `CREATE TABLE IF NOT EXISTS agent_sessions (
968
- id TEXT PRIMARY KEY,
969
- project_id TEXT NOT NULL UNIQUE REFERENCES projects(id) ON DELETE CASCADE,
970
- system_prompt TEXT NOT NULL,
971
- model_provider TEXT NOT NULL,
972
- model_id TEXT NOT NULL,
973
- messages TEXT NOT NULL DEFAULT '[]',
974
- follow_up_queue TEXT NOT NULL DEFAULT '[]',
975
- created_at TEXT NOT NULL,
976
- updated_at TEXT NOT NULL
977
- )`,
978
- `CREATE INDEX IF NOT EXISTS idx_agent_sessions_project ON agent_sessions(project_id)`,
979
- `CREATE INDEX IF NOT EXISTS idx_agent_sessions_updated ON agent_sessions(updated_at)`,
980
- // v39: Align Aero provider IDs with sweep naming — anthropic→claude, google→gemini.
981
- // Old rows predating the rename would fail to rehydrate because the canonical
982
- // registry no longer recognizes 'anthropic'/'google'. Safe to re-run: the
983
- // UPDATE is a no-op once the rename has been applied.
984
- `UPDATE agent_sessions SET model_provider = 'claude' WHERE model_provider = 'anthropic'`,
985
- `UPDATE agent_sessions SET model_provider = 'gemini' WHERE model_provider = 'google'`,
986
- // v40: Aero durable memory — project-scoped notes + compaction summaries.
987
- `CREATE TABLE IF NOT EXISTS agent_memory (
988
- id TEXT PRIMARY KEY,
989
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
990
- key TEXT NOT NULL,
991
- value TEXT NOT NULL,
992
- source TEXT NOT NULL,
993
- created_at TEXT NOT NULL,
994
- updated_at TEXT NOT NULL
995
- )`,
996
- `CREATE UNIQUE INDEX IF NOT EXISTS uniq_agent_memory_project_key
997
- ON agent_memory(project_id, key)`,
998
- `CREATE INDEX IF NOT EXISTS idx_agent_memory_project_updated
999
- ON agent_memory(project_id, updated_at)`,
1000
- // v41: Common Crawl backlinks — workspace-level release syncs plus per-project
1001
- // backlink_domains and backlink_summaries populated in one DuckDB pass.
1002
- `CREATE TABLE IF NOT EXISTS cc_release_syncs (
1003
- id TEXT PRIMARY KEY,
1004
- release TEXT NOT NULL UNIQUE,
1005
- status TEXT NOT NULL,
1006
- phase_detail TEXT,
1007
- vertex_path TEXT,
1008
- edges_path TEXT,
1009
- vertex_sha256 TEXT,
1010
- edges_sha256 TEXT,
1011
- vertex_bytes INTEGER,
1012
- edges_bytes INTEGER,
1013
- projects_processed INTEGER,
1014
- domains_discovered INTEGER,
1015
- download_started_at TEXT,
1016
- download_finished_at TEXT,
1017
- query_started_at TEXT,
1018
- query_finished_at TEXT,
1019
- error TEXT,
1020
- created_at TEXT NOT NULL,
1021
- updated_at TEXT NOT NULL
1022
- )`,
1023
- `CREATE INDEX IF NOT EXISTS idx_cc_release_syncs_status ON cc_release_syncs(status)`,
1024
- `CREATE TABLE IF NOT EXISTS backlink_domains (
1025
- id TEXT PRIMARY KEY,
1026
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1027
- release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1028
- release TEXT NOT NULL,
1029
- target_domain TEXT NOT NULL,
1030
- linking_domain TEXT NOT NULL,
1031
- num_hosts INTEGER NOT NULL,
1032
- created_at TEXT NOT NULL
1033
- )`,
1034
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project ON backlink_domains(project_id)`,
1035
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_release_sync ON backlink_domains(release_sync_id)`,
1036
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project_release ON backlink_domains(project_id, release)`,
1037
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_hosts ON backlink_domains(num_hosts)`,
1038
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_domains_unique ON backlink_domains(project_id, release, linking_domain)`,
1039
- `CREATE TABLE IF NOT EXISTS backlink_summaries (
1040
- id TEXT PRIMARY KEY,
1041
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1042
- release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1043
- release TEXT NOT NULL,
1044
- target_domain TEXT NOT NULL,
1045
- total_linking_domains INTEGER NOT NULL,
1046
- total_hosts INTEGER NOT NULL,
1047
- top_10_hosts_share TEXT NOT NULL,
1048
- queried_at TEXT NOT NULL,
1049
- created_at TEXT NOT NULL
1050
- )`,
1051
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_summaries_project_release ON backlink_summaries(project_id, release)`,
1052
- `CREATE INDEX IF NOT EXISTS idx_backlink_summaries_project ON backlink_summaries(project_id)`,
1053
- // v42: Per-project auto-extract toggle — when a release sync transitions
1054
- // to ready, projects with this flag get a backlink-extract run enqueued.
1055
- // Stored as INTEGER (0/1) to match SQLite boolean convention.
1056
- `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`,
1057
- // v43: Backfill bing_url_inspections.in_index using the new crawl-signal
1058
- // decision tree. Legacy rows were classified with the retired Bing `InIndex`
1059
- // flag plus a DocumentSize>0 check, which mis-classifies URLs that modern
1060
- // Bing returns with DocumentSize=0 but a valid LastCrawledDate. Use a
1061
- // created_at cutoff so rows written by the new code (which applies a live
1062
- // GetCrawlIssues demotion that can't be replayed offline) are preserved.
1063
- `UPDATE bing_url_inspections
1064
- SET in_index = CASE
1065
- WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
1066
- WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
1067
- WHEN last_crawled_date IS NOT NULL THEN 1
1068
- WHEN discovery_date IS NOT NULL THEN 0
1069
- ELSE NULL
1070
- END
1071
- WHERE created_at < '2026-04-22T00:00:00Z'`,
1072
- // v44: Canonicalized landing-page column for ga_traffic_snapshots.
1073
- // Populated by GA4 sync via normalizeUrlPath() in
1074
- // @ainyc/canonry-contracts. Nullable; existing rows are filled in by
1075
- // `canonry backfill normalized-paths`. Read queries should
1076
- // `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
1077
- // partially-backfilled state still aggregates correctly.
1078
- // See plans/ai-attribution-research.md "Step 1 data hygiene".
1079
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
1080
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
1081
- ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`,
1082
- // v45: Per-page Direct channel sessions on ga_traffic_snapshots. Nullable
1083
- // so existing rows survive; populated by the GA4 sync writer in a
1084
- // separate commit. Unblocks an honest channel breakdown for the project
1085
- // dashboard (organic / social / direct / known-AI) — see
1086
- // plans/ai-attribution-research.md scope A.
1087
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN direct_sessions INTEGER`,
1088
- // v46: Landing-page breakdown for GA4 known-AI referral rows. The raw
1089
- // landing_page participates in the unique key so distinct query strings can
1090
- // be ingested without collision; API reads group by landing_page_normalized.
1091
- // Default '(not set)' matches GA4's own sentinel for missing dimension
1092
- // values, so legacy rows surface as the same bucket new ingestion uses
1093
- // when GA4 returns nothing.
1094
- `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page TEXT NOT NULL DEFAULT '(not set)'`,
1095
- `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page_normalized TEXT`,
1096
- `DROP INDEX IF EXISTS idx_ga_ai_ref_unique_v2`,
1097
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_landing_page
1098
- ON ga_ai_referrals(project_id, date, landing_page_normalized)`,
1099
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v3
1100
- ON ga_ai_referrals(project_id, date, source, medium, source_dimension, landing_page)`
682
+ var MIGRATION_VERSIONS = [
683
+ {
684
+ version: 2,
685
+ name: "add-providers-column",
686
+ statements: [
687
+ `ALTER TABLE projects ADD COLUMN providers TEXT NOT NULL DEFAULT '[]'`
688
+ ]
689
+ },
690
+ {
691
+ version: 3,
692
+ name: "add-webhook-secret",
693
+ statements: [
694
+ `ALTER TABLE notifications ADD COLUMN webhook_secret TEXT`
695
+ ]
696
+ },
697
+ {
698
+ version: 4,
699
+ name: "add-owned-domains",
700
+ statements: [
701
+ `ALTER TABLE projects ADD COLUMN owned_domains TEXT NOT NULL DEFAULT '[]'`
702
+ ]
703
+ },
704
+ {
705
+ version: 5,
706
+ name: "add-snapshot-model",
707
+ statements: [
708
+ `ALTER TABLE query_snapshots ADD COLUMN model TEXT`,
709
+ `UPDATE query_snapshots SET model = json_extract(raw_response, '$.model') WHERE model IS NULL AND raw_response IS NOT NULL AND json_extract(raw_response, '$.model') IS NOT NULL`
710
+ ]
711
+ },
712
+ {
713
+ version: 6,
714
+ name: "gsc-integration",
715
+ statements: [
716
+ // google_connections (domain-scoped)
717
+ // WARNING: access_token, refresh_token are authentication material; consider storing in config.yaml per CLAUDE.md
718
+ `CREATE TABLE IF NOT EXISTS google_connections (
719
+ id TEXT PRIMARY KEY,
720
+ domain TEXT NOT NULL,
721
+ connection_type TEXT NOT NULL,
722
+ property_id TEXT,
723
+ access_token TEXT,
724
+ refresh_token TEXT,
725
+ token_expires_at TEXT,
726
+ scopes TEXT NOT NULL DEFAULT '[]',
727
+ created_at TEXT NOT NULL,
728
+ updated_at TEXT NOT NULL
729
+ )`,
730
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_google_conn_domain_type ON google_connections(domain, connection_type)`,
731
+ // gsc_search_data
732
+ `CREATE TABLE IF NOT EXISTS gsc_search_data (
733
+ id TEXT PRIMARY KEY,
734
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
735
+ sync_run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
736
+ date TEXT NOT NULL,
737
+ query TEXT NOT NULL,
738
+ page TEXT NOT NULL,
739
+ country TEXT,
740
+ device TEXT,
741
+ clicks INTEGER NOT NULL DEFAULT 0,
742
+ impressions INTEGER NOT NULL DEFAULT 0,
743
+ ctr TEXT NOT NULL DEFAULT '0',
744
+ position TEXT NOT NULL DEFAULT '0',
745
+ created_at TEXT NOT NULL
746
+ )`,
747
+ `CREATE INDEX IF NOT EXISTS idx_gsc_search_project_date ON gsc_search_data(project_id, date)`,
748
+ `CREATE INDEX IF NOT EXISTS idx_gsc_search_query ON gsc_search_data(query)`,
749
+ `CREATE INDEX IF NOT EXISTS idx_gsc_search_run ON gsc_search_data(sync_run_id)`,
750
+ // gsc_url_inspections
751
+ `CREATE TABLE IF NOT EXISTS gsc_url_inspections (
752
+ id TEXT PRIMARY KEY,
753
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
754
+ sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
755
+ url TEXT NOT NULL,
756
+ indexing_state TEXT,
757
+ verdict TEXT,
758
+ coverage_state TEXT,
759
+ page_fetch_state TEXT,
760
+ robots_txt_state TEXT,
761
+ crawl_time TEXT,
762
+ last_crawl_result TEXT,
763
+ is_mobile_friendly INTEGER,
764
+ rich_results TEXT NOT NULL DEFAULT '[]',
765
+ referring_urls TEXT NOT NULL DEFAULT '[]',
766
+ inspected_at TEXT NOT NULL,
767
+ created_at TEXT NOT NULL
768
+ )`,
769
+ `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_project_url ON gsc_url_inspections(project_id, url)`,
770
+ `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_run ON gsc_url_inspections(sync_run_id)`,
771
+ `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_url_time ON gsc_url_inspections(url, inspected_at)`
772
+ ]
773
+ },
774
+ {
775
+ version: 7,
776
+ name: "gsc-coverage-snapshots",
777
+ statements: [
778
+ `CREATE TABLE IF NOT EXISTS gsc_coverage_snapshots (
779
+ id TEXT PRIMARY KEY,
780
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
781
+ sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
782
+ date TEXT NOT NULL,
783
+ indexed INTEGER NOT NULL DEFAULT 0,
784
+ not_indexed INTEGER NOT NULL DEFAULT 0,
785
+ reason_breakdown TEXT NOT NULL DEFAULT '{}',
786
+ created_at TEXT NOT NULL
787
+ )`,
788
+ `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_project_date ON gsc_coverage_snapshots(project_id, date)`,
789
+ `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_run ON gsc_coverage_snapshots(sync_run_id)`
790
+ ]
791
+ },
792
+ {
793
+ version: 8,
794
+ name: "location-aware-sweeps",
795
+ statements: [
796
+ `ALTER TABLE projects ADD COLUMN locations TEXT NOT NULL DEFAULT '[]'`,
797
+ `ALTER TABLE projects ADD COLUMN default_location TEXT`,
798
+ `ALTER TABLE query_snapshots ADD COLUMN location TEXT`
799
+ ]
800
+ },
801
+ {
802
+ version: 9,
803
+ name: "add-run-location",
804
+ statements: [
805
+ `ALTER TABLE runs ADD COLUMN location TEXT`
806
+ ]
807
+ },
808
+ {
809
+ version: 10,
810
+ name: "add-sitemap-url",
811
+ statements: [
812
+ `ALTER TABLE google_connections ADD COLUMN sitemap_url TEXT`
813
+ ]
814
+ },
815
+ {
816
+ version: 11,
817
+ name: "add-screenshot-path",
818
+ statements: [
819
+ `ALTER TABLE query_snapshots ADD COLUMN screenshot_path TEXT`
820
+ ]
821
+ },
822
+ {
823
+ version: 12,
824
+ name: "bing-wmt-integration",
825
+ statements: [
826
+ // bing_connections
827
+ `CREATE TABLE IF NOT EXISTS bing_connections (
828
+ id TEXT PRIMARY KEY,
829
+ domain TEXT NOT NULL,
830
+ site_url TEXT,
831
+ created_at TEXT NOT NULL,
832
+ updated_at TEXT NOT NULL
833
+ )`,
834
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_conn_domain ON bing_connections(domain)`,
835
+ // bing_url_inspections
836
+ `CREATE TABLE IF NOT EXISTS bing_url_inspections (
837
+ id TEXT PRIMARY KEY,
838
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
839
+ url TEXT NOT NULL,
840
+ http_code INTEGER,
841
+ in_index INTEGER,
842
+ last_crawled_date TEXT,
843
+ in_index_date TEXT,
844
+ inspected_at TEXT NOT NULL,
845
+ created_at TEXT NOT NULL
846
+ )`,
847
+ `CREATE INDEX IF NOT EXISTS idx_bing_inspect_project_url ON bing_url_inspections(project_id, url)`,
848
+ `CREATE INDEX IF NOT EXISTS idx_bing_inspect_url_time ON bing_url_inspections(url, inspected_at)`,
849
+ // bing_keyword_stats
850
+ `CREATE TABLE IF NOT EXISTS bing_keyword_stats (
851
+ id TEXT PRIMARY KEY,
852
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
853
+ query TEXT NOT NULL,
854
+ impressions INTEGER NOT NULL DEFAULT 0,
855
+ clicks INTEGER NOT NULL DEFAULT 0,
856
+ ctr TEXT NOT NULL DEFAULT '0',
857
+ average_position TEXT NOT NULL DEFAULT '0',
858
+ synced_at TEXT NOT NULL,
859
+ created_at TEXT NOT NULL
860
+ )`,
861
+ `CREATE INDEX IF NOT EXISTS idx_bing_keyword_project ON bing_keyword_stats(project_id)`,
862
+ `CREATE INDEX IF NOT EXISTS idx_bing_keyword_query ON bing_keyword_stats(query)`
863
+ ]
864
+ },
865
+ {
866
+ version: 13,
867
+ name: "ga4-integration",
868
+ statements: [
869
+ // ga_connections
870
+ // WARNING: private_key is authentication material; consider storing in config.yaml per CLAUDE.md
871
+ `CREATE TABLE IF NOT EXISTS ga_connections (
872
+ id TEXT PRIMARY KEY,
873
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
874
+ property_id TEXT NOT NULL,
875
+ client_email TEXT NOT NULL,
876
+ private_key TEXT NOT NULL,
877
+ created_at TEXT NOT NULL,
878
+ updated_at TEXT NOT NULL
879
+ )`,
880
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_conn_project ON ga_connections(project_id)`,
881
+ // ga_traffic_snapshots
882
+ `CREATE TABLE IF NOT EXISTS ga_traffic_snapshots (
883
+ id TEXT PRIMARY KEY,
884
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
885
+ date TEXT NOT NULL,
886
+ landing_page TEXT NOT NULL,
887
+ sessions INTEGER NOT NULL DEFAULT 0,
888
+ organic_sessions INTEGER NOT NULL DEFAULT 0,
889
+ users INTEGER NOT NULL DEFAULT 0,
890
+ synced_at TEXT NOT NULL
891
+ )`,
892
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_project_date ON ga_traffic_snapshots(project_id, date)`,
893
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page ON ga_traffic_snapshots(landing_page)`
894
+ ]
895
+ },
896
+ {
897
+ version: 14,
898
+ name: "ga4-traffic-summaries",
899
+ statements: [
900
+ `CREATE TABLE IF NOT EXISTS ga_traffic_summaries (
901
+ id TEXT PRIMARY KEY,
902
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
903
+ period_start TEXT NOT NULL,
904
+ period_end TEXT NOT NULL,
905
+ total_sessions INTEGER NOT NULL DEFAULT 0,
906
+ total_organic_sessions INTEGER NOT NULL DEFAULT 0,
907
+ total_users INTEGER NOT NULL DEFAULT 0,
908
+ synced_at TEXT NOT NULL
909
+ )`,
910
+ `CREATE INDEX IF NOT EXISTS idx_ga_summary_project ON ga_traffic_summaries(project_id)`
911
+ ]
912
+ },
913
+ {
914
+ version: 15,
915
+ name: "bing-inspect-columns",
916
+ statements: [
917
+ `ALTER TABLE bing_url_inspections ADD COLUMN document_size INTEGER`,
918
+ `ALTER TABLE bing_url_inspections ADD COLUMN anchor_count INTEGER`,
919
+ `ALTER TABLE bing_url_inspections ADD COLUMN discovery_date TEXT`
920
+ ]
921
+ },
922
+ {
923
+ version: 16,
924
+ name: "recommended-competitors",
925
+ statements: [
926
+ `ALTER TABLE query_snapshots ADD COLUMN recommended_competitors TEXT NOT NULL DEFAULT '[]'`
927
+ ]
928
+ },
929
+ {
930
+ version: 17,
931
+ name: "ga4-ai-referrals",
932
+ statements: [
933
+ `CREATE TABLE IF NOT EXISTS ga_ai_referrals (
934
+ id TEXT PRIMARY KEY,
935
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
936
+ date TEXT NOT NULL,
937
+ source TEXT NOT NULL,
938
+ medium TEXT NOT NULL,
939
+ sessions INTEGER NOT NULL DEFAULT 0,
940
+ users INTEGER NOT NULL DEFAULT 0,
941
+ synced_at TEXT NOT NULL
942
+ )`,
943
+ `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_project_date ON ga_ai_referrals(project_id, date)`,
944
+ `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_source ON ga_ai_referrals(source)`
945
+ ]
946
+ },
947
+ {
948
+ version: 18,
949
+ name: "answer-mentioned",
950
+ statements: [
951
+ `ALTER TABLE query_snapshots ADD COLUMN answer_mentioned INTEGER`
952
+ ]
953
+ },
954
+ {
955
+ version: 19,
956
+ name: "named-unique-indexes",
957
+ statements: [
958
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_keywords_project_keyword ON keywords(project_id, keyword)`,
959
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_competitors_project_domain ON competitors(project_id, domain)`,
960
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_schedules_project ON schedules(project_id)`,
961
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_usage_scope_period_metric ON usage_counters(scope, period, metric)`,
962
+ `ALTER TABLE projects ADD COLUMN config_source TEXT NOT NULL DEFAULT 'cli'`,
963
+ `ALTER TABLE projects ADD COLUMN config_revision INTEGER NOT NULL DEFAULT 1`
964
+ ]
965
+ },
966
+ {
967
+ version: 20,
968
+ name: "ga4-source-dimension",
969
+ statements: [
970
+ // Values: 'session' (sessionSource), 'first_user' (firstUserSource), 'manual_utm' (manualSource/utm_source)
971
+ `ALTER TABLE ga_ai_referrals ADD COLUMN source_dimension TEXT NOT NULL DEFAULT 'session'`,
972
+ // Adopt the widened unique key (now including source_dimension). This
973
+ // version intentionally does NOT drop the prior narrow index
974
+ // idx_ga_ai_ref_unique the original v17 + v20 pair did, but replaying
975
+ // that pair on a DB where data has since accumulated duplicates on the
976
+ // narrow key would crash (the bug this PR fixes). Any DB that ran the
977
+ // historical v20 once already has the narrow index gone; brand-new DBs
978
+ // never create it because v17 was rewritten to omit it. Anything else
979
+ // is repaired by v46, which drops idx_ga_ai_ref_unique_v2 and lands on
980
+ // the final (…, source_dimension, landing_page) index.
981
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v2 ON ga_ai_referrals(project_id, date, source, medium, source_dimension)`
982
+ ]
983
+ },
984
+ {
985
+ version: 21,
986
+ name: "snapshot-filtering-indexes",
987
+ statements: [
988
+ `CREATE INDEX IF NOT EXISTS idx_snapshots_citation_state ON query_snapshots(citation_state)`,
989
+ `CREATE INDEX IF NOT EXISTS idx_snapshots_provider_model ON query_snapshots(provider, model)`,
990
+ `CREATE INDEX IF NOT EXISTS idx_snapshots_location ON query_snapshots(location)`
991
+ ]
992
+ },
993
+ {
994
+ version: 22,
995
+ name: "insights-table",
996
+ statements: [
997
+ `CREATE TABLE IF NOT EXISTS insights (
998
+ id TEXT PRIMARY KEY,
999
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1000
+ type TEXT NOT NULL,
1001
+ severity TEXT NOT NULL,
1002
+ title TEXT NOT NULL,
1003
+ keyword TEXT NOT NULL,
1004
+ provider TEXT NOT NULL,
1005
+ recommendation TEXT,
1006
+ cause TEXT,
1007
+ dismissed INTEGER NOT NULL DEFAULT 0,
1008
+ created_at TEXT NOT NULL
1009
+ )`,
1010
+ `CREATE INDEX IF NOT EXISTS idx_insights_project ON insights(project_id)`,
1011
+ `CREATE INDEX IF NOT EXISTS idx_insights_created ON insights(created_at)`,
1012
+ `CREATE INDEX IF NOT EXISTS idx_insights_keyword_provider ON insights(keyword, provider)`
1013
+ ]
1014
+ },
1015
+ {
1016
+ version: 23,
1017
+ name: "health-snapshots-table",
1018
+ statements: [
1019
+ `CREATE TABLE IF NOT EXISTS health_snapshots (
1020
+ id TEXT PRIMARY KEY,
1021
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1022
+ overall_cited_rate TEXT NOT NULL,
1023
+ total_pairs INTEGER NOT NULL,
1024
+ cited_pairs INTEGER NOT NULL,
1025
+ provider_breakdown TEXT NOT NULL DEFAULT '{}',
1026
+ created_at TEXT NOT NULL
1027
+ )`,
1028
+ `CREATE INDEX IF NOT EXISTS idx_health_snapshots_project ON health_snapshots(project_id)`,
1029
+ `CREATE INDEX IF NOT EXISTS idx_health_snapshots_created ON health_snapshots(created_at)`
1030
+ ]
1031
+ },
1032
+ {
1033
+ version: 24,
1034
+ name: "intelligence-run-id",
1035
+ statements: [
1036
+ `ALTER TABLE insights ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1037
+ `CREATE INDEX IF NOT EXISTS idx_insights_run ON insights(run_id)`,
1038
+ `ALTER TABLE health_snapshots ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1039
+ `CREATE INDEX IF NOT EXISTS idx_health_snapshots_run ON health_snapshots(run_id)`
1040
+ ]
1041
+ },
1042
+ {
1043
+ version: 25,
1044
+ name: "ga4-social-referrals",
1045
+ statements: [
1046
+ // Uses GA4's native sessionDefaultChannelGroup for social classification
1047
+ `CREATE TABLE IF NOT EXISTS ga_social_referrals (
1048
+ id TEXT PRIMARY KEY,
1049
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1050
+ date TEXT NOT NULL,
1051
+ source TEXT NOT NULL,
1052
+ medium TEXT NOT NULL,
1053
+ channel_group TEXT NOT NULL DEFAULT 'Organic Social',
1054
+ sessions INTEGER NOT NULL DEFAULT 0,
1055
+ users INTEGER NOT NULL DEFAULT 0,
1056
+ synced_at TEXT NOT NULL
1057
+ )`,
1058
+ `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_project_date ON ga_social_referrals(project_id, date)`,
1059
+ `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_source ON ga_social_referrals(source)`,
1060
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_social_ref_unique ON ga_social_referrals(project_id, date, source, medium, channel_group)`
1061
+ ]
1062
+ },
1063
+ {
1064
+ version: 26,
1065
+ name: "bing-coverage-snapshots",
1066
+ statements: [
1067
+ `CREATE TABLE IF NOT EXISTS bing_coverage_snapshots (
1068
+ id TEXT PRIMARY KEY,
1069
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1070
+ date TEXT NOT NULL,
1071
+ indexed INTEGER NOT NULL DEFAULT 0,
1072
+ not_indexed INTEGER NOT NULL DEFAULT 0,
1073
+ unknown INTEGER NOT NULL DEFAULT 0,
1074
+ created_at TEXT NOT NULL
1075
+ )`,
1076
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date ON bing_coverage_snapshots(project_id, date)`
1077
+ ]
1078
+ },
1079
+ {
1080
+ version: 27,
1081
+ name: "credential-columns-removed-from-schema",
1082
+ statements: [
1083
+ // Credential columns removed from Drizzle schema — credentials now live in config.yaml.
1084
+ // Physical columns intentionally retained for one-time migration by server.ts.
1085
+ // No DDL statements needed.
1086
+ ]
1087
+ },
1088
+ {
1089
+ version: 28,
1090
+ name: "sync-run-id-bing-inspect",
1091
+ statements: [
1092
+ `ALTER TABLE bing_url_inspections ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1093
+ `CREATE INDEX IF NOT EXISTS idx_bing_inspect_run ON bing_url_inspections(sync_run_id)`
1094
+ ]
1095
+ },
1096
+ {
1097
+ version: 29,
1098
+ name: "sync-run-id-ga-traffic",
1099
+ statements: [
1100
+ `ALTER TABLE ga_traffic_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1101
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_run ON ga_traffic_snapshots(sync_run_id)`
1102
+ ]
1103
+ },
1104
+ {
1105
+ version: 30,
1106
+ name: "sync-run-id-ga-ai-ref",
1107
+ statements: [
1108
+ `ALTER TABLE ga_ai_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1109
+ `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_run ON ga_ai_referrals(sync_run_id)`
1110
+ ]
1111
+ },
1112
+ {
1113
+ version: 31,
1114
+ name: "sync-run-id-ga-social-ref",
1115
+ statements: [
1116
+ `ALTER TABLE ga_social_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1117
+ `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_run ON ga_social_referrals(sync_run_id)`
1118
+ ]
1119
+ },
1120
+ {
1121
+ version: 32,
1122
+ name: "sync-run-id-ga-summary",
1123
+ statements: [
1124
+ `ALTER TABLE ga_traffic_summaries ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1125
+ `CREATE INDEX IF NOT EXISTS idx_ga_summary_run ON ga_traffic_summaries(sync_run_id)`
1126
+ ]
1127
+ },
1128
+ {
1129
+ version: 33,
1130
+ name: "sync-run-id-bing-coverage",
1131
+ statements: [
1132
+ `ALTER TABLE bing_coverage_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1133
+ `CREATE INDEX IF NOT EXISTS idx_bing_coverage_snap_run ON bing_coverage_snapshots(sync_run_id)`
1134
+ ]
1135
+ },
1136
+ {
1137
+ version: 34,
1138
+ name: "bing-coverage-index-rename",
1139
+ statements: [
1140
+ `DROP INDEX IF EXISTS idx_bing_coverage_snap_project_date`,
1141
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date_unique ON bing_coverage_snapshots(project_id, date)`
1142
+ ]
1143
+ },
1144
+ {
1145
+ version: 35,
1146
+ name: "snapshot-created-at-index",
1147
+ statements: [
1148
+ `CREATE INDEX IF NOT EXISTS idx_snapshots_created_at ON query_snapshots(created_at)`
1149
+ ]
1150
+ },
1151
+ {
1152
+ version: 36,
1153
+ name: "sql-injection-review",
1154
+ statements: [
1155
+ // Transaction handling and SQL injection review: verified all strings
1156
+ // use SQLite ? binding via Drizzle. No parameterization changes needed.
1157
+ ]
1158
+ },
1159
+ {
1160
+ version: 37,
1161
+ name: "legacy-credential-cleanup",
1162
+ statements: [
1163
+ // The legacy credential columns (private_key on ga_connections; access_token,
1164
+ // refresh_token, token_expires_at on google_connections) are removed by the
1165
+ // extractLegacyCredentials / dropLegacyCredentialColumns pair.
1166
+ // Callers read the rows, persist them to config.yaml, and only then drop
1167
+ // the columns so a failed config write doesn't permanently lose credentials.
1168
+ // No DDL statements here — columns are dropped via exported functions below.
1169
+ ]
1170
+ },
1171
+ {
1172
+ version: 38,
1173
+ name: "agent-sessions",
1174
+ statements: [
1175
+ `CREATE TABLE IF NOT EXISTS agent_sessions (
1176
+ id TEXT PRIMARY KEY,
1177
+ project_id TEXT NOT NULL UNIQUE REFERENCES projects(id) ON DELETE CASCADE,
1178
+ system_prompt TEXT NOT NULL,
1179
+ model_provider TEXT NOT NULL,
1180
+ model_id TEXT NOT NULL,
1181
+ messages TEXT NOT NULL DEFAULT '[]',
1182
+ follow_up_queue TEXT NOT NULL DEFAULT '[]',
1183
+ created_at TEXT NOT NULL,
1184
+ updated_at TEXT NOT NULL
1185
+ )`,
1186
+ `CREATE INDEX IF NOT EXISTS idx_agent_sessions_project ON agent_sessions(project_id)`,
1187
+ `CREATE INDEX IF NOT EXISTS idx_agent_sessions_updated ON agent_sessions(updated_at)`
1188
+ ]
1189
+ },
1190
+ {
1191
+ version: 39,
1192
+ name: "aero-provider-rename",
1193
+ statements: [
1194
+ // Align Aero provider IDs with sweep naming — anthropic→claude, google→gemini.
1195
+ // Idempotent: the UPDATE is a no-op once the rename has been applied.
1196
+ `UPDATE agent_sessions SET model_provider = 'claude' WHERE model_provider = 'anthropic'`,
1197
+ `UPDATE agent_sessions SET model_provider = 'gemini' WHERE model_provider = 'google'`
1198
+ ]
1199
+ },
1200
+ {
1201
+ version: 40,
1202
+ name: "agent-memory",
1203
+ statements: [
1204
+ `CREATE TABLE IF NOT EXISTS agent_memory (
1205
+ id TEXT PRIMARY KEY,
1206
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1207
+ key TEXT NOT NULL,
1208
+ value TEXT NOT NULL,
1209
+ source TEXT NOT NULL,
1210
+ created_at TEXT NOT NULL,
1211
+ updated_at TEXT NOT NULL
1212
+ )`,
1213
+ `CREATE UNIQUE INDEX IF NOT EXISTS uniq_agent_memory_project_key
1214
+ ON agent_memory(project_id, key)`,
1215
+ `CREATE INDEX IF NOT EXISTS idx_agent_memory_project_updated
1216
+ ON agent_memory(project_id, updated_at)`
1217
+ ]
1218
+ },
1219
+ {
1220
+ version: 41,
1221
+ name: "common-crawl-backlinks",
1222
+ statements: [
1223
+ // cc_release_syncs
1224
+ `CREATE TABLE IF NOT EXISTS cc_release_syncs (
1225
+ id TEXT PRIMARY KEY,
1226
+ release TEXT NOT NULL UNIQUE,
1227
+ status TEXT NOT NULL,
1228
+ phase_detail TEXT,
1229
+ vertex_path TEXT,
1230
+ edges_path TEXT,
1231
+ vertex_sha256 TEXT,
1232
+ edges_sha256 TEXT,
1233
+ vertex_bytes INTEGER,
1234
+ edges_bytes INTEGER,
1235
+ projects_processed INTEGER,
1236
+ domains_discovered INTEGER,
1237
+ download_started_at TEXT,
1238
+ download_finished_at TEXT,
1239
+ query_started_at TEXT,
1240
+ query_finished_at TEXT,
1241
+ error TEXT,
1242
+ created_at TEXT NOT NULL,
1243
+ updated_at TEXT NOT NULL
1244
+ )`,
1245
+ `CREATE INDEX IF NOT EXISTS idx_cc_release_syncs_status ON cc_release_syncs(status)`,
1246
+ // backlink_domains
1247
+ `CREATE TABLE IF NOT EXISTS backlink_domains (
1248
+ id TEXT PRIMARY KEY,
1249
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1250
+ release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1251
+ release TEXT NOT NULL,
1252
+ target_domain TEXT NOT NULL,
1253
+ linking_domain TEXT NOT NULL,
1254
+ num_hosts INTEGER NOT NULL,
1255
+ created_at TEXT NOT NULL
1256
+ )`,
1257
+ `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project ON backlink_domains(project_id)`,
1258
+ `CREATE INDEX IF NOT EXISTS idx_backlink_domains_release_sync ON backlink_domains(release_sync_id)`,
1259
+ `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project_release ON backlink_domains(project_id, release)`,
1260
+ `CREATE INDEX IF NOT EXISTS idx_backlink_domains_hosts ON backlink_domains(num_hosts)`,
1261
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_domains_unique ON backlink_domains(project_id, release, linking_domain)`,
1262
+ // backlink_summaries
1263
+ `CREATE TABLE IF NOT EXISTS backlink_summaries (
1264
+ id TEXT PRIMARY KEY,
1265
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1266
+ release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1267
+ release TEXT NOT NULL,
1268
+ target_domain TEXT NOT NULL,
1269
+ total_linking_domains INTEGER NOT NULL,
1270
+ total_hosts INTEGER NOT NULL,
1271
+ top_10_hosts_share TEXT NOT NULL,
1272
+ queried_at TEXT NOT NULL,
1273
+ created_at TEXT NOT NULL
1274
+ )`,
1275
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_summaries_project_release ON backlink_summaries(project_id, release)`,
1276
+ `CREATE INDEX IF NOT EXISTS idx_backlink_summaries_project ON backlink_summaries(project_id)`
1277
+ ]
1278
+ },
1279
+ {
1280
+ version: 42,
1281
+ name: "auto-extract-backlinks",
1282
+ statements: [
1283
+ `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`
1284
+ ]
1285
+ },
1286
+ {
1287
+ version: 43,
1288
+ name: "backfill-bing-in-index",
1289
+ statements: [
1290
+ // Backfill bing_url_inspections.in_index using the new crawl-signal
1291
+ // decision tree. Uses a created_at cutoff so rows written by the new
1292
+ // code (which applies a live GetCrawlIssues demotion that can't be
1293
+ // replayed offline) are preserved.
1294
+ `UPDATE bing_url_inspections
1295
+ SET in_index = CASE
1296
+ WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
1297
+ WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
1298
+ WHEN last_crawled_date IS NOT NULL THEN 1
1299
+ WHEN discovery_date IS NOT NULL THEN 0
1300
+ ELSE NULL
1301
+ END
1302
+ WHERE created_at < '2026-04-22T00:00:00Z'`
1303
+ ]
1304
+ },
1305
+ {
1306
+ version: 44,
1307
+ name: "ga-traffic-landing-normalized",
1308
+ statements: [
1309
+ `ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
1310
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
1311
+ ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`
1312
+ ]
1313
+ },
1314
+ {
1315
+ version: 45,
1316
+ name: "ga-traffic-direct-sessions",
1317
+ statements: [
1318
+ `ALTER TABLE ga_traffic_snapshots ADD COLUMN direct_sessions INTEGER`
1319
+ ]
1320
+ },
1321
+ {
1322
+ version: 46,
1323
+ name: "ga-ai-landing-page",
1324
+ statements: [
1325
+ `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page TEXT NOT NULL DEFAULT '(not set)'`,
1326
+ `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page_normalized TEXT`,
1327
+ `DROP INDEX IF EXISTS idx_ga_ai_ref_unique_v2`,
1328
+ `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_landing_page
1329
+ ON ga_ai_referrals(project_id, date, landing_page_normalized)`,
1330
+ `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v3
1331
+ ON ga_ai_referrals(project_id, date, source, medium, source_dimension, landing_page)`
1332
+ ]
1333
+ }
1101
1334
  ];
1102
1335
  function isDuplicateColumnError(err) {
1103
1336
  if (!(err instanceof Error)) return false;
@@ -1182,18 +1415,32 @@ function dropLegacyCredentialColumns(db) {
1182
1415
  dropColumnIfExists(db, "ga_connections", "private_key");
1183
1416
  }
1184
1417
  }
1418
/**
 * Reads the highest migration version recorded in the `_migrations` table.
 *
 * @param db Database handle exposing `all()`; the query is built with the `sql` tag.
 * @returns The largest recorded `version`, or 0 when the query yields no row
 *          or the aggregate is NULL/undefined (e.g. nothing recorded yet).
 */
function getAppliedVersion(db) {
  const result = db.all(sql`SELECT MAX(version) as max_version FROM _migrations`);
  const firstRow = result[0];
  // A missing row and a NULL aggregate both mean "no version recorded".
  if (firstRow == null) {
    return 0;
  }
  return firstRow.max_version ?? 0;
}
1422
/**
 * Writes a (version, name) row into `_migrations`.
 *
 * The statement uses INSERT OR IGNORE, and only `version` and `name` are
 * supplied; `applied_at` is not set by this function.
 *
 * @param db      Database or transaction handle exposing `run()`.
 * @param version Migration version number to record.
 * @param name    Migration name stored next to the version.
 */
function recordMigration(db, version, name) {
  const insertRow = sql`INSERT OR IGNORE INTO _migrations (version, name) VALUES (${version}, ${name})`;
  db.run(insertRow);
}
1185
1425
  function migrate(db) {
1186
1426
  const statements = MIGRATION_SQL.split(";").map((s) => s.trim()).filter((s) => s.length > 0);
1187
1427
  for (const statement of statements) {
1188
1428
  db.run(sql.raw(statement));
1189
1429
  }
1190
- for (const migration of MIGRATIONS) {
1191
- try {
1192
- db.run(sql.raw(migration));
1193
- } catch (err) {
1194
- if (isDuplicateColumnError(err)) continue;
1195
- throw err;
1196
- }
1430
+ const appliedVersion = getAppliedVersion(db);
1431
+ for (const mv of MIGRATION_VERSIONS) {
1432
+ if (mv.version <= appliedVersion) continue;
1433
+ db.transaction((tx) => {
1434
+ for (const statement of mv.statements) {
1435
+ try {
1436
+ tx.run(sql.raw(statement));
1437
+ } catch (err) {
1438
+ if (isDuplicateColumnError(err)) continue;
1439
+ throw err;
1440
+ }
1441
+ }
1442
+ recordMigration(tx, mv.version, mv.name);
1443
+ });
1197
1444
  }
1198
1445
  }
1199
1446