footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,117 @@
1
+ """
2
+ Content extraction from various file types.
3
+ """
4
+
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
# Module-level logger used by ContentExtractor for diagnostics.
logger = logging.getLogger(__name__)
# Raise the "pypdf" logger's threshold so only ERROR and above are emitted by it.
logging.getLogger("pypdf").setLevel(logging.ERROR)

# Warn-once flags: flipped to True after the first "not installed" warning so
# a missing optional dependency is reported once rather than once per file.
_pypdf_warned = False
_docx_warned = False
14
+
15
+
16
class ContentExtractor:
    """Extract a short text preview from files of various types.

    The extractor is selected from the file suffix (case-insensitive):
    plain-text formats are read directly, ``.pdf`` uses the optional
    ``pypdf`` package and ``.docx`` uses the optional ``python-docx``
    package. Every failure is logged and reported through the return value;
    no exception escapes :meth:`extract`.
    """

    # Suffixes that are read directly as UTF-8 text.
    _TEXT_SUFFIXES = frozenset({".txt", ".md", ".py", ".js", ".json", ".yaml", ".yml"})
    # Upper bound on the number of PDF pages inspected for a preview.
    _MAX_PDF_PAGES = 3

    def __init__(self, max_preview_length: int = 1000):
        """Create an extractor.

        Args:
            max_preview_length: Maximum number of characters returned by any
                extraction method.
        """
        self.max_preview_length = max_preview_length

    def extract(self, file_path: Path) -> Optional[str]:
        """Return a preview of the file's content (first N characters).

        Args:
            file_path: Path of the file to inspect.

        Returns:
            The preview text; ``""`` when the type-specific reader failed;
            ``None`` when no extractor exists for the suffix, the optional
            dependency for that type is not installed, or an unexpected
            error occurred while dispatching.
        """
        try:
            suffix = file_path.suffix.lower()

            if suffix in self._TEXT_SUFFIXES:
                return self._extract_text(file_path)
            if suffix == ".pdf":
                return self._extract_pdf(file_path)
            if suffix == ".docx":
                return self._extract_docx(file_path)

            logger.debug("No extractor for %s", suffix)
            return None
        except Exception as e:
            # Intentional broad catch: extraction is inherently brittle
            # (encoding, corrupt files, library bugs).
            logger.error("Error extracting content from %s: %s", file_path, e)
            return None

    def _extract_text(self, file_path: Path) -> str:
        """Return the leading characters of a plain-text file.

        Undecodable bytes are dropped (``errors="ignore"``). Returns ``""``
        if the file cannot be read.
        """
        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read(self.max_preview_length * 2)  # Read a bit more
            return content[: self.max_preview_length]
        except Exception as e:
            # Intentional broad catch: see extract().
            logger.error("Error reading text file %s: %s", file_path, e)
            return ""

    def _extract_pdf(self, file_path: Path) -> Optional[str]:
        """Return text from the first few pages of a PDF.

        Returns:
            The preview text, ``""`` if reading failed, or ``None`` when
            ``pypdf`` is not installed (warned about once per process).
        """
        global _pypdf_warned
        try:
            import pypdf

            with open(file_path, "rb") as f:
                reader = pypdf.PdfReader(f)

                pieces = []
                collected = 0
                for page_num in range(min(self._MAX_PDF_PAGES, len(reader.pages))):
                    # Guard against a page yielding None instead of a string
                    # so one odd page does not discard the whole preview.
                    page_text = (reader.pages[page_num].extract_text() or "") + "\n"
                    pieces.append(page_text)
                    collected += len(page_text)
                    if collected >= self.max_preview_length:
                        break

            return "".join(pieces)[: self.max_preview_length]

        except ImportError:
            if not _pypdf_warned:
                logger.warning("pypdf not installed, skipping PDF extraction")
                _pypdf_warned = True
            return None
        except Exception as e:
            # Intentional broad catch: see extract().
            logger.error("Error reading PDF %s: %s", file_path, e)
            return ""

    def _extract_docx(self, file_path: Path) -> Optional[str]:
        """Return text from the leading paragraphs of a DOCX file.

        Returns:
            The preview text, ``""`` if reading failed, or ``None`` when
            ``python-docx`` is not installed (warned about once per process).
        """
        global _docx_warned
        try:
            import docx

            doc = docx.Document(file_path)

            pieces = []
            collected = 0
            for para in doc.paragraphs:
                para_text = para.text + "\n"
                pieces.append(para_text)
                collected += len(para_text)
                if collected >= self.max_preview_length:
                    break

            return "".join(pieces)[: self.max_preview_length]

        except ImportError:
            if not _docx_warned:
                logger.warning("python-docx not installed, skipping DOCX extraction")
                _docx_warned = True
            return None
        except Exception as e:
            # Intentional broad catch: see extract().
            logger.error("Error reading DOCX %s: %s", file_path, e)
            return ""
@@ -0,0 +1,36 @@
1
+ """SQLite database schema and operations for Footprinter."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from footprinter.ingest.db.connector_schema import init_connector_schemas
7
+ from footprinter.ingest.db.schema import SchemaMixin
8
+ from footprinter.paths import get_db_path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class Database(SchemaMixin):
14
+ """SQLite database connection and schema manager for Footprinter."""
15
+
16
+ def __init__(self, db_path: str = None, connector_specs: list = None):
17
+ if db_path is None:
18
+ db_path = str(get_db_path())
19
+ self.db_path = db_path
20
+ Path(db_path).parent.mkdir(parents=True, exist_ok=True)
21
+ self.conn = None
22
+ self.init_db()
23
+ init_connector_schemas(self.conn, connector_specs or [])
24
+
25
+ def close(self):
26
+ """Close database connection."""
27
+ if self.conn:
28
+ self.conn.close()
29
+
30
+ def __enter__(self):
31
+ """Enter context manager, returning self."""
32
+ return self
33
+
34
+ def __exit__(self, exc_type, exc_val, exc_tb):
35
+ """Exit context manager, closing the database connection."""
36
+ self.close()
@@ -0,0 +1 @@
1
+ """Database sub-package."""
@@ -0,0 +1,47 @@
1
+ """Connector-scope schema extensions.
2
+
3
+ Connectors declare extra columns via ConnectorSpec.schema_extensions.
4
+ This module applies those declarations using idempotent ALTER TABLE,
5
+ mirroring the pattern in app_schema.py.
6
+ """
7
+
8
+ import logging
9
+ import sqlite3
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def init_connector_schemas(conn: sqlite3.Connection, connector_specs: list) -> None:
    """Apply schema extensions for the given connector specs.

    The caller is responsible for filtering to installed connectors.

    Args:
        conn: Open sqlite3 connection.
        connector_specs: Objects exposing a ``schema_extensions`` attribute.
            ``None`` or an empty list is a no-op.

    Each spec with a non-empty ``schema_extensions`` is handed to
    register_connector_schema(), which adds the columns via ALTER TABLE.
    """
    # Tolerate None so callers with an optional spec list need no guard.
    for spec in connector_specs or ():
        extensions = spec.schema_extensions
        if extensions:
            register_connector_schema(conn, extensions)
24
+
25
+
26
def register_connector_schema(
    conn: sqlite3.Connection,
    extensions: dict[str, list[tuple[str, str]]],
) -> None:
    """Add connector-declared columns to existing tables.

    The operation is idempotent: a column that already exists is skipped.

    Args:
        conn: An open sqlite3 connection with base schema already initialized.
        extensions: Mapping of table_name → [(col_name, col_definition), ...].
            Example: {"folders": [("web_link", "TEXT")]}
            Names and definitions are interpolated into the SQL verbatim, so
            they must come from trusted connector code.

    Raises:
        sqlite3.OperationalError: For any failure other than "duplicate
            column" (e.g. the target table does not exist). Nothing is
            committed by this function in that case.
    """
    cursor = conn.cursor()
    try:
        for table, columns in extensions.items():
            for col_name, col_def in columns:
                try:
                    cursor.execute(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_def}")
                except sqlite3.OperationalError as exc:
                    # Only "column already exists" is an expected outcome.
                    if "duplicate column" not in str(exc).lower():
                        raise
    finally:
        # Release the cursor on success and on the re-raise path alike.
        cursor.close()
    conn.commit()
@@ -0,0 +1,315 @@
1
+ """Database schema migration for pre-existing Footprinter databases.
2
+
3
+ Extracted for separation of concerns. Contains all
4
+ ALTER TABLE, RENAME, DROP, and data-migration logic needed to upgrade
5
+ databases created before the current DDL.
6
+
7
+ Only runs on databases that already have tables — fresh installs skip
8
+ this entirely (init_db handles everything via CREATE TABLE IF NOT EXISTS).
9
+ """
10
+
11
+ import logging
12
+ import sqlite3
13
+
14
+ from footprinter.ingest.db.schema import _INGESTS_DDL, ACCESS_CONTROL_TABLES
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
# Table name → SQL expression used to backfill ``display_name`` for rows
# where it is NULL. Values are interpolated into an UPDATE statement, so they
# may be a plain column name or an expression (messages uses the first 100
# characters of ``content``).
_DISPLAY_NAME_BACKFILL = {
    "files": "name",
    "folders": "name",
    "visits": "title",
    "projects": "project_name",
    "chats": "title",
    "messages": "SUBSTR(content, 1, 100)",
    "emails": "subject",
    "clients": "name",
}
28
+
29
+
30
def _try_execute(cursor: sqlite3.Cursor, sql: str) -> bool:
    """Run one best-effort migration statement.

    Returns True when the statement ran, False when SQLite rejected it with
    an OperationalError (missing table, column already present, ...). The
    skip is logged at DEBUG so it stays traceable without being fatal.
    """
    try:
        cursor.execute(sql)
    except sqlite3.OperationalError as exc:
        logger.debug("Migration step skipped (%s): %s", exc, sql)
        return False
    return True


def migrate_schema(cursor: sqlite3.Cursor) -> None:
    """Upgrade a pre-existing database to the current schema.

    Adds missing columns, renames legacy columns, drops stale artefacts,
    and migrates data where needed. Skips tables that don't exist yet and
    columns that already exist (each skip is logged at DEBUG).

    Must run BEFORE ``PRAGMA foreign_keys=ON`` — the browser_visits →
    visits rename triggers SQLite's schema rewriter which recompiles FK
    references and fails on stale compiled references with FK enforcement.

    Args:
        cursor: Cursor on the database to migrate. This function does not
            commit; that is left to the caller.

    Raises:
        sqlite3.Error: From the unconditional DROP ... IF EXISTS / DDL
            statements, which are not treated as best-effort.
    """
    mcp_columns = (
        ("mcp_read", "TEXT DEFAULT 'inherit'"),
        ("mcp_view", "TEXT DEFAULT 'inherit'"),
    )

    # ── mcp_read / mcp_view on all entity tables ──
    for table in ACCESS_CONTROL_TABLES:
        for col, col_def in mcp_columns:
            _try_execute(cursor, f"ALTER TABLE {table} ADD COLUMN {col} {col_def}")

    # Drop stale artefacts from chat_conversations → chats rename.
    for name in (
        "chat_conversations_ai",
        "chat_conversations_ad",
        "chat_conversations_au",
        "chats_ai",
        "chats_ad",
        "chats_au",
    ):
        cursor.execute(f"DROP TRIGGER IF EXISTS {name}")
    cursor.execute("DROP TABLE IF EXISTS chat_conversations_fts")

    # If browser_visits still exists, the RENAME TO visits below will
    # trigger SQLite's schema rewriter. With foreign_keys ON, the
    # rewriter recompiles FK references and hits the stale compiled
    # chat_conversations FK. Dropping chats_fts BEFORE the rename
    # prevents corruption. Skip on fresh/already-migrated DBs.
    cursor.execute("SELECT 1 FROM sqlite_master WHERE type='table' AND name='browser_visits'")
    if cursor.fetchone() is not None:
        for name in ("chats_fts_ai", "chats_fts_ad", "chats_fts_au"):
            cursor.execute(f"DROP TRIGGER IF EXISTS {name}")
        cursor.execute("DROP TABLE IF EXISTS chats_fts")

    # Column renames, applied in this exact order: (table, old, new).
    column_renames = (
        # indexed_drive_* → remote_*
        ("files", "indexed_drive_id", "remote_file_id"),
        ("folders", "indexed_drive_folder_id", "remote_folder_id"),
        # Standardize column naming conventions
        ("folders", "last_scanned_at", "scanned_at"),
        ("chats", "info_vectorized_at", "metadata_vectorized_at"),
        ("folders", "direct_in_drive", "remote_file_count"),
        ("folders", "total_in_drive", "remote_file_count_recursive"),
        ("folders", "last_drive_check", "remote_checked_at"),
        # artifact_count → file_count (missed in artifacts → files rename)
        ("folders", "direct_artifact_count", "direct_file_count"),
        ("folders", "total_artifact_count", "total_file_count"),
        # files.content_hash → sha256_hash
        ("files", "content_hash", "sha256_hash"),
    )
    for table, old, new in column_renames:
        _try_execute(cursor, f"ALTER TABLE {table} RENAME COLUMN {old} TO {new}")

    # Data migration: Drive files stored MD5 in content_hash — move to md5_hash
    _try_execute(
        cursor,
        "UPDATE files SET md5_hash = sha256_hash, sha256_hash = NULL "
        "WHERE source != 'local' AND sha256_hash IS NOT NULL AND md5_hash IS NULL",
    )

    # Drop duplicate total_size column (total_size_bytes is canonical),
    # then dead columns that were written but never read. The old name
    # (counts_updated_at) is included for DBs that were never migrated
    # through the rename step.
    for table, col in (
        ("folders", "total_size"),
        ("folders", "stats_updated_at"),
        ("folders", "counts_updated_at"),
        ("emails", "summarized_at"),
    ):
        _try_execute(cursor, f"ALTER TABLE {table} DROP COLUMN {col}")

    # Drop orphan tables from old schema.
    for table in ("artifact_sync_state", "file_ai_analysis", "permission_defaults", "visibility_defaults"):
        cursor.execute(f"DROP TABLE IF EXISTS {table}")

    # Retire dead tracking tables.
    # Migrate the live watermark row before dropping the table.
    # Create ingests early if needed — the main DDL is idempotent.
    cursor.execute(_INGESTS_DDL)
    try:
        cursor.execute("SELECT stage, last_completed_at FROM pipeline_watermarks")
        for row in cursor.fetchall():
            # Rows may be plain tuples or name-addressable, depending on the
            # connection's row factory.
            if isinstance(row, tuple):
                stage, ts = row[0], row[1]
            else:
                stage, ts = row["stage"], row["last_completed_at"]
            if ts:
                # Insert only if an identical completed run is not recorded yet.
                cursor.execute(
                    "INSERT INTO ingests (pipe, started_at, completed_at, status) "
                    "SELECT ?, ?, ?, 'completed' "
                    "WHERE NOT EXISTS ("
                    "  SELECT 1 FROM ingests WHERE pipe = ? AND completed_at = ?"
                    ")",
                    (stage, ts, ts, stage, ts),
                )
    except sqlite3.OperationalError as exc:
        # Table doesn't exist on fresh install.
        logger.debug("Watermark migration skipped: %s", exc)
    cursor.execute("DROP TABLE IF EXISTS pipeline_watermarks")
    cursor.execute("DROP TABLE IF EXISTS runs")

    # Columns added before the browser_visits → visits rename:
    # (table, column, definition), applied in this order.
    pre_rename_columns = (
        # browser_visits columns added with status/client/project support
        ("browser_visits", "status", "TEXT DEFAULT 'active'"),
        ("browser_visits", "client_id", "INTEGER"),
        ("browser_visits", "project_id", "INTEGER"),
        # emails: add status column
        ("emails", "status", "TEXT DEFAULT 'active'"),
        # files: add client_id
        ("files", "client_id", "INTEGER"),
    ) + tuple(
        # mcp columns on the old table — the ACCESS_CONTROL_TABLES loop above
        # targets "visits" which doesn't exist yet on legacy DBs.
        ("browser_visits", col, col_def)
        for col, col_def in mcp_columns
    )
    for table, col, col_def in pre_rename_columns:
        _try_execute(cursor, f"ALTER TABLE {table} ADD COLUMN {col} {col_def}")

    # Rename browser_visits → visits. The index drops and the rename share
    # one try block on purpose: a failure in any of them abandons the rest.
    try:
        for idx in (
            "idx_browser_time",
            "idx_browser_browser",
            "idx_browser_visits_project",
            "idx_browser_unique",
            "idx_browser_visits_client",
            "idx_browser_visits_status",
            "idx_browser_visits_visibility",
        ):
            cursor.execute(f"DROP INDEX IF EXISTS {idx}")
        cursor.execute("ALTER TABLE browser_visits RENAME TO visits")
    except sqlite3.OperationalError as exc:
        # Already renamed or fresh install.
        logger.debug("browser_visits rename skipped: %s", exc)

    # Columns added after the rename: (table, column, definition).
    post_rename_columns = [
        # clients/projects: add status_reason column
        ("clients", "status_reason", "TEXT"),
        ("projects", "status_reason", "TEXT"),
        # ── standard entity column set ──
        # folders: add status, client_id, indexed_at
        ("folders", "status", "TEXT DEFAULT 'active'"),
        ("folders", "client_id", "INTEGER"),
        ("folders", "indexed_at", "DATETIME"),
        # messages: add status
        ("messages", "status", "TEXT DEFAULT 'active'"),
        # emails: add created_at (no DEFAULT — see visits comment below)
        ("emails", "created_at", "DATETIME"),
        # visits / browser_visits: add created_at
        # Note: ALTER TABLE cannot use CURRENT_TIMESTAMP as default
        # (non-constant), so we add without default. The CREATE TABLE
        # DDL has the default for fresh DBs.
        ("visits", "created_at", "DATETIME"),
        ("browser_visits", "created_at", "DATETIME"),
    ]
    # display_name on every access-controlled entity table
    post_rename_columns.extend((table, "display_name", "TEXT") for table in ACCESS_CONTROL_TABLES)
    for table, col, col_def in post_rename_columns:
        _try_execute(cursor, f"ALTER TABLE {table} ADD COLUMN {col} {col_def}")

    # Backfill display_name from source columns for existing rows
    for table, source_col in _DISPLAY_NAME_BACKFILL.items():
        _try_execute(cursor, f"UPDATE {table} SET display_name = {source_col} WHERE display_name IS NULL")

    # ── Timestamp column standardization ──

    # Rename chats.updated_at → modified_at (origin timestamp). This must
    # precede re-adding updated_at as an audit column below.
    _try_execute(cursor, "ALTER TABLE chats RENAME COLUMN updated_at TO modified_at")

    # Add updated_at audit column to all 6 entity tables
    for table in ("files", "folders", "visits", "chats", "messages", "emails"):
        _try_execute(cursor, f"ALTER TABLE {table} ADD COLUMN updated_at DATETIME")

    # Add indexed_at to messages (was missing)
    _try_execute(cursor, "ALTER TABLE messages ADD COLUMN indexed_at DATETIME")

    # Add vectorized_chunks to messages (matches files pattern)
    _try_execute(cursor, "ALTER TABLE messages ADD COLUMN vectorized_chunks INTEGER DEFAULT 0")

    # Backfill new audit columns from existing data
    for sql in (
        "UPDATE files SET updated_at = indexed_at WHERE updated_at IS NULL",
        "UPDATE folders SET updated_at = indexed_at WHERE updated_at IS NULL",
        "UPDATE visits SET updated_at = indexed_at WHERE updated_at IS NULL",
        "UPDATE chats SET updated_at = indexed_at WHERE updated_at IS NULL",
        "UPDATE emails SET updated_at = indexed_at WHERE updated_at IS NULL",
        "UPDATE messages SET indexed_at = created_at WHERE indexed_at IS NULL",
        "UPDATE messages SET updated_at = created_at WHERE updated_at IS NULL",
    ):
        _try_execute(cursor, sql)