footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1043 @@
|
|
|
1
|
+
"""Database schema initialization."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sqlite3
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Standard Entity Column Set
|
|
11
|
+
# ─────────────────────────
|
|
12
|
+
# All 8 entity tables (files, folders, visits, projects, chats,
|
|
13
|
+
# messages, emails, clients) share these baseline columns:
|
|
14
|
+
#
|
|
15
|
+
# id INTEGER PRIMARY KEY AUTOINCREMENT
|
|
16
|
+
# status TEXT DEFAULT 'active' CHECK (active|hidden|removed; projects and chats allow extra values)
|
|
17
|
+
# created_at DATETIME DEFAULT CURRENT_TIMESTAMP (no default on files, chats, messages)
|
|
18
|
+
# display_name TEXT (auto-populated via trigger)
|
|
19
|
+
# mcp_read TEXT DEFAULT 'inherit' CHECK (allow|deny|inherit)
|
|
20
|
+
# mcp_view TEXT DEFAULT 'inherit' CHECK (hidden|opaque|visible|inherit)
|
|
21
|
+
#
|
|
22
|
+
# Data-source entities (files, folders, emails, chats, visits, messages)
|
|
23
|
+
# also have audit timestamp and association columns (messages has no project_id/client_id):
|
|
24
|
+
# indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP (immutable first-seen)
|
|
25
|
+
# updated_at DATETIME DEFAULT CURRENT_TIMESTAMP (refreshed on re-process)
|
|
26
|
+
# project_id INTEGER REFERENCES projects(id)
|
|
27
|
+
# client_id INTEGER REFERENCES clients(id)
|
|
28
|
+
#
|
|
29
|
+
# Timestamp format: YYYY-MM-DD HH:MM:SS (UTC, matches SQLite CURRENT_TIMESTAMP).
|
|
30
|
+
# Python code uses utils.time.UTC_FMT / utc_now_iso() for the same format.
|
|
31
|
+
#
|
|
32
|
+
# Source-specific metadata is stored in the `metadata` TEXT column
|
|
33
|
+
# (JSON) on tables that need it: files, projects, chats, messages,
|
|
34
|
+
# emails, clients.
|
|
35
|
+
#
|
|
36
|
+
# Columns populated by app or future scope
|
|
37
|
+
# ─────────────────────────────────────────
|
|
38
|
+
# summary TEXT — AI-generated summary (files, emails, chats)
|
|
39
|
+
# summarized_at DATETIME — when summary was generated (files only)
|
|
40
|
+
#
|
|
41
|
+
# files_fts and chats_fts reference the summary column via FTS5
|
|
42
|
+
# triggers, so summary stays in the standard schema. emails also
|
|
43
|
+
# has summary for consistency. Tool-only installs leave them NULL.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Single source of truth for FTS5 virtual table definitions.
|
|
47
|
+
# All CREATE TABLE, backfill, and trigger SQL is derived from this.
|
|
48
|
+
_FTS_DEFINITIONS: dict[str, dict[str, Any]] = {
    # Keyed by FTS5 virtual table name. Each entry holds:
    #   base_table      - name of the regular table the FTS table is built on
    #   columns         - column names listed for the FTS table
    #   content_columns - always a subset of "columns"
    #                     NOTE(review): presumably the long-text columns;
    #                     confirm against the SQL builders that read this.
    "files_fts": {
        "base_table": "files",
        "columns": ["name", "content_preview", "summary"],
        "content_columns": ["content_preview", "summary"],
    },
    "emails_fts": {
        "base_table": "emails",
        "columns": ["subject", "from_name", "from_address", "body_preview"],
        "content_columns": ["body_preview"],
    },
    "chats_fts": {
        "base_table": "chats",
        "columns": ["title", "summary"],
        "content_columns": ["summary"],
    },
}
|
|
65
|
+
|
|
66
|
+
# Single source of truth for the ingests table DDL.
|
|
67
|
+
# Referenced by both migration.py (early creation for last-run migration)
|
|
68
|
+
# and init_db() (canonical DDL).
|
|
69
|
+
_INGESTS_DDL = (
    # Adjacent string literals are concatenated into one CREATE TABLE
    # statement; IF NOT EXISTS makes it safe to execute more than once.
    "CREATE TABLE IF NOT EXISTS ingests ("
    "id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "pipe TEXT NOT NULL, "
    "started_at DATETIME NOT NULL, "
    "completed_at DATETIME, "
    # A row defaults to 'running'; the CHECK limits status to four values.
    "status TEXT NOT NULL DEFAULT 'running' "
    " CHECK (status IN ('running', 'completed', 'failed', 'interrupted')), "
    "mode TEXT, "
    "trigger TEXT, "
    # Per-run counters, all defaulting to zero.
    "items_processed INTEGER DEFAULT 0, "
    "items_new INTEGER DEFAULT 0, "
    "items_updated INTEGER DEFAULT 0, "
    "items_skipped INTEGER DEFAULT 0, "
    "errors INTEGER DEFAULT 0, "
    "elapsed_seconds REAL, "
    "metadata TEXT)"
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# All 8 entity tables that carry mcp_read / mcp_view columns.
|
|
90
|
+
# Shared by init_db() (display_name triggers) and migration.py.
|
|
91
|
+
ACCESS_CONTROL_TABLES = (
    # Immutable tuple so importers cannot mutate the shared list in place.
    "files",
    "folders",
    "visits",
    "projects",
    "chats",
    "messages",
    "emails",
    "clients",
)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class SchemaMixin:
|
|
104
|
+
"""Mixin providing database schema initialization."""
|
|
105
|
+
|
|
106
|
+
def init_db(self):
|
|
107
|
+
"""Initialize database with schema."""
|
|
108
|
+
self.conn = sqlite3.connect(self.db_path, timeout=10)
|
|
109
|
+
self.conn.row_factory = sqlite3.Row
|
|
110
|
+
self.conn.execute("PRAGMA journal_mode=WAL")
|
|
111
|
+
self.conn.execute("PRAGMA busy_timeout=5000")
|
|
112
|
+
|
|
113
|
+
cursor = self.conn.cursor()
|
|
114
|
+
|
|
115
|
+
# Only run migration on existing databases (not fresh installs).
|
|
116
|
+
cursor.execute("SELECT 1 FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
|
|
117
|
+
if cursor.fetchone() is not None:
|
|
118
|
+
from footprinter.ingest.db.migration import migrate_schema
|
|
119
|
+
|
|
120
|
+
migrate_schema(cursor)
|
|
121
|
+
|
|
122
|
+
# Enable FK enforcement AFTER migrations. The browser_visits →
|
|
123
|
+
# visits rename triggers SQLite's schema rewriter which recompiles
|
|
124
|
+
# FK references. The messages table's FK was originally REFERENCES
|
|
125
|
+
# chat_conversations(id); with foreign_keys ON the rewriter
|
|
126
|
+
# validates the stale compiled reference and fails.
|
|
127
|
+
self.conn.execute("PRAGMA foreign_keys=ON")
|
|
128
|
+
|
|
129
|
+
# ========================================
|
|
130
|
+
# Files Table (unified content metadata)
|
|
131
|
+
# ========================================
|
|
132
|
+
cursor.execute(
|
|
133
|
+
"""
|
|
134
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
135
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
136
|
+
|
|
137
|
+
-- Source identification
|
|
138
|
+
source TEXT NOT NULL,
|
|
139
|
+
external_id TEXT,
|
|
140
|
+
account TEXT,
|
|
141
|
+
|
|
142
|
+
-- Core file info
|
|
143
|
+
name TEXT NOT NULL,
|
|
144
|
+
path TEXT,
|
|
145
|
+
content_type TEXT,
|
|
146
|
+
mime_type TEXT,
|
|
147
|
+
size_bytes INTEGER,
|
|
148
|
+
|
|
149
|
+
-- Origin timestamps
|
|
150
|
+
created_at DATETIME,
|
|
151
|
+
modified_at DATETIME,
|
|
152
|
+
accessed_at DATETIME,
|
|
153
|
+
|
|
154
|
+
-- Audit timestamps
|
|
155
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
156
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
157
|
+
|
|
158
|
+
-- Content
|
|
159
|
+
content_preview TEXT,
|
|
160
|
+
sha256_hash TEXT,
|
|
161
|
+
|
|
162
|
+
-- Vectorization status
|
|
163
|
+
vectorized_at DATETIME,
|
|
164
|
+
vectorized_chunks INTEGER DEFAULT 0,
|
|
165
|
+
|
|
166
|
+
-- Project/client association
|
|
167
|
+
project_id INTEGER REFERENCES projects(id),
|
|
168
|
+
client_id INTEGER REFERENCES clients(id),
|
|
169
|
+
|
|
170
|
+
-- Flexible metadata (source-specific fields as JSON)
|
|
171
|
+
metadata TEXT,
|
|
172
|
+
|
|
173
|
+
-- Folder linkage
|
|
174
|
+
folder_id INTEGER REFERENCES folders(id),
|
|
175
|
+
|
|
176
|
+
-- Hash for Drive linking
|
|
177
|
+
md5_hash TEXT,
|
|
178
|
+
|
|
179
|
+
-- Status tracking
|
|
180
|
+
status TEXT DEFAULT 'active'
|
|
181
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
182
|
+
status_reason TEXT,
|
|
183
|
+
status_changed_at DATETIME,
|
|
184
|
+
|
|
185
|
+
-- MCP access control
|
|
186
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
187
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
188
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
189
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
190
|
+
|
|
191
|
+
-- AI-generated summaries
|
|
192
|
+
summary TEXT,
|
|
193
|
+
summarized_at DATETIME,
|
|
194
|
+
|
|
195
|
+
-- Display
|
|
196
|
+
display_name TEXT
|
|
197
|
+
)
|
|
198
|
+
"""
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# Files indexes
|
|
202
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_source ON files(source)")
|
|
203
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)")
|
|
204
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_modified ON files(modified_at)")
|
|
205
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_type ON files(content_type)")
|
|
206
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_project ON files(project_id)")
|
|
207
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_hash ON files(sha256_hash)")
|
|
208
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_account ON files(account)")
|
|
209
|
+
|
|
210
|
+
cursor.execute(
|
|
211
|
+
"""
|
|
212
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_files_local_unique
|
|
213
|
+
ON files(source, path)
|
|
214
|
+
WHERE source = 'local' AND path IS NOT NULL
|
|
215
|
+
"""
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
cursor.execute(
|
|
219
|
+
"""
|
|
220
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_files_drive_unique
|
|
221
|
+
ON files(source, external_id, account)
|
|
222
|
+
WHERE source != 'local' AND external_id IS NOT NULL
|
|
223
|
+
"""
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_folder ON files(folder_id)")
|
|
227
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_md5 ON files(md5_hash)")
|
|
228
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_status ON files(status)")
|
|
229
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_visibility ON files(mcp_view)")
|
|
230
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_client ON files(client_id)")
|
|
231
|
+
|
|
232
|
+
# ========================================
|
|
233
|
+
# Folders Table
|
|
234
|
+
# ========================================
|
|
235
|
+
cursor.execute(
|
|
236
|
+
"""
|
|
237
|
+
CREATE TABLE IF NOT EXISTS folders (
|
|
238
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
239
|
+
|
|
240
|
+
-- Core folder info
|
|
241
|
+
path TEXT NOT NULL,
|
|
242
|
+
relative_path TEXT NOT NULL,
|
|
243
|
+
name TEXT NOT NULL,
|
|
244
|
+
parent_path TEXT,
|
|
245
|
+
|
|
246
|
+
-- Stats
|
|
247
|
+
file_count INTEGER DEFAULT 0,
|
|
248
|
+
|
|
249
|
+
-- Timestamps
|
|
250
|
+
scanned_at DATETIME,
|
|
251
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
252
|
+
|
|
253
|
+
-- Project association
|
|
254
|
+
project_id INTEGER REFERENCES projects(id),
|
|
255
|
+
|
|
256
|
+
-- Source identification (for remote folders)
|
|
257
|
+
source TEXT DEFAULT 'local',
|
|
258
|
+
external_id TEXT,
|
|
259
|
+
account TEXT,
|
|
260
|
+
|
|
261
|
+
-- Hierarchy
|
|
262
|
+
parent_folder_id INTEGER REFERENCES folders(id),
|
|
263
|
+
|
|
264
|
+
-- Pre-computed counts
|
|
265
|
+
direct_file_count INTEGER DEFAULT 0,
|
|
266
|
+
total_file_count INTEGER DEFAULT 0,
|
|
267
|
+
total_size_bytes INTEGER DEFAULT 0,
|
|
268
|
+
|
|
269
|
+
-- Status tracking
|
|
270
|
+
status TEXT DEFAULT 'active'
|
|
271
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
272
|
+
|
|
273
|
+
-- Audit timestamps
|
|
274
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
275
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
276
|
+
|
|
277
|
+
-- Client association
|
|
278
|
+
client_id INTEGER REFERENCES clients(id),
|
|
279
|
+
|
|
280
|
+
-- MCP access control
|
|
281
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
282
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
283
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
284
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
285
|
+
|
|
286
|
+
-- Display
|
|
287
|
+
display_name TEXT
|
|
288
|
+
)
|
|
289
|
+
"""
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_path ON folders(path)")
|
|
293
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_project ON folders(project_id)")
|
|
294
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_source ON folders(source)")
|
|
295
|
+
cursor.execute(
|
|
296
|
+
"CREATE UNIQUE INDEX IF NOT EXISTS idx_folders_unique_path ON folders(path) WHERE source = 'local'"
|
|
297
|
+
)
|
|
298
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_visibility ON folders(mcp_view)")
|
|
299
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_status ON folders(status)")
|
|
300
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folders_client ON folders(client_id)")
|
|
301
|
+
|
|
302
|
+
# ========================================
|
|
303
|
+
# Visits Table
|
|
304
|
+
# ========================================
|
|
305
|
+
cursor.execute(
|
|
306
|
+
"""
|
|
307
|
+
CREATE TABLE IF NOT EXISTS visits (
|
|
308
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
309
|
+
url TEXT NOT NULL,
|
|
310
|
+
title TEXT,
|
|
311
|
+
visit_time DATETIME NOT NULL,
|
|
312
|
+
browser TEXT NOT NULL,
|
|
313
|
+
visit_count INTEGER DEFAULT 1,
|
|
314
|
+
|
|
315
|
+
-- Audit timestamps
|
|
316
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
317
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
318
|
+
|
|
319
|
+
-- Status tracking
|
|
320
|
+
status TEXT DEFAULT 'active'
|
|
321
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
322
|
+
|
|
323
|
+
-- MCP access control
|
|
324
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
325
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
326
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
327
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
328
|
+
|
|
329
|
+
-- Origin timestamps
|
|
330
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
331
|
+
|
|
332
|
+
-- Client/project association
|
|
333
|
+
client_id INTEGER REFERENCES clients(id),
|
|
334
|
+
project_id INTEGER REFERENCES projects(id),
|
|
335
|
+
|
|
336
|
+
-- Display
|
|
337
|
+
display_name TEXT
|
|
338
|
+
)
|
|
339
|
+
"""
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_time ON visits(visit_time)")
|
|
343
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_browser ON visits(browser)")
|
|
344
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_project ON visits(project_id)")
|
|
345
|
+
cursor.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_visits_unique ON visits(url, visit_time, browser)")
|
|
346
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_client ON visits(client_id)")
|
|
347
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_status ON visits(status)")
|
|
348
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_visits_visibility ON visits(mcp_view)")
|
|
349
|
+
|
|
350
|
+
# ========================================
|
|
351
|
+
# Projects Table
|
|
352
|
+
# ========================================
|
|
353
|
+
cursor.execute(
|
|
354
|
+
"""
|
|
355
|
+
CREATE TABLE IF NOT EXISTS projects (
|
|
356
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
357
|
+
project_name TEXT NOT NULL,
|
|
358
|
+
description TEXT,
|
|
359
|
+
status TEXT DEFAULT 'active'
|
|
360
|
+
CHECK (status IN ('active', 'hidden', 'removed',
|
|
361
|
+
'paused', 'completed', 'abandoned',
|
|
362
|
+
'archived', 'merged')),
|
|
363
|
+
status_reason TEXT,
|
|
364
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
365
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
366
|
+
metadata TEXT,
|
|
367
|
+
|
|
368
|
+
-- Code project info (app-scope adds more columns)
|
|
369
|
+
root_path TEXT,
|
|
370
|
+
project_type TEXT,
|
|
371
|
+
|
|
372
|
+
-- Client association
|
|
373
|
+
client_id INTEGER REFERENCES clients(id),
|
|
374
|
+
client TEXT,
|
|
375
|
+
github_url TEXT,
|
|
376
|
+
root_folder_id INTEGER REFERENCES folders(id),
|
|
377
|
+
|
|
378
|
+
-- MCP access control
|
|
379
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
380
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
381
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
382
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
383
|
+
|
|
384
|
+
-- Display
|
|
385
|
+
display_name TEXT
|
|
386
|
+
)
|
|
387
|
+
"""
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
cursor.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_projects_root ON projects(root_path)")
|
|
391
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_projects_client ON projects(client_id)")
|
|
392
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_projects_visibility ON projects(mcp_view)")
|
|
393
|
+
|
|
394
|
+
# ========================================
|
|
395
|
+
# Chats Table
|
|
396
|
+
# ========================================
|
|
397
|
+
cursor.execute(
|
|
398
|
+
"""
|
|
399
|
+
CREATE TABLE IF NOT EXISTS chats (
|
|
400
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
401
|
+
external_id TEXT UNIQUE NOT NULL,
|
|
402
|
+
account TEXT NOT NULL,
|
|
403
|
+
title TEXT,
|
|
404
|
+
summary TEXT,
|
|
405
|
+
|
|
406
|
+
-- Origin timestamps
|
|
407
|
+
created_at DATETIME,
|
|
408
|
+
modified_at DATETIME,
|
|
409
|
+
|
|
410
|
+
message_count INTEGER DEFAULT 0,
|
|
411
|
+
|
|
412
|
+
-- Audit timestamps
|
|
413
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
414
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
415
|
+
|
|
416
|
+
metadata TEXT,
|
|
417
|
+
|
|
418
|
+
-- Vectorization
|
|
419
|
+
metadata_vectorized_at DATETIME,
|
|
420
|
+
|
|
421
|
+
-- Status tracking
|
|
422
|
+
status TEXT DEFAULT 'active'
|
|
423
|
+
CHECK (status IN ('active', 'hidden', 'removed', 'merged')),
|
|
424
|
+
|
|
425
|
+
-- MCP access control
|
|
426
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
427
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
428
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
429
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
430
|
+
|
|
431
|
+
-- Client/project association
|
|
432
|
+
client_id INTEGER REFERENCES clients(id),
|
|
433
|
+
project_id INTEGER REFERENCES projects(id),
|
|
434
|
+
|
|
435
|
+
-- Merge tracking
|
|
436
|
+
merged_into_id INTEGER REFERENCES chats(id),
|
|
437
|
+
|
|
438
|
+
-- Display
|
|
439
|
+
display_name TEXT
|
|
440
|
+
)
|
|
441
|
+
"""
|
|
442
|
+
)
|
|
443
|
+
|
|
444
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chat_conv_created ON chats(created_at)")
|
|
445
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chat_conv_account ON chats(account)")
|
|
446
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chat_conv_status ON chats(status)")
|
|
447
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chats_client ON chats(client_id)")
|
|
448
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chats_project ON chats(project_id)")
|
|
449
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chats_visibility ON chats(mcp_view)")
|
|
450
|
+
|
|
451
|
+
# ========================================
|
|
452
|
+
# Messages Table
|
|
453
|
+
# ========================================
|
|
454
|
+
cursor.execute(
|
|
455
|
+
"""
|
|
456
|
+
CREATE TABLE IF NOT EXISTS messages (
|
|
457
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
458
|
+
chat_id INTEGER NOT NULL,
|
|
459
|
+
message_id TEXT,
|
|
460
|
+
role TEXT NOT NULL,
|
|
461
|
+
content TEXT,
|
|
462
|
+
created_at DATETIME,
|
|
463
|
+
metadata TEXT,
|
|
464
|
+
vectorized_at DATETIME,
|
|
465
|
+
vectorized_chunks INTEGER DEFAULT 0,
|
|
466
|
+
|
|
467
|
+
-- Audit timestamps
|
|
468
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
469
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
470
|
+
|
|
471
|
+
-- Status tracking
|
|
472
|
+
status TEXT DEFAULT 'active'
|
|
473
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
474
|
+
|
|
475
|
+
-- MCP access control
|
|
476
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
477
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
478
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
479
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
480
|
+
|
|
481
|
+
-- Display
|
|
482
|
+
display_name TEXT,
|
|
483
|
+
|
|
484
|
+
FOREIGN KEY (chat_id) REFERENCES chats(id)
|
|
485
|
+
)
|
|
486
|
+
"""
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chat_msg_conv ON messages(chat_id)")
|
|
490
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_chat_msg_created ON messages(created_at)")
|
|
491
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_messages_visibility ON messages(mcp_view)")
|
|
492
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_messages_status ON messages(status)")
|
|
493
|
+
|
|
494
|
+
# ========================================
|
|
495
|
+
# Emails Table
|
|
496
|
+
# ========================================
|
|
497
|
+
cursor.execute(
|
|
498
|
+
"""
|
|
499
|
+
CREATE TABLE IF NOT EXISTS emails (
|
|
500
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
501
|
+
message_id TEXT NOT NULL,
|
|
502
|
+
thread_id TEXT NOT NULL,
|
|
503
|
+
account TEXT NOT NULL,
|
|
504
|
+
from_address TEXT,
|
|
505
|
+
from_name TEXT,
|
|
506
|
+
to_addresses TEXT,
|
|
507
|
+
cc_addresses TEXT,
|
|
508
|
+
subject TEXT,
|
|
509
|
+
body_preview TEXT,
|
|
510
|
+
received_at DATETIME NOT NULL,
|
|
511
|
+
labels TEXT,
|
|
512
|
+
has_attachments BOOLEAN DEFAULT 0,
|
|
513
|
+
is_read BOOLEAN DEFAULT 1,
|
|
514
|
+
|
|
515
|
+
-- Audit timestamps
|
|
516
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
517
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
518
|
+
|
|
519
|
+
metadata TEXT,
|
|
520
|
+
|
|
521
|
+
-- Status tracking
|
|
522
|
+
status TEXT DEFAULT 'active'
|
|
523
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
524
|
+
|
|
525
|
+
-- MCP access control
|
|
526
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
527
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
528
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
529
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
530
|
+
|
|
531
|
+
-- AI-generated summaries
|
|
532
|
+
summary TEXT,
|
|
533
|
+
|
|
534
|
+
-- Timestamps
|
|
535
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
536
|
+
|
|
537
|
+
-- Client/project association
|
|
538
|
+
client_id INTEGER REFERENCES clients(id),
|
|
539
|
+
project_id INTEGER REFERENCES projects(id),
|
|
540
|
+
|
|
541
|
+
-- Display
|
|
542
|
+
display_name TEXT,
|
|
543
|
+
|
|
544
|
+
UNIQUE(message_id, account)
|
|
545
|
+
)
|
|
546
|
+
"""
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_email_account ON emails(account)")
|
|
550
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_email_received ON emails(received_at)")
|
|
551
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_email_from ON emails(from_address)")
|
|
552
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_email_thread ON emails(thread_id)")
|
|
553
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_emails_client ON emails(client_id)")
|
|
554
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_emails_project ON emails(project_id)")
|
|
555
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_emails_visibility ON emails(mcp_view)")
|
|
556
|
+
|
|
557
|
+
# ========================================
|
|
558
|
+
# Clients Table
|
|
559
|
+
# ========================================
|
|
560
|
+
cursor.execute(
|
|
561
|
+
"""
|
|
562
|
+
CREATE TABLE IF NOT EXISTS clients (
|
|
563
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
564
|
+
name TEXT NOT NULL UNIQUE,
|
|
565
|
+
slug TEXT NOT NULL UNIQUE,
|
|
566
|
+
client_type TEXT NOT NULL,
|
|
567
|
+
path_pattern TEXT,
|
|
568
|
+
status TEXT DEFAULT 'active'
|
|
569
|
+
CHECK (status IN ('active', 'hidden', 'removed')),
|
|
570
|
+
status_reason TEXT,
|
|
571
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
572
|
+
metadata TEXT,
|
|
573
|
+
|
|
574
|
+
-- MCP access control
|
|
575
|
+
mcp_read TEXT DEFAULT 'inherit'
|
|
576
|
+
CHECK (mcp_read IN ('allow', 'deny', 'inherit')),
|
|
577
|
+
mcp_view TEXT DEFAULT 'inherit'
|
|
578
|
+
CHECK (mcp_view IN ('hidden', 'opaque', 'visible', 'inherit')),
|
|
579
|
+
|
|
580
|
+
-- Display
|
|
581
|
+
display_name TEXT
|
|
582
|
+
)
|
|
583
|
+
"""
|
|
584
|
+
)
|
|
585
|
+
|
|
586
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_clients_slug ON clients(slug)")
|
|
587
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_clients_type ON clients(client_type)")
|
|
588
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_clients_visibility ON clients(mcp_view)")
|
|
589
|
+
|
|
590
|
+
# ========================================
|
|
591
|
+
# Sources Table (runtime registry)
|
|
592
|
+
# ========================================
|
|
593
|
+
cursor.execute(
|
|
594
|
+
"""
|
|
595
|
+
CREATE TABLE IF NOT EXISTS sources (
|
|
596
|
+
name TEXT PRIMARY KEY,
|
|
597
|
+
source_type TEXT NOT NULL,
|
|
598
|
+
adapter TEXT,
|
|
599
|
+
account TEXT,
|
|
600
|
+
label TEXT,
|
|
601
|
+
icon TEXT,
|
|
602
|
+
enabled INTEGER DEFAULT 1,
|
|
603
|
+
config TEXT,
|
|
604
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
605
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
606
|
+
)
|
|
607
|
+
"""
|
|
608
|
+
)
|
|
609
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_sources_type ON sources(source_type)")
|
|
610
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_sources_enabled ON sources(enabled)")
|
|
611
|
+
|
|
612
|
+
# ========================================
|
|
613
|
+
# Uploads Table (generic upload log)
|
|
614
|
+
# ========================================
|
|
615
|
+
cursor.execute(
|
|
616
|
+
"""
|
|
617
|
+
CREATE TABLE IF NOT EXISTS uploads (
|
|
618
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
619
|
+
filename TEXT NOT NULL,
|
|
620
|
+
file_hash TEXT NOT NULL UNIQUE,
|
|
621
|
+
file_size INTEGER,
|
|
622
|
+
type TEXT NOT NULL,
|
|
623
|
+
source TEXT,
|
|
624
|
+
items_added INTEGER DEFAULT 0,
|
|
625
|
+
items_updated INTEGER DEFAULT 0,
|
|
626
|
+
items_total INTEGER DEFAULT 0,
|
|
627
|
+
status TEXT DEFAULT 'pending',
|
|
628
|
+
error_message TEXT,
|
|
629
|
+
uploaded_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
630
|
+
completed_at DATETIME,
|
|
631
|
+
metadata TEXT
|
|
632
|
+
)
|
|
633
|
+
"""
|
|
634
|
+
)
|
|
635
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_uploads_type ON uploads(type)")
|
|
636
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_uploads_hash ON uploads(file_hash)")
|
|
637
|
+
|
|
638
|
+
# ========================================
|
|
639
|
+
# Permission Policies Table
|
|
640
|
+
# ========================================
|
|
641
|
+
cursor.execute(
|
|
642
|
+
"""
|
|
643
|
+
CREATE TABLE IF NOT EXISTS permission_policies (
|
|
644
|
+
scope TEXT PRIMARY KEY,
|
|
645
|
+
setting TEXT NOT NULL CHECK (setting IN ('allow', 'deny')),
|
|
646
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
647
|
+
)
|
|
648
|
+
"""
|
|
649
|
+
)
|
|
650
|
+
|
|
651
|
+
# ========================================
|
|
652
|
+
# Visibility Policies Table
|
|
653
|
+
# ========================================
|
|
654
|
+
cursor.execute(
|
|
655
|
+
"""
|
|
656
|
+
CREATE TABLE IF NOT EXISTS visibility_policies (
|
|
657
|
+
scope TEXT PRIMARY KEY,
|
|
658
|
+
setting TEXT NOT NULL CHECK (setting IN ('hidden', 'opaque', 'visible')),
|
|
659
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
660
|
+
)
|
|
661
|
+
"""
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
# ========================================
|
|
665
|
+
# Ingests Table (per-pipe run history)
|
|
666
|
+
# ========================================
|
|
667
|
+
cursor.execute(_INGESTS_DDL)
|
|
668
|
+
cursor.execute("CREATE INDEX IF NOT EXISTS idx_ingests_pipe_status ON ingests(pipe, status)")
|
|
669
|
+
|
|
670
|
+
# ========================================
|
|
671
|
+
# FTS5 Full-Text Search Indexes
|
|
672
|
+
# ========================================
|
|
673
|
+
for fts_table in _FTS_DEFINITIONS:
|
|
674
|
+
try:
|
|
675
|
+
cursor.execute(self._fts_create_sql(fts_table, if_not_exists=True))
|
|
676
|
+
except sqlite3.OperationalError as e:
|
|
677
|
+
if "no such module: fts5" in str(e):
|
|
678
|
+
logger.warning(
|
|
679
|
+
"FTS5 not available — %s keyword search will use LIKE fallback",
|
|
680
|
+
_FTS_DEFINITIONS[fts_table]["base_table"],
|
|
681
|
+
)
|
|
682
|
+
else:
|
|
683
|
+
raise
|
|
684
|
+
|
|
685
|
+
# Drop old FTS _au triggers so they can be recreated with WHEN
|
|
686
|
+
# clauses (prevents spurious re-indexing on non-FTS column updates).
|
|
687
|
+
for fts_table in _FTS_DEFINITIONS:
|
|
688
|
+
cursor.execute(f"DROP TRIGGER IF EXISTS {fts_table}_au")
|
|
689
|
+
|
|
690
|
+
# Create all FTS triggers (shared with rebuild_fts_indexes)
|
|
691
|
+
self.create_fts_triggers()
|
|
692
|
+
|
|
693
|
+
# ========================================
|
|
694
|
+
# FTS5 Backfill (idempotent)
|
|
695
|
+
# ========================================
|
|
696
|
+
try:
|
|
697
|
+
for fts_table in _FTS_DEFINITIONS:
|
|
698
|
+
cursor.execute(f"SELECT COUNT(*) FROM {fts_table}")
|
|
699
|
+
if cursor.fetchone()[0] == 0:
|
|
700
|
+
cursor.execute(self._fts_backfill_sql(fts_table))
|
|
701
|
+
except sqlite3.OperationalError:
|
|
702
|
+
logger.debug("FTS5 backfill skipped — FTS tables do not exist")
|
|
703
|
+
|
|
704
|
+
# ========================================
|
|
705
|
+
# display_name AFTER INSERT triggers
|
|
706
|
+
# ========================================
|
|
707
|
+
_DISPLAY_NAME_SOURCES = {
|
|
708
|
+
"files": "NEW.name",
|
|
709
|
+
"folders": "NEW.name",
|
|
710
|
+
"visits": "NEW.title",
|
|
711
|
+
"projects": "NEW.project_name",
|
|
712
|
+
"chats": "NEW.title",
|
|
713
|
+
"messages": "SUBSTR(NEW.content, 1, 100)",
|
|
714
|
+
"emails": "NEW.subject",
|
|
715
|
+
"clients": "NEW.name",
|
|
716
|
+
}
|
|
717
|
+
for table, source_expr in _DISPLAY_NAME_SOURCES.items():
|
|
718
|
+
cursor.execute(f"""
|
|
719
|
+
CREATE TRIGGER IF NOT EXISTS set_display_name_{table}
|
|
720
|
+
AFTER INSERT ON {table}
|
|
721
|
+
FOR EACH ROW
|
|
722
|
+
WHEN NEW.display_name IS NULL
|
|
723
|
+
BEGIN
|
|
724
|
+
UPDATE {table} SET display_name = {source_expr}
|
|
725
|
+
WHERE id = NEW.id;
|
|
726
|
+
END
|
|
727
|
+
""")
|
|
728
|
+
|
|
729
|
+
self.conn.commit()
|
|
730
|
+
|
|
731
|
+
# Seed the sources registry from config
|
|
732
|
+
try:
|
|
733
|
+
from footprinter.source_registry import SourceRegistry
|
|
734
|
+
|
|
735
|
+
registry = SourceRegistry(self.conn)
|
|
736
|
+
registry.seed_from_config()
|
|
737
|
+
except Exception as e:
|
|
738
|
+
logger.warning(f"Could not seed sources from config: {e}")
|
|
739
|
+
|
|
740
|
+
# ========================================
|
|
741
|
+
# FTS Trigger Management
|
|
742
|
+
# ========================================
|
|
743
|
+
|
|
744
|
+
# Names of every FTS sync trigger: one "<fts_table>_ai", "<fts_table>_ad" and
# "<fts_table>_au" entry (after-insert / after-delete / after-update, as
# created by _fts_trigger_sql) for each key of _FTS_DEFINITIONS.
_FTS_TRIGGER_NAMES = [f"{fts_table}_{suffix}" for fts_table in _FTS_DEFINITIONS for suffix in ("ai", "ad", "au")]
|
|
745
|
+
|
|
746
|
+
@staticmethod
def _fts_create_sql(fts_table: str, *, if_not_exists: bool = False) -> str:
    """Build the ``CREATE VIRTUAL TABLE`` statement for one FTS5 index.

    The table is declared with the ``columns`` listed for *fts_table* in
    ``_FTS_DEFINITIONS`` and as an external-content table whose ``content``
    is the definition's ``base_table`` and whose ``content_rowid`` is ``id``.
    Pass ``if_not_exists=True`` to add an ``IF NOT EXISTS`` guard.
    """
    spec = _FTS_DEFINITIONS[fts_table]
    column_list = ", ".join(spec["columns"])

    guard = ""
    if if_not_exists:
        guard = "IF NOT EXISTS "

    base_table = spec["base_table"]
    return (
        f"CREATE VIRTUAL TABLE {guard}{fts_table} USING fts5("
        f"{column_list}, content='{base_table}', content_rowid='id')"
    )
|
|
756
|
+
|
|
757
|
+
@staticmethod
def _fts_backfill_sql(fts_table: str) -> str:
    """Build the ``INSERT ... SELECT`` that copies base-table rows into an FTS table.

    Every column in the definition's ``columns`` is selected from the base
    table, with ``id`` supplying the FTS ``rowid``.  Columns that also appear
    in ``content_columns`` are selected as NULL for rows whose ``mcp_view``
    is ``'opaque'`` or ``'hidden'``, so their text is not copied into the index.
    """
    spec = _FTS_DEFINITIONS[fts_table]
    masked = set(spec.get("content_columns", []))
    column_list = ", ".join(spec["columns"])

    # One SELECT expression per FTS column: masked columns get the CASE
    # wrapper, everything else is passed through by name.
    projection = ", ".join(
        f"CASE WHEN COALESCE(mcp_view, 'inherit') IN ('opaque', 'hidden') THEN NULL ELSE {name} END"
        if name in masked
        else name
        for name in spec["columns"]
    )

    return f"INSERT INTO {fts_table}(rowid, {column_list}) SELECT id, {projection} FROM {spec['base_table']}"
|
|
773
|
+
|
|
774
|
+
@staticmethod
|
|
775
|
+
def _fts_col_expr(col: str, prefix: str, content_columns: set[str]) -> str:
|
|
776
|
+
"""Return a SQL expression for a column value in FTS triggers.
|
|
777
|
+
|
|
778
|
+
Content columns are NULLed when mcp_view is opaque or hidden,
|
|
779
|
+
preventing sensitive content from entering the FTS index.
|
|
780
|
+
Metadata columns (name, subject, title, etc.) pass through unchanged.
|
|
781
|
+
"""
|
|
782
|
+
if col in content_columns:
|
|
783
|
+
return (
|
|
784
|
+
f"CASE WHEN COALESCE({prefix}.mcp_view, 'inherit') "
|
|
785
|
+
f"IN ('opaque', 'hidden') THEN NULL ELSE {prefix}.{col} END"
|
|
786
|
+
)
|
|
787
|
+
return f"{prefix}.{col}"
|
|
788
|
+
|
|
789
|
+
@staticmethod
def _fts_trigger_sql(fts_table: str) -> list[str]:
    """Build the three sync-trigger statements for one FTS table.

    The returned list holds, in order, the ``<fts_table>_ai`` (after insert),
    ``<fts_table>_ad`` (after delete) and ``<fts_table>_au`` (after update)
    ``CREATE TRIGGER IF NOT EXISTS`` statements on the definition's base
    table.  Column values are produced by ``_fts_col_expr`` so content
    columns are NULLed for opaque/hidden rows.
    """
    spec = _FTS_DEFINITIONS[fts_table]
    base = spec["base_table"]
    columns = spec["columns"]
    masked = set(spec.get("content_columns", []))
    column_list = ", ".join(columns)

    new_values = ", ".join(SchemaMixin._fts_col_expr(name, "new", masked) for name in columns)
    old_values = ", ".join(SchemaMixin._fts_col_expr(name, "old", masked) for name in columns)

    # The update trigger fires only when an indexed column or mcp_view
    # differs between OLD and NEW.  mcp_view is included because it decides
    # whether content columns are stored as NULL.  Updates to other columns
    # (e.g. display_name, including the UPDATE issued by the AFTER INSERT
    # display_name triggers) therefore do not re-index the row.
    watched = [*columns, "mcp_view"]
    changed = " OR ".join(f"OLD.{name} IS NOT NEW.{name}" for name in watched)

    # Reusable trigger-body statements: add the NEW row to the index, and
    # issue the FTS 'delete' command for the OLD row.
    add_new = f"INSERT INTO {fts_table}(rowid, {column_list}) VALUES (new.id, {new_values});"
    remove_old = (
        f"INSERT INTO {fts_table}({fts_table}, rowid, {column_list}) "
        f"VALUES ('delete', old.id, {old_values});"
    )

    after_insert = f"CREATE TRIGGER IF NOT EXISTS {fts_table}_ai AFTER INSERT ON {base} BEGIN {add_new} END"
    after_delete = f"CREATE TRIGGER IF NOT EXISTS {fts_table}_ad AFTER DELETE ON {base} BEGIN {remove_old} END"
    after_update = (
        f"CREATE TRIGGER IF NOT EXISTS {fts_table}_au AFTER UPDATE ON {base} "
        f"WHEN {changed} BEGIN {remove_old} {add_new} END"
    )
    return [after_insert, after_delete, after_update]
|
|
826
|
+
|
|
827
|
+
def check_fts_triggers(self) -> list[str]:
    """List the expected FTS triggers that do not exist in the database.

    Looks up every name in ``_FTS_TRIGGER_NAMES`` in ``sqlite_master`` and
    returns the ones that were not found, in ``_FTS_TRIGGER_NAMES`` order.
    The result is an empty list when every trigger is present.
    """
    expected = self._FTS_TRIGGER_NAMES
    marks = ", ".join("?" * len(expected))
    query = f"SELECT name FROM sqlite_master WHERE type='trigger' AND name IN ({marks})"

    found = set()
    for row in self.conn.cursor().execute(query, expected).fetchall():
        found.add(row[0])

    return [name for name in expected if name not in found]
|
|
842
|
+
|
|
843
|
+
def drop_fts_triggers(self) -> None:
    """Drop every trigger named in ``_FTS_TRIGGER_NAMES`` and commit.

    An ``sqlite3.OperationalError`` whose message contains
    ``"no such module: fts5"`` is logged at debug level and swallowed; any
    other ``OperationalError`` is re-raised.
    """
    try:
        cur = self.conn.cursor()
        for trigger in self._FTS_TRIGGER_NAMES:
            cur.execute(f"DROP TRIGGER IF EXISTS {trigger}")
        self.conn.commit()
        logger.info("Dropped FTS triggers for bulk ingest")
    except sqlite3.OperationalError as exc:
        # Only the "FTS5 missing" case is tolerated.
        if "no such module: fts5" not in str(exc):
            raise
        logger.debug("drop_fts_triggers skipped — FTS5 not available")
|
|
856
|
+
|
|
857
|
+
def create_fts_triggers(self) -> None:
    """Create the sync triggers for every FTS table that currently exists.

    FTS tables listed in ``_FTS_DEFINITIONS`` but absent from
    ``sqlite_master`` are skipped; if none exist the method returns without
    committing.  An ``sqlite3.OperationalError`` mentioning
    ``"no such module: fts5"`` is logged at debug level and swallowed; any
    other ``OperationalError`` is re-raised.
    """
    try:
        cur = self.conn.cursor()

        # Find out which of the defined FTS tables are actually present.
        candidates = list(_FTS_DEFINITIONS.keys())
        marks = ", ".join("?" for _ in _FTS_DEFINITIONS)
        cur.execute(
            f"SELECT name FROM sqlite_master WHERE type='table' AND name IN ({marks})",
            candidates,
        )
        existing = {row[0] for row in cur.fetchall()}
        if not existing:
            logger.debug("create_fts_triggers skipped — no FTS tables exist")
            return

        for fts_table in _FTS_DEFINITIONS:
            if fts_table not in existing:
                continue
            for statement in self._fts_trigger_sql(fts_table):
                cur.execute(statement)

        self.conn.commit()
    except sqlite3.OperationalError as exc:
        if "no such module: fts5" not in str(exc):
            raise
        logger.debug("create_fts_triggers skipped — FTS5 not available")
|
|
884
|
+
|
|
885
|
+
def rebuild_fts_indexes(self) -> None:
    """Recreate every FTS table from its base table, then restore triggers.

    Each FTS table is dropped, recreated and backfilled with
    ``_fts_backfill_sql`` rather than the FTS5 ``rebuild`` command, so that
    content columns stay NULL for opaque/hidden records.  An
    ``sqlite3.OperationalError`` mentioning ``"no such table"`` or
    ``"no such module"`` is logged at debug level and swallowed; other
    ``OperationalError``s propagate.  ``create_fts_triggers()`` runs in a
    ``finally`` block in every case.
    """
    try:
        cur = self.conn.cursor()

        # Triggers reference the FTS tables, so they go first.
        for trigger in self._FTS_TRIGGER_NAMES:
            cur.execute(f"DROP TRIGGER IF EXISTS {trigger}")

        # Drop, recreate and refill each index with the filtered backfill.
        for fts_table in _FTS_DEFINITIONS:
            cur.execute(f"DROP TABLE IF EXISTS {fts_table}")
            cur.execute(self._fts_create_sql(fts_table))
            cur.execute(self._fts_backfill_sql(fts_table))

        row_counts = {}
        for fts_table in _FTS_DEFINITIONS:
            cur.execute(f"SELECT COUNT(*) FROM {fts_table}")
            row_counts[fts_table] = cur.fetchone()[0]

        self.conn.commit()
        summary = ", ".join(f"{t}={c}" for t, c in row_counts.items())
        logger.info("Rebuilt FTS indexes: %s", summary)
    except sqlite3.OperationalError as exc:
        reason = str(exc)
        if not ("no such table" in reason or "no such module" in reason):
            raise
        logger.debug("rebuild_fts_indexes skipped: %s", exc)
    finally:
        # Triggers are restored whether or not the rebuild succeeded.
        self.create_fts_triggers()
|
|
923
|
+
|
|
924
|
+
# ========================================
|
|
925
|
+
# FTS Health Check & Repair
|
|
926
|
+
# ========================================
|
|
927
|
+
|
|
928
|
+
# Maps each FTS table name (key of _FTS_DEFINITIONS) to the name of the base
# table it indexes.
_FTS_TABLE_MAP = {k: v["base_table"] for k, v in _FTS_DEFINITIONS.items()}
|
|
929
|
+
|
|
930
|
+
def check_fts_health(self) -> dict:
    """Check FTS table health: existence and queryability.

    The FTS tables are external content tables, so ``SELECT COUNT(*)``
    delegates to the content table and row counts always match.  Drift
    detection via row counts is therefore a no-op; real drift protection
    comes from the sync triggers and auto-recovery on pipeline startup.

    FTS5 ``integrity-check`` is not used because the triggers
    intentionally NULL content columns for opaque/hidden records.

    Safe to call when FTS5 is unavailable or an FTS table is missing —
    the affected tables are reported as ``"error"`` with an explanatory
    message.  Other ``sqlite3.OperationalError``s propagate.

    Returns a dict keyed by FTS table name, each with:
        status: "ok" | "error"
        fts_rows: int (or None if the table could not be queried)
        base_rows: int
        message: str (only on error)
        triggers_missing: list[str] — missing trigger names for this
            table, in the order reported by ``check_fts_triggers()``
    """
    cursor = self.conn.cursor()
    result = {}

    # Keep the list returned by check_fts_triggers() as-is: iterating a set
    # here would make the order of ``triggers_missing`` vary between runs.
    missing_triggers = self.check_fts_triggers()

    for fts_table, base_table in self._FTS_TABLE_MAP.items():
        # Match exact trigger names rather than a "<fts_table>_" prefix so
        # one FTS table never claims the triggers of another whose name
        # merely starts with the same text.
        expected = {f"{fts_table}_{suffix}" for suffix in ("ai", "ad", "au")}
        table_triggers_missing = [t for t in missing_triggers if t in expected]
        base_rows = cursor.execute(f"SELECT COUNT(*) FROM {base_table}").fetchone()[0]

        try:
            fts_rows = cursor.execute(f"SELECT COUNT(*) FROM {fts_table}").fetchone()[0]
        except sqlite3.OperationalError as e:
            # Missing FTS5 module or missing table: report, don't raise.
            if "no such module: fts5" in str(e) or "no such table" in str(e):
                result[fts_table] = {
                    "status": "error",
                    "fts_rows": None,
                    "base_rows": base_rows,
                    "message": f"{fts_table} is missing or corrupted",
                    "triggers_missing": table_triggers_missing,
                }
                continue
            raise
        except sqlite3.DatabaseError:
            # Any other database-level failure while reading the FTS table.
            result[fts_table] = {
                "status": "error",
                "fts_rows": None,
                "base_rows": base_rows,
                "message": f"{fts_table} is corrupted or unreadable",
                "triggers_missing": table_triggers_missing,
            }
            continue

        result[fts_table] = {
            "status": "ok",
            "fts_rows": fts_rows,
            "base_rows": base_rows,
            "triggers_missing": table_triggers_missing,
        }

    return result
|
|
991
|
+
|
|
992
|
+
def repair_fts(self) -> dict:
    """Drop and rebuild all FTS tables from base table data.

    Steps: record each FTS table's current row count, drop the sync
    triggers and the FTS tables, recreate and backfill every table in
    ``_FTS_DEFINITIONS``, commit, then record the new row counts.

    Safe to call when FTS5 is unavailable — logs a debug message and
    returns an empty dict.  Other ``sqlite3.OperationalError``s propagate.
    Triggers are always restored in a ``finally`` block, matching the
    safety pattern of ``rebuild_fts_indexes()``.

    Returns a dict keyed by FTS table name with ``"before"`` and
    ``"after"`` row counts; ``"before"`` is None when the table could not
    be read prior to the repair.
    """
    try:
        cursor = self.conn.cursor()

        # Capture before state.  Catch DatabaseError (the parent of
        # OperationalError) so that an unreadable/corrupted FTS table —
        # the very condition check_fts_health() reports via DatabaseError —
        # does not abort the repair before anything has been rebuilt.
        before = {}
        for fts_table in self._FTS_TABLE_MAP:
            try:
                before[fts_table] = cursor.execute(f"SELECT COUNT(*) FROM {fts_table}").fetchone()[0]
            except sqlite3.DatabaseError:
                before[fts_table] = None

        # Drop triggers and FTS tables
        self.drop_fts_triggers()
        for fts_table in self._FTS_TABLE_MAP:
            cursor.execute(f"DROP TABLE IF EXISTS {fts_table}")

        # Recreate FTS virtual tables and backfill from base tables
        for fts_table in _FTS_DEFINITIONS:
            cursor.execute(self._fts_create_sql(fts_table))
            cursor.execute(self._fts_backfill_sql(fts_table))

        self.conn.commit()

        # Capture after state
        result = {}
        for fts_table in self._FTS_TABLE_MAP:
            after = cursor.execute(f"SELECT COUNT(*) FROM {fts_table}").fetchone()[0]
            result[fts_table] = {"before": before[fts_table], "after": after}

        logger.info(
            "Repaired FTS indexes: %s",
            ", ".join(f"{t}={r['after']}" for t, r in result.items()),
        )
        return result

    except sqlite3.OperationalError as e:
        if "no such module: fts5" in str(e):
            logger.debug("repair_fts skipped — FTS5 not available")
            return {}
        raise
    finally:
        # Always restore triggers — even if repair raised
        self.create_fts_triggers()
|