footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
"""CLI utilities — _rebuild_vectors with phase isolation and interrupt safety."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import signal
|
|
5
|
+
import sqlite3
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from footprinter.paths import get_db_path
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
_BATCH_SIZE = 100
|
|
14
|
+
|
|
15
|
+
# Graceful shutdown flag — set by SIGINT/SIGTERM handler
|
|
16
|
+
_shutdown = False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _handle_shutdown(signum, frame):
    """Record a shutdown request raised by an OS signal.

    Flips the module-level ``_shutdown`` flag and logs the name of the
    signal that was received.

    Args:
        signum: Number of the signal that was delivered.
        frame: Stack frame passed by the ``signal`` module (unused).
    """
    global _shutdown
    _shutdown = True
    logger.warning(
        "Received %s — finishing current batch...",
        signal.Signals(signum).name,
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _repair_fts(quiet: bool = False):
    """Drop and rebuild all FTS search indexes from base table data.

    Opens the database at ``get_db_path()``, optionally reports the FTS
    health before the repair, runs ``Database.repair_fts()`` and prints the
    per-table row counts it returns.

    Args:
        quiet: When True, no Rich console is created, the pre-repair health
            check is skipped and nothing is printed.
    """
    from rich.console import Console

    from footprinter.ingest.database import Database

    console = Console() if not quiet else None

    db = Database(str(get_db_path()))
    try:
        if console:
            before = db.check_fts_health()
            console.print()
            console.print("[bold]FTS Repair[/bold]")
            console.print()
            for table, info in before.items():
                status = info["status"]
                icon = {"ok": "[green]ok[/green]", "error": "[red]error[/red]"}.get(status, status)
                console.print(f" Before: {table} {icon}")

        result = db.repair_fts()
    finally:
        # Always release the handle, even when the health check or the
        # repair itself raises.
        db.close()

    if console:
        console.print()
        for table, counts in result.items():
            console.print(f" After: {table} [green]{counts['after']}[/green] rows")
        console.print()
        console.print("[bold green]FTS repair complete[/bold green]")
        console.print()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
# Pre-flight validation
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _preflight_check(conn, cursor, files_enabled, chats_enabled, console, mode: str = "full") -> dict:
    """Validate the database and count the work a rebuild would do.

    Args:
        conn: Open SQLite connection (not used directly here).
        cursor: Cursor used for the write probe and the COUNT queries.
        files_enabled: Count rows from ``files`` when truthy.
        chats_enabled: Count rows from ``messages`` and ``chats`` when truthy.
        console: Rich-like console for the summary line, or None to stay silent.
        mode: "incremental" and "sync" count only rows still awaiting
            vectorization; any other value counts every eligible row.

    Returns:
        Counts keyed by "files", "messages" and "chats" (0 for disabled kinds).

    Raises:
        RuntimeError: If the write probe fails with sqlite3.OperationalError.
    """
    # Write probe.
    # NOTE(review): the probe targets a TEMP table; confirm it also detects a
    # read-only main database file.
    probe_statements = (
        "CREATE TEMP TABLE IF NOT EXISTS _fp_preflight (x INTEGER)",
        "INSERT INTO _fp_preflight VALUES (1)",
        "DELETE FROM _fp_preflight",
    )
    try:
        for statement in probe_statements:
            cursor.execute(statement)
    except sqlite3.OperationalError as e:
        raise RuntimeError(f"Database is not writable: {e}") from e

    incremental = mode in ("incremental", "sync")

    def count_rows(base_sql: str, pending_clause: str) -> int:
        """Run a COUNT query, narrowed to pending rows in incremental mode."""
        sql = (base_sql + pending_clause) if incremental else base_sql
        cursor.execute(sql)
        return cursor.fetchone()[0]

    not_opted_out = " AND COALESCE(json_extract(metadata, '$.vectorize'), 1) != 0"
    counts = {"files": 0, "messages": 0, "chats": 0}

    if files_enabled:
        counts["files"] = count_rows(
            "SELECT COUNT(*) FROM files "
            "WHERE source = 'local' AND status != 'removed' AND path IS NOT NULL" + not_opted_out,
            " AND (vectorized_at IS NULL OR modified_at > vectorized_at)",
        )

    if chats_enabled:
        counts["messages"] = count_rows(
            "SELECT COUNT(*) FROM messages "
            "WHERE content IS NOT NULL AND TRIM(content) != ''"
            " AND status != 'removed'" + not_opted_out,
            " AND vectorized_at IS NULL",
        )
        counts["chats"] = count_rows(
            "SELECT COUNT(*) FROM chats WHERE status != 'removed'" + not_opted_out,
            " AND metadata_vectorized_at IS NULL",
        )

    if console:
        parts = []
        if files_enabled:
            parts.append(f"{counts['files']} files")
        if chats_enabled:
            parts.extend([f"{counts['messages']} messages", f"{counts['chats']} chats"])
        label = "Will process" if incremental else "Will vectorize"
        console.print(f" {label}: {', '.join(parts)}")

    return counts
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
# Phase functions
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _cleanup_removed_vectors(conn, cursor, store, *, clean_files=True, clean_messages=True, clean_chats=True) -> dict:
    """Delete vectors belonging to rows whose status is 'removed'.

    For each enabled kind, rows that are marked removed but still carry a
    vectorization timestamp are deleted from the vector store and their
    vectorization columns are reset to NULL. A failure on one row is logged
    as a warning and does not stop the pass. One commit is issued at the end.

    Args:
        conn: SQLite connection, committed once after all passes.
        cursor: Cursor whose rows support access by column name.
        store: Vector store exposing delete_file / delete_message / delete_chat.
        clean_files: Process the ``files`` table.
        clean_messages: Process the ``messages`` table.
        clean_chats: Process the ``chats`` table.

    Returns:
        {"removed": N, "removed_messages": M, "removed_chats": C}.
    """
    tally = {"removed": 0, "removed_messages": 0, "removed_chats": 0}

    # --- Files ---
    if clean_files:
        cursor.execute("SELECT id FROM files WHERE status = 'removed' AND vectorized_at IS NOT NULL")
        for row in cursor.fetchall():
            try:
                store.delete_file(row["id"])
                cursor.execute(
                    "UPDATE files SET vectorized_at = NULL, vectorized_chunks = NULL WHERE id = ?",
                    (row["id"],),
                )
            except Exception as exc:
                logger.warning("Failed to remove vectors for file %s: %s", row["id"], exc)
            else:
                tally["removed"] += 1

    # --- Messages ---
    if clean_messages:
        cursor.execute("SELECT id FROM messages WHERE status = 'removed' AND vectorized_at IS NOT NULL")
        for row in cursor.fetchall():
            try:
                store.delete_message(row["id"])
                cursor.execute(
                    "UPDATE messages SET vectorized_at = NULL, vectorized_chunks = NULL WHERE id = ?",
                    (row["id"],),
                )
            except Exception as exc:
                logger.warning("Failed to remove vectors for message %s: %s", row["id"], exc)
            else:
                tally["removed_messages"] += 1

    # --- Chats ---
    if clean_chats:
        cursor.execute("SELECT id FROM chats WHERE status = 'removed' AND metadata_vectorized_at IS NOT NULL")
        for row in cursor.fetchall():
            try:
                store.delete_chat(row["id"])
                cursor.execute(
                    "UPDATE chats SET metadata_vectorized_at = NULL WHERE id = ?",
                    (row["id"],),
                )
                # delete_chat also removes message chunks for this chat, so
                # their vectorization state is cleared to keep the DB in sync.
                cursor.execute(
                    "UPDATE messages SET vectorized_at = NULL,"
                    " vectorized_chunks = NULL"
                    " WHERE chat_id = ? AND vectorized_at IS NOT NULL",
                    (row["id"],),
                )
            except Exception as exc:
                logger.warning("Failed to remove vectors for chat %s: %s", row["id"], exc)
            else:
                tally["removed_chats"] += 1

    conn.commit()
    return tally
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _escape_like(text: str) -> str:
    """Escape LIKE metacharacters (backslash, %, _) using backslash as escape."""
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _glob_to_like(pattern: str) -> str:
    """Turn a glob-style pattern into a LIKE pattern: ``*``/``**`` become ``%``."""
    return _escape_like(pattern).replace("**", "%").replace("*", "%")


def _vectorize_files(conn, cursor, store, extractor, vec_config, console, mode: str = "full") -> dict:
    """Vectorize local files.

    Selects eligible rows from ``files`` (local, not removed, with a path, not
    opted out via metadata), applies the optional ``file_types`` and
    ``exclude_patterns`` filters from ``vec_config``, extracts chunks for each
    file that exists on disk and writes them to the vector store. The SQLite
    row is only stamped after the store write succeeded.

    Args:
        conn: SQLite connection, committed every ``_BATCH_SIZE`` files, on
            interrupt and at the end.
        cursor: Cursor whose rows support access by column name.
        store: Vector store exposing ``index_file`` and ``upsert_file``.
        extractor: Object exposing ``extract_with_chunking(path)``.
        vec_config: Mapping with optional "file_types" (extensions) and
            "exclude_patterns" (glob-style patterns).
        console: Rich-like console for progress, or None to stay silent.
        mode: "incremental"/"sync" restrict to new or modified files and
            upsert; any other value indexes every eligible file.

    Returns:
        {"done": N, "chunks": M, "interrupted": bool}.
    """
    incremental = mode in ("incremental", "sync")

    file_types = vec_config.get("file_types")
    exclude_patterns = vec_config.get("exclude_patterns", [])
    where_parts = [
        "source = 'local'",
        "status != 'removed'",
        "path IS NOT NULL",
        "COALESCE(json_extract(metadata, '$.vectorize'), 1) != 0",
    ]
    params: list = []

    # Incremental/sync: only process new or modified files
    if incremental:
        where_parts.append("(vectorized_at IS NULL OR modified_at > vectorized_at)")

    # Literal '%' and '_' in configured values are escaped so they are matched
    # verbatim instead of acting as LIKE wildcards.
    if file_types:
        like_clauses = " OR ".join("LOWER(path) LIKE ? ESCAPE '\\'" for _ in file_types)
        where_parts.append(f"({like_clauses})")
        params.extend("%" + _escape_like(ext.lower()) for ext in file_types)
    for pat in exclude_patterns:
        where_parts.append("path NOT LIKE ? ESCAPE '\\'")
        params.append(_glob_to_like(pat))

    cursor.execute(
        f"SELECT id, path as file_path FROM files WHERE {' AND '.join(where_parts)} ORDER BY id",
        params,
    )
    files = cursor.fetchall()
    total = len(files)
    done = 0
    chunks = 0

    # Upsert in incremental/sync mode (handles both new and modified files)
    write_to_store = store.upsert_file if incremental else store.index_file

    for f in files:
        if _shutdown:
            conn.commit()
            if console:
                console.print(f" [yellow]Interrupted[/yellow] at {done}/{total} files")
            return {"done": done, "chunks": chunks, "interrupted": True}

        try:
            fpath = Path(f["file_path"])
            if not fpath.exists():
                continue
            file_chunks = extractor.extract_with_chunking(fpath)
            if not file_chunks:
                continue
            metadata = {"file_type": fpath.suffix.lower(), "file_name": fpath.name}
            try:
                write_to_store(f["id"], f["file_path"], file_chunks, metadata)
            except Exception as e:
                logger.warning("Chroma write failed for file %s: %s", f["file_path"], e)
                continue
            # SQLite update only after chroma success
            cursor.execute(
                "UPDATE files SET vectorized_at = CURRENT_TIMESTAMP, vectorized_chunks = ? WHERE id = ?",
                (len(file_chunks), f["id"]),
            )
            done += 1
            chunks += len(file_chunks)
            # Same cadence as the message/chat batches.
            if done % _BATCH_SIZE == 0:
                conn.commit()
                if console:
                    console.print(f" Vectorizing files: {done}/{total}")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            logger.debug("Skipped file %s: %s", f["file_path"], e)

    conn.commit()
    return {"done": done, "chunks": chunks, "interrupted": False}
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _vectorize_messages(conn, cursor, store, console, mode: str = "full") -> dict:
    """Vectorize chat messages.

    Loads non-empty, non-removed messages that are not opted out via metadata,
    splits each one with ``chunk_content`` and writes the chunks to the chat
    collection in batches. SQLite rows are stamped only after the batch was
    written to the store; a failed store write drops that batch with a warning.

    Args:
        conn: SQLite connection, committed after each flushed batch, on
            interrupt and at the end.
        cursor: Cursor whose rows support access by column name.
        store: Vector store exposing ``ef`` (embedding function) and the
            ``_chats`` collection with ``add`` / ``upsert``.
        console: Rich-like console for progress, or None to stay silent.
        mode: "incremental"/"sync" process only messages without a
            vectorization timestamp and upsert; any other value processes all
            eligible messages and uses ``add``.

    Returns:
        {"done": N, "interrupted": bool} where N counts flushed messages.
    """
    from footprinter.semantic.chunking import chunk_content

    incremental_filter = ""
    use_upsert = mode in ("incremental", "sync")
    if use_upsert:
        incremental_filter = "AND message.vectorized_at IS NULL"

    cursor.execute(
        f"""
        SELECT message.id, message.chat_id, message.role, message.content,
        message.created_at, chat.title, chat.account as source
        FROM messages message
        JOIN chats chat ON message.chat_id = chat.id
        WHERE message.content IS NOT NULL AND message.content != ''
        AND message.status != 'removed'
        AND COALESCE(json_extract(message.metadata, '$.vectorize'), 1) != 0
        {incremental_filter}
        ORDER BY message.id
        """
    )
    messages = cursor.fetchall()
    total = len(messages)
    done = 0

    batch_ids: list = []
    batch_texts: list = []
    batch_metas: list = []
    batch_msg_ids: list = []
    batch_msg_chunks: dict = {}  # msg_id -> chunk count

    def reset_batch() -> None:
        """Empty every batch buffer in place."""
        for buffer in (batch_ids, batch_texts, batch_metas, batch_msg_ids):
            buffer.clear()
        batch_msg_chunks.clear()

    def flush_batch() -> int:
        """Flush current batch to chroma then SQLite. Returns count flushed."""
        if not batch_ids:
            return 0
        count = len(batch_msg_ids)
        try:
            embeddings = store.ef(batch_texts)
            chroma_op = store._chats.upsert if use_upsert else store._chats.add
            chroma_op(
                ids=list(batch_ids),
                embeddings=embeddings,
                documents=list(batch_texts),
                metadatas=list(batch_metas),
            )
        except Exception as e:
            logger.warning("Chroma write failed for message batch: %s", e)
            reset_batch()
            return 0
        # SQLite update only after chroma success
        for mid in batch_msg_ids:
            cursor.execute(
                "UPDATE messages SET vectorized_at = CURRENT_TIMESTAMP, vectorized_chunks = ? WHERE id = ?",
                (batch_msg_chunks.get(mid, 0), mid),
            )
        reset_batch()
        return count

    for msg in messages:
        if _shutdown:
            done += flush_batch()
            conn.commit()
            if console:
                console.print(f" [yellow]Interrupted[/yellow] at {done}/{total} messages")
            return {"done": done, "interrupted": True}

        try:
            content = msg["content"]
            if not content or not content.strip():
                continue
            msg_chunks = chunk_content(content)

            # Stage this message's chunks locally first. If anything below
            # raises, the shared batch stays untouched, so no partial or
            # misaligned entries reach the store without a matching DB update.
            staged_ids: list = []
            staged_texts: list = []
            staged_metas: list = []
            for chunk_text, chunk_index, total_chunks in msg_chunks:
                staged_ids.append(f"msg_{msg['id']}_chunk_{chunk_index}")
                staged_texts.append(chunk_text)
                staged_metas.append(
                    {
                        "message_id": msg["id"],
                        "chat_id": msg["chat_id"],
                        "chunk_index": chunk_index,
                        "total_chunks": total_chunks,
                        "content_length": len(chunk_text),
                        "source": msg["source"] or "unknown",
                        "role": msg["role"] or "unknown",
                        "chat_title": (msg["title"] or "(untitled)")[:200],
                        "created_at": msg["created_at"] or "",
                        "message_position": 0,
                    }
                )
            chunk_total = len(msg_chunks)

            batch_ids.extend(staged_ids)
            batch_texts.extend(staged_texts)
            batch_metas.extend(staged_metas)
            batch_msg_chunks[msg["id"]] = chunk_total
            batch_msg_ids.append(msg["id"])

            if len(batch_ids) >= _BATCH_SIZE:
                done += flush_batch()
                conn.commit()
                if console:
                    console.print(f" Vectorizing messages: {done}/{total}")
        except Exception as e:
            # Best effort: one bad message must not abort the whole run.
            logger.debug("Skipped message %s: %s", msg["id"], e)

    done += flush_batch()
    conn.commit()
    return {"done": done, "interrupted": False}
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _vectorize_chat_info(conn, cursor, store, console, mode: str = "full") -> dict:
    """Vectorize chat info records.

    Builds one searchable document per non-removed chat (title, optional
    summary, source) and upserts the documents into the chat collection in
    batches. SQLite rows are stamped only after the batch was written to the
    store; a failed store write drops that batch with a warning.

    Args:
        conn: SQLite connection, committed after each flushed batch, on
            interrupt and at the end.
        cursor: Cursor whose rows support access by column name.
        store: Vector store exposing ``ef`` (embedding function) and the
            ``_chats`` collection with ``upsert``.
        console: Rich-like console for progress, or None to stay silent.
        mode: "incremental"/"sync" process only chats without a
            ``metadata_vectorized_at`` timestamp; any other value processes
            all eligible chats.

    Returns:
        {"done": N, "interrupted": bool} where N counts flushed chats.
    """
    incremental_filter = ""
    if mode in ("incremental", "sync"):
        incremental_filter = "AND metadata_vectorized_at IS NULL"

    cursor.execute(
        f"""
        SELECT id, title, summary, account as source, created_at, message_count
        FROM chats
        WHERE status != 'removed'
        AND COALESCE(json_extract(metadata, '$.vectorize'), 1) != 0
        {incremental_filter}
        ORDER BY id
        """
    )
    chats = cursor.fetchall()
    total = len(chats)
    done = 0

    batch_ids: list = []
    batch_texts: list = []
    batch_metas: list = []
    batch_conv_ids: list = []

    def reset_batch() -> None:
        """Empty every batch buffer in place."""
        for buffer in (batch_ids, batch_texts, batch_metas, batch_conv_ids):
            buffer.clear()

    def flush_batch() -> int:
        """Flush current batch to chroma then SQLite. Returns count flushed."""
        if not batch_ids:
            return 0
        count = len(batch_conv_ids)
        try:
            embeddings = store.ef(batch_texts)
            store._chats.upsert(
                ids=list(batch_ids),
                embeddings=embeddings,
                documents=list(batch_texts),
                metadatas=list(batch_metas),
            )
        except Exception as e:
            logger.warning("Chroma write failed for chat info batch: %s", e)
            reset_batch()
            return 0
        # SQLite update only after chroma success
        for cid in batch_conv_ids:
            cursor.execute(
                "UPDATE chats SET metadata_vectorized_at = CURRENT_TIMESTAMP WHERE id = ?",
                (cid,),
            )
        reset_batch()
        return count

    for conv in chats:
        if _shutdown:
            done += flush_batch()
            conn.commit()
            if console:
                console.print(f" [yellow]Interrupted[/yellow] at {done}/{total} chats")
            return {"done": done, "interrupted": True}

        try:
            # Build every value before touching the shared batch. If anything
            # raises here, the id/text/metadata lists stay aligned and the
            # rest of the batch is not lost to one bad row.
            text_parts = [f"Chat: {conv['title'] or '(untitled)'}"]
            if conv["summary"]:
                text_parts.append(f"Summary: {conv['summary']}")
            text_parts.append(f"Source: {conv['source']}")
            searchable_text = "\n\n".join(text_parts)

            doc_id = f"chat_info_{conv['id']}"
            doc_meta = {
                "chat_id": conv["id"],
                "chat_title": (conv["title"] or "(untitled)")[:200],
                "source": conv["source"] or "unknown",
                "created_at": conv["created_at"] or "",
                "message_count": conv["message_count"] or 0,
                "chunk_type": "chat_info",
                "has_summary": bool(conv["summary"]),
            }

            batch_ids.append(doc_id)
            batch_texts.append(searchable_text)
            batch_metas.append(doc_meta)
            batch_conv_ids.append(conv["id"])

            if len(batch_ids) >= _BATCH_SIZE:
                done += flush_batch()
                conn.commit()
                if console:
                    console.print(f" Vectorizing chat info: {done}/{total}")
        except Exception as e:
            # Best effort: one bad chat must not abort the whole run.
            logger.debug("Skipped chat %s: %s", conv["id"], e)

    done += flush_batch()
    conn.commit()
    return {"done": done, "interrupted": False}
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
# ---------------------------------------------------------------------------
|
|
514
|
+
# Sync verification
|
|
515
|
+
# ---------------------------------------------------------------------------
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _sync_verify(cursor, store, files_enabled, chats_enabled, console) -> None:
    """Report whether DB vectorization counts match the vector store counts.

    The function only prints; it returns nothing and changes no data. All
    queries and store calls are still made when ``console`` is None.

    Args:
        cursor: Cursor used for the aggregate queries.
        store: Vector store exposing ``get_file_stats()`` ("total_chunks") and
            ``get_chat_stats()`` ("total_documents").
        files_enabled: Compare file chunk totals when truthy.
        chats_enabled: Compare message chunk + chat info totals when truthy.
        console: Rich-like console for the report, or None to stay silent.
    """

    def scalar(sql: str):
        """Run *sql* and return the first column of its first row."""
        cursor.execute(sql)
        return cursor.fetchone()[0]

    if console:
        console.print()
        console.print("[bold]Sync verification[/bold]")

    if files_enabled:
        db_file_chunks = scalar(
            "SELECT COALESCE(SUM(vectorized_chunks), 0) FROM files"
            " WHERE vectorized_at IS NOT NULL AND status != 'removed'"
        )
        chroma_file_chunks = store.get_file_stats().get("total_chunks", 0)

        if console:
            if db_file_chunks == chroma_file_chunks:
                console.print(
                    f" [green]\u2713[/green] Files: DB={db_file_chunks} chunks, chroma={chroma_file_chunks} chunks"
                )
            else:
                gap = abs(db_file_chunks - chroma_file_chunks)
                console.print(
                    f" [yellow]\u26a0[/yellow] Files: DB={db_file_chunks}"
                    f" chunks, chroma={chroma_file_chunks} chunks"
                    f" (discrepancy: {gap})"
                )

    if not chats_enabled:
        return

    # The chat collection holds message chunks plus one chat_info document per
    # chat, so the DB side is SUM(vectorized_chunks) over messages plus
    # COUNT(*) over vectorized chats.
    db_msg_chunks = scalar(
        "SELECT COALESCE(SUM(vectorized_chunks), 0) FROM messages"
        " WHERE status != 'removed' AND vectorized_at IS NOT NULL"
    )
    # Vectorized messages that report zero chunks make the totals unreliable.
    stale_count = scalar(
        "SELECT COUNT(*) FROM messages"
        " WHERE status != 'removed' AND vectorized_at IS NOT NULL AND vectorized_chunks = 0"
    )
    db_chat_info_count = scalar(
        "SELECT COUNT(*) FROM chats WHERE status != 'removed' AND metadata_vectorized_at IS NOT NULL"
    )
    chroma_chat_docs = store.get_chat_stats().get("total_documents", 0)

    if not console:
        return

    if stale_count > 0:
        console.print(
            f" [yellow]\u26a0[/yellow] Chats: {stale_count} messages"
            " missing chunk counts — re-run"
            " [bold]fp ingest --rebuild-vectors[/bold] to populate"
        )
        return

    db_total = db_msg_chunks + db_chat_info_count
    if db_total == chroma_chat_docs:
        console.print(
            f" [green]\u2713[/green] Chats:"
            f" DB={db_msg_chunks} message chunks"
            f" + {db_chat_info_count} chat info,"
            f" chroma={chroma_chat_docs} documents"
        )
    else:
        console.print(
            f" [yellow]\u26a0[/yellow] Chats:"
            f" DB={db_msg_chunks} message chunks"
            f" + {db_chat_info_count} chat info,"
            f" chroma={chroma_chat_docs} documents"
            f" (discrepancy:"
            f" {abs(db_total - chroma_chat_docs)})"
        )
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
# ---------------------------------------------------------------------------
|
|
595
|
+
# Main entry point
|
|
596
|
+
# ---------------------------------------------------------------------------
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
def _rebuild_vectors(
    quiet: bool = False,
    source: str = "all",
    phase: Optional[str] = None,
    mode: str = "incremental",
):
    """Rebuild the vector store from database contents.

    Resets the module-level ``_shutdown`` flag, installs ``_handle_shutdown``
    for SIGINT/SIGTERM for the duration of the run, and restores the previous
    handlers (and closes the DB connection) on every exit path.

    The function returns ``None`` in all cases. It returns early, without
    touching chroma, when vectorization is disabled for everything that was
    requested, when the pre-flight check raises ``RuntimeError``, or when an
    existing chroma store reports itself as corrupted.

    Args:
        quiet: Suppress Rich output.
        source: Which vectors to rebuild — "files", "chats", or "all".
        phase: Run a single phase — "files", "messages", or "chat_info".
            Overrides source. When set, chroma is not deleted.
        mode: Rebuild mode — "incremental" (default, new/modified/removed only),
            "sync" (incremental + count verification), or "full" (delete all, rebuild).
    """
    global _shutdown
    _shutdown = False

    import shutil

    from rich.console import Console

    from footprinter.paths import get_chroma_path
    from footprinter.source_registry import get_config

    from ..semantic.vector_store import (
        VectorStore,
        _chat_vectorization_enabled,
        _file_vectorization_enabled,
    )
    from .full_content_extractor import FullContentExtractor

    config = get_config()
    console = Console() if not quiet else None

    # Determine which phases are enabled based on phase/source and flags
    do_files = source in ("files", "all")
    do_chats = source in ("chats", "all")

    if phase == "files":
        files_enabled = _file_vectorization_enabled()
        chats_enabled = False
    elif phase in ("messages", "chat_info"):
        files_enabled = False
        chats_enabled = _chat_vectorization_enabled()
    else:
        files_enabled = do_files and _file_vectorization_enabled()
        chats_enabled = do_chats and _chat_vectorization_enabled()

    # Guard: refuse when vectorization is disabled for requested phases
    if not files_enabled and not chats_enabled:
        if console:
            console.print()
            console.print(
                "[bold yellow]Vectorization is not enabled[/bold yellow] — "
                "run [bold]fp setup[/bold] or add a [bold]semantic:[/bold] "
                "section to config."
            )
            console.print()
        return

    # Install signal handlers for graceful shutdown
    old_sigint = signal.getsignal(signal.SIGINT)
    old_sigterm = signal.getsignal(signal.SIGTERM)
    signal.signal(signal.SIGINT, _handle_shutdown)
    signal.signal(signal.SIGTERM, _handle_shutdown)

    conn = None
    try:
        # Open DB connection — before any destructive action
        conn = sqlite3.connect(str(get_db_path()), timeout=10)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        cursor = conn.cursor()

        # Pre-flight validation (only its pass/fail outcome is needed here)
        try:
            _preflight_check(conn, cursor, files_enabled, chats_enabled, console, mode=mode)
        except RuntimeError as e:
            if console:
                console.print(f"\n[bold red]Pre-flight failed:[/bold red] {e}")
            return

        # Chroma handling: full mode (no phase) deletes everything;
        # incremental/sync and single-phase preserve existing chroma.
        if mode == "full" and phase is None:
            # Full rebuild — delete chroma and start fresh
            VectorStore.reset_instance()
            chroma_path = get_chroma_path()
            if chroma_path.exists():
                shutil.rmtree(chroma_path)
                if console:
                    console.print(f"[dim]Deleted {chroma_path}[/dim]")
            store = VectorStore.get_instance()
        else:
            # Incremental/sync/single-phase — check existing chroma integrity.
            # Any ImportError raised here propagates to the caller unchanged.
            store = VectorStore.get_instance()
            integrity = store.check_integrity()
            if integrity["status"] == "corrupted":
                if console:
                    console.print()
                    console.print(f"[bold red]Chroma is corrupted:[/bold red] {integrity['error']}")
                    console.print("Run [bold]fp ingest --rebuild-vectors full[/bold] to rebuild from scratch.")
                return

        # Build extractor (vec_config also used by _vectorize_files for SQL pre-filtering)
        vec_config = config.get("vectorization", {})
        extractor = FullContentExtractor.from_config(config)

        if console:
            console.print()
            console.print(f"[bold]Rebuilding vectors[/bold] [dim]({mode})[/dim]")
            console.print()

        # Determine which phases to run
        run_files = files_enabled and (phase is None or phase == "files")
        run_messages = chats_enabled and (phase is None or phase == "messages")
        run_chat_info = chats_enabled and (phase is None or phase == "chat_info")

        incremental = mode in ("incremental", "sync")
        results = {}

        # Cleanup removed vectors (incremental/sync only)
        if incremental and not _shutdown:
            results["cleanup"] = _cleanup_removed_vectors(
                conn,
                cursor,
                store,
                clean_files=run_files,
                clean_messages=run_messages,
                clean_chats=run_chat_info,
            )

        if run_files and not _shutdown:
            results["files"] = _vectorize_files(
                conn,
                cursor,
                store,
                extractor,
                vec_config,
                console,
                mode=mode,
            )

        if run_messages and not _shutdown:
            results["messages"] = _vectorize_messages(
                conn,
                cursor,
                store,
                console,
                mode=mode,
            )

        if run_chat_info and not _shutdown:
            results["chat_info"] = _vectorize_chat_info(
                conn,
                cursor,
                store,
                console,
                mode=mode,
            )

        # Sync verification — compare DB and chroma counts
        if mode == "sync" and not _shutdown:
            _sync_verify(cursor, store, files_enabled, chats_enabled, console)

        # Summary
        if console:
            console.print()
            # A shutdown requested between phases skips the remaining phases
            # without any per-phase "interrupted" flag, so the global flag
            # must be consulted as well.
            interrupted = bool(_shutdown) or any(
                r.get("interrupted") for r in results.values()
            )
            label = (
                "[bold yellow]Rebuild interrupted[/bold yellow]"
                if interrupted
                else "[bold green]Rebuild complete[/bold green]"
            )
            console.print(label)

            cleanup = results.get("cleanup", {})

            # --- Files -----------------------------------------------------
            if incremental and run_files:
                # Show categorized counts
                files_r = results.get("files", {})
                new_count = files_r.get("done", 0)
                removed_count = cleanup.get("removed", 0)
                chunks_count = files_r.get("chunks", 0)
                console.print(f" Files: {new_count} new/modified ({chunks_count} chunks), {removed_count} removed")
            elif "files" in results:
                r = results["files"]
                console.print(f" Files: {r['done']} vectorized ({r['chunks']} chunks)")
            elif phase is None:
                # Skip reasons only apply to source-driven runs; a single
                # --phase run of another phase says nothing about files.
                if not do_files:
                    console.print(" Files: skipped (--source chats)")
                elif not files_enabled:
                    console.print(" Files: skipped (disabled)")

            # --- Messages / chat info ----------------------------------------
            chats_skip = None
            if phase is None:
                if not do_chats:
                    chats_skip = "--source files"
                elif not chats_enabled:
                    chats_skip = "disabled"

            if "messages" in results:
                msg_removed = cleanup.get("removed_messages", 0)
                msg_line = f" Messages: {results['messages']['done']} vectorized"
                if msg_removed:
                    msg_line += f", {msg_removed} removed"
                console.print(msg_line)
            elif chats_skip == "disabled":
                console.print(" Messages: skipped (disabled)")

            if "chat_info" in results:
                chat_removed = cleanup.get("removed_chats", 0)
                chat_line = f" Chats: {results['chat_info']['done']} info indexed"
                if chat_removed:
                    chat_line += f", {chat_removed} removed"
                console.print(chat_line)
            elif chats_skip is not None:
                console.print(f" Chats: skipped ({chats_skip})")

            console.print()
    finally:
        if conn is not None:
            conn.close()
        # Restore original signal handlers. getsignal() returns None when the
        # previous handler was not installed from Python, and signal.signal()
        # rejects None, so such handlers are left untouched.
        if old_sigint is not None:
            signal.signal(signal.SIGINT, old_sigint)
        if old_sigterm is not None:
            signal.signal(signal.SIGTERM, old_sigterm)
|