footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,602 @@
1
+ """Chat queries, write operations, and duplicate detection."""
2
+
3
+ import hashlib
4
+ import json
5
+ import sqlite3
6
+ from collections import defaultdict
7
+ from difflib import SequenceMatcher
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from footprinter.db.sql_utils import build_status_filter, paginate, paginated_response
11
+
12
# Column names that may be interpolated into ORDER BY. Anything else falls
# back to "modified_at", so caller input never reaches the SQL text directly.
SORT_WHITELIST = {"title", "account", "message_count", "created_at", "modified_at"}


def list_chats(
    conn: sqlite3.Connection,
    *,
    account: Optional[str] = None,
    query: Optional[str] = None,
    sort_by: str = "modified_at",
    order: str = "desc",
    limit: int = 50,
    page: int = 1,
    status: Optional[str | list[str]] = None,
) -> dict:
    """List chats with filtering, sorting, and pagination.

    Parameters
    ----------
    conn : sqlite3.Connection
        Rows are read by column name, so the connection needs a
        name-addressable row factory such as ``sqlite3.Row``.
    account : optional exact-match account filter (e.g. 'claude', 'chatgpt')
    query : optional title search (``LIKE '%query%'``)
    sort_by : column to sort by; values outside ``SORT_WHITELIST`` silently
        fall back to ``modified_at``
    order : 'asc' (case-insensitive) for ascending; anything else sorts
        descending
    limit : max rows per page
    page : 1-based page number
    status : str, list[str], or None
        Passed to ``build_status_filter`` with ``default_exclude`` set to
        merged and removed:
        ``None`` → exclude merged and removed (default).
        ``"all"`` → no status filter.
        Single string → exact match.
        List of strings → ``WHERE status IN (...)``.

    Returns
    -------
    dict with keys: chats, pagination
    """
    sort_col = sort_by if sort_by in SORT_WHITELIST else "modified_at"
    direction = "ASC" if order.lower() == "asc" else "DESC"

    status_conds, status_params = build_status_filter(
        status,
        column="chat.status",
        default_exclude=["merged", "removed"],
    )
    conditions: list[str] = list(status_conds)
    params: list = list(status_params)

    if account:
        conditions.append("chat.account = ?")
        params.append(account)

    if query:
        conditions.append("chat.title LIKE ?")
        params.append(f"%{query}%")

    where = "WHERE " + " AND ".join(conditions) if conditions else ""

    count_sql = f"SELECT COUNT(*) FROM chats chat {where}"
    # chat.id is a secondary sort key: without it, rows sharing the same
    # primary sort value have no defined order, so LIMIT/OFFSET pages could
    # repeat or skip chats between requests.
    fetch_sql = f"""
        SELECT chat.id, chat.external_id, chat.account, chat.title, chat.message_count,
               chat.created_at, chat.modified_at, chat.status, chat.merged_into_id,
               chat.mcp_view, chat.mcp_read
        FROM chats chat
        {where}
        ORDER BY chat.{sort_col} {direction}, chat.id {direction}
        LIMIT ? OFFSET ?
    """
    rows, pagination = paginate(conn, count_sql, fetch_sql, params, page=page, limit=limit)

    columns = (
        "id",
        "external_id",
        "account",
        "title",
        "message_count",
        "created_at",
        "modified_at",
        "status",
        "merged_into_id",
        "mcp_view",
        "mcp_read",
    )
    chats = [{col: row[col] for col in columns} for row in rows]

    return paginated_response("chats", chats, pagination)
102
+
103
+
104
def get_chat_detail(
    conn: sqlite3.Connection,
    chat_id: int,
) -> Optional[dict]:
    """Fetch one chat with its project/client names and all of its messages.

    Parameters
    ----------
    conn : sqlite3.Connection
        Rows are read by column name, so a name-addressable row factory
        (e.g. ``sqlite3.Row``) is required.
    chat_id : internal integer ID

    Returns
    -------
    dict of chat fields plus a ``messages`` list ordered by message id,
    or None when no chat has that id. NULL/empty ``mcp_view`` and
    ``mcp_read`` are reported as ``"inherit"``.
    """
    chat_row = conn.execute(
        """
        SELECT chat.id, chat.external_id, chat.account, chat.title,
               chat.summary, chat.message_count,
               chat.created_at, chat.modified_at, chat.status, chat.merged_into_id,
               chat.client_id, chat.project_id,
               chat.mcp_view, chat.mcp_read,
               project.project_name, client.name AS client_name
        FROM chats chat
        LEFT JOIN projects project ON chat.project_id = project.id
        LEFT JOIN clients client ON chat.client_id = client.id
        WHERE chat.id = ?
        """,
        (chat_id,),
    ).fetchone()
    if not chat_row:
        return None

    # Fields copied through verbatim, in the order they appear in the result.
    passthrough = (
        "id",
        "external_id",
        "account",
        "title",
        "summary",
        "message_count",
        "created_at",
        "modified_at",
        "status",
        "merged_into_id",
        "client_id",
        "project_id",
        "project_name",
        "client_name",
    )
    detail = {field: chat_row[field] for field in passthrough}
    # Permission columns fall back to "inherit" when unset.
    detail["mcp_view"] = chat_row["mcp_view"] or "inherit"
    detail["mcp_read"] = chat_row["mcp_read"] or "inherit"

    message_fields = ("id", "chat_id", "message_id", "role", "content", "created_at")
    message_rows = conn.execute(
        """
        SELECT id, chat_id, message_id, role, content, created_at
        FROM messages
        WHERE chat_id = ?
        ORDER BY id
        """,
        (chat_id,),
    ).fetchall()
    detail["messages"] = [{field: msg[field] for field in message_fields} for msg in message_rows]
    return detail
180
+
181
+
182
def update_chat_relationships(
    conn: sqlite3.Connection,
    chat_id: int,
    *,
    project_id: Optional[int] = None,
    client_id: Optional[int] = None,
) -> Optional[bool]:
    """Assign or clear the project and/or client linked to a chat.

    A value of ``None`` leaves that field untouched; ``0`` clears it
    (sets it to NULL); any other value must be the id of an existing
    project/client. When at least one field changes, the update also sets
    ``assignment_source = 'user'`` if that column exists, and the
    transaction is committed.

    Returns True on success (including when nothing was requested),
    None if the chat does not exist.

    Raises
    ------
    ValueError
        If a non-zero ``project_id`` or ``client_id`` does not exist.
    """
    chat_exists = conn.execute("SELECT id FROM chats WHERE id = ?", (chat_id,)).fetchone()
    if chat_exists is None:
        return None

    # (label used in errors, lookup table, chats column, requested value)
    requested = (
        ("project", "projects", "project_id", project_id),
        ("client", "clients", "client_id", client_id),
    )

    # Validate every referenced row before touching the chat.
    for label, table, _column, value in requested:
        if value is None or value == 0:
            continue
        found = conn.execute(f"SELECT id FROM {table} WHERE id = ?", (value,)).fetchone()
        if not found:
            raise ValueError(f"No {label} with id {value}")

    assignments: list[str] = []
    bind_values: list = []
    for _label, _table, column, value in requested:
        if value is None:
            continue
        if value == 0:
            assignments.append(f"{column} = NULL")
        else:
            assignments.append(f"{column} = ?")
            bind_values.append(value)

    if not assignments:
        return True

    bind_values.append(chat_id)

    def _apply(clauses: list[str]) -> None:
        conn.execute(f"UPDATE chats SET {', '.join(clauses)} WHERE id = ?", bind_values)

    try:
        _apply(assignments + ["assignment_source = 'user'"])
    except sqlite3.OperationalError as exc:
        if "no such column" not in str(exc):
            raise
        # assignment_source not present (tool-only DB): retry without it.
        _apply(assignments)
    conn.commit()
    return True
238
+
239
+
240
+ # ---------------------------------------------------------------------------
241
+ # Duplicate detection
242
+ # ---------------------------------------------------------------------------
243
+
244
+ _FUZZY_THRESHOLD = 0.85
245
+ _MESSAGE_OVERLAP_THRESHOLD = 0.50
246
+
247
+
248
+ def _get_active_chats(conn: sqlite3.Connection) -> list[dict]:
249
+ """All non-merged, non-removed chats for dedup scan."""
250
+ rows = conn.execute(
251
+ "SELECT id, external_id, account, title, message_count,"
252
+ " created_at, modified_at"
253
+ " FROM chats"
254
+ " WHERE status NOT IN ('merged', 'removed')"
255
+ " ORDER BY id"
256
+ ).fetchall()
257
+ return [dict(r) for r in rows]
258
+
259
+
260
+ def _get_message_hashes(conn: sqlite3.Connection, chat_id: int) -> list[str]:
261
+ """SHA-256 content hashes for a chat's messages."""
262
+ rows = conn.execute(
263
+ "SELECT content FROM messages WHERE chat_id = ? ORDER BY id",
264
+ (chat_id,),
265
+ ).fetchall()
266
+ return [hashlib.sha256((r["content"] or "").encode("utf-8")).hexdigest() for r in rows]
267
+
268
+
269
+ def _normalize_title(title: str | None) -> str:
270
+ if not title:
271
+ return ""
272
+ return title.strip().lower()
273
+
274
+
275
+ def detect_duplicates(
276
+ conn: sqlite3.Connection,
277
+ *,
278
+ fuzzy_threshold: float = _FUZZY_THRESHOLD,
279
+ overlap_threshold: float = _MESSAGE_OVERLAP_THRESHOLD,
280
+ ) -> list[dict]:
281
+ """Detect potential duplicate chats via three passes.
282
+
283
+ 1. Exact title match (normalized)
284
+ 2. Fuzzy title match (SequenceMatcher >= threshold)
285
+ 3. Message content overlap (SHA-256 hash intersection)
286
+
287
+ Returns list of dicts with keys: reason, confidence, chats, detail.
288
+ """
289
+ active_chats = _get_active_chats(conn)
290
+ if len(active_chats) < 2:
291
+ return []
292
+
293
+ groups: list[dict] = []
294
+ paired: set[tuple[int, int]] = set()
295
+ hash_cache: dict[int, list[str]] = {}
296
+
297
+ def _get_hashes(chat_id: int) -> list[str]:
298
+ if chat_id not in hash_cache:
299
+ hash_cache[chat_id] = _get_message_hashes(conn, chat_id)
300
+ return hash_cache[chat_id]
301
+
302
+ # Pass 1: Exact title
303
+ by_title: dict[str, list[dict]] = defaultdict(list)
304
+ for conv in active_chats:
305
+ norm = _normalize_title(conv["title"])
306
+ if norm:
307
+ by_title[norm].append(conv)
308
+
309
+ for title, convs in by_title.items():
310
+ if len(convs) >= 2:
311
+ groups.append(
312
+ {
313
+ "reason": "exact_title",
314
+ "confidence": "high",
315
+ "chats": convs,
316
+ "detail": f'Title: "{convs[0]["title"]}"',
317
+ }
318
+ )
319
+ for i in range(len(convs)):
320
+ for j in range(i + 1, len(convs)):
321
+ pair = (min(convs[i]["id"], convs[j]["id"]), max(convs[i]["id"], convs[j]["id"]))
322
+ paired.add(pair)
323
+
324
+ # Pass 2: Fuzzy title
325
+ for i in range(len(active_chats)):
326
+ for j in range(i + 1, len(active_chats)):
327
+ a, b = active_chats[i], active_chats[j]
328
+ pair = (min(a["id"], b["id"]), max(a["id"], b["id"]))
329
+ if pair in paired:
330
+ continue
331
+ title_a = _normalize_title(a["title"])
332
+ title_b = _normalize_title(b["title"])
333
+ if not title_a or not title_b:
334
+ continue
335
+ ratio = SequenceMatcher(None, title_a, title_b).ratio()
336
+ if ratio >= fuzzy_threshold:
337
+ groups.append(
338
+ {
339
+ "reason": "fuzzy_title",
340
+ "confidence": "medium",
341
+ "chats": [a, b],
342
+ "detail": f'"{a["title"]}" ≈ "{b["title"]}" ({ratio:.0%})',
343
+ }
344
+ )
345
+ paired.add(pair)
346
+
347
+ # Pass 3: Message overlap (same account only)
348
+ by_account: dict[str, list[dict]] = defaultdict(list)
349
+ for conv in active_chats:
350
+ by_account[conv["account"]].append(conv)
351
+
352
+ for account, convs in by_account.items():
353
+ for i in range(len(convs)):
354
+ for j in range(i + 1, len(convs)):
355
+ a, b = convs[i], convs[j]
356
+ pair = (min(a["id"], b["id"]), max(a["id"], b["id"]))
357
+ if pair in paired:
358
+ continue
359
+ hashes_a = set(_get_hashes(a["id"]))
360
+ hashes_b = set(_get_hashes(b["id"]))
361
+ if not hashes_a or not hashes_b:
362
+ continue
363
+ intersection = hashes_a & hashes_b
364
+ min_count = min(len(hashes_a), len(hashes_b))
365
+ overlap = len(intersection) / min_count
366
+ if overlap >= overlap_threshold:
367
+ groups.append(
368
+ {
369
+ "reason": "message_overlap",
370
+ "confidence": "high",
371
+ "chats": [a, b],
372
+ "detail": (f"{len(intersection)} shared messages ({overlap:.0%} overlap)"),
373
+ }
374
+ )
375
+ paired.add(pair)
376
+
377
+ return groups
378
+
379
+
380
+ # ---------------------------------------------------------------------------
381
+ # Write operations
382
+ # ---------------------------------------------------------------------------
383
+
384
+
385
def insert_chat(conn: sqlite3.Connection, conv_data: Dict[str, Any]) -> int:
    """Upsert a chat keyed on ``external_id`` and return its row id.

    ``conv_data["external_id"]`` is required; every other field is
    optional. ``conv_data["updated_at"]`` is stored in the ``modified_at``
    column, and ``metadata`` is serialized to JSON (``{}`` when absent).
    When the external id already exists the row is updated in place, so
    its id is preserved, and its ``updated_at`` column is set to
    CURRENT_TIMESTAMP. The caller is responsible for committing.
    """
    external_id = conv_data["external_id"]
    record = (
        external_id,
        conv_data.get("account"),
        conv_data.get("title"),
        conv_data.get("summary"),
        conv_data.get("created_at"),
        conv_data.get("updated_at"),
        conv_data.get("message_count", 0),
        json.dumps(conv_data.get("metadata", {})),
    )
    conn.execute(
        """
        INSERT INTO chats
            (external_id, account, title, summary, created_at, modified_at,
             message_count, metadata)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(external_id) DO UPDATE SET
            account = excluded.account,
            title = excluded.title,
            summary = excluded.summary,
            created_at = excluded.created_at,
            modified_at = excluded.modified_at,
            message_count = excluded.message_count,
            metadata = excluded.metadata,
            updated_at = CURRENT_TIMESTAMP
        """,
        record,
    )
    # Look the id up explicitly: this covers both the insert and the
    # conflict-update path.
    id_row = conn.execute(
        "SELECT id FROM chats WHERE external_id = ?",
        (external_id,),
    ).fetchone()
    return id_row[0]
421
+
422
+
423
def insert_message(conn: sqlite3.Connection, msg_data: Dict[str, Any]) -> int:
    """Insert one message row and return its new row id.

    ``chat_id`` and ``role`` are required keys of ``msg_data``;
    ``message_id``, ``content`` and ``created_at`` default to NULL and
    ``metadata`` is stored as JSON (``{}`` when absent). ``indexed_at``
    and ``updated_at`` are set to CURRENT_TIMESTAMP. The caller is
    responsible for committing.
    """
    values = (
        msg_data["chat_id"],
        msg_data.get("message_id"),
        msg_data["role"],
        msg_data.get("content"),
        msg_data.get("created_at"),
        json.dumps(msg_data.get("metadata", {})),
    )
    inserted = conn.execute(
        """
        INSERT INTO messages
            (chat_id, message_id, role, content, created_at, metadata,
             indexed_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
        """,
        values,
    )
    return inserted.lastrowid
443
+
444
+
445
def get_chat_id_by_uuid(conn: sqlite3.Connection, chat_uuid: str) -> Optional[int]:
    """Get internal chat ID by external UUID.

    Returns the integer ``id`` of the chat whose ``external_id`` equals
    ``chat_uuid``, or None when there is no such chat.
    """
    row = conn.execute("SELECT id FROM chats WHERE external_id = ?", (chat_uuid,)).fetchone()
    # Positional access works with both the default tuple rows and
    # sqlite3.Row; name-based access would fail on a plain connection.
    return row[0] if row else None
451
+
452
+
453
def delete_chat_messages(conn: sqlite3.Connection, chat_id: int) -> int:
    """Remove every message belonging to ``chat_id`` (before re-import).

    Returns the number of rows deleted. The caller is responsible for
    committing.
    """
    deleted = conn.execute("DELETE FROM messages WHERE chat_id = ?", (chat_id,))
    return deleted.rowcount
458
+
459
+
460
def get_all_active_chats(conn: sqlite3.Connection) -> List[Dict]:
    """Return every chat whose status is not 'merged', ordered by id.

    Removed chats are included. Use ``detect_duplicates`` for dedup.
    Rows are converted with ``dict(row)``, so the connection needs a
    mapping-style row factory such as ``sqlite3.Row``.
    """
    sql = """
        SELECT id, external_id, account, title, message_count,
               created_at, modified_at
        FROM chats
        WHERE status != 'merged'
        ORDER BY id
        """
    return list(map(dict, conn.execute(sql).fetchall()))
473
+
474
+
475
def get_chat_message_hashes(conn: sqlite3.Connection, chat_id: int) -> List[str]:
    """Return SHA-256 hex digests of a chat's message contents, in id order.

    NULL content is hashed as the empty string. Content is read by column
    name, so the connection needs a row factory such as ``sqlite3.Row``.
    Used for overlap detection.
    """
    content_rows = conn.execute(
        "SELECT content FROM messages WHERE chat_id = ? ORDER BY id",
        (chat_id,),
    ).fetchall()
    digests: List[str] = []
    for content_row in content_rows:
        text = content_row["content"] or ""
        digests.append(hashlib.sha256(text.encode("utf-8")).hexdigest())
    return digests
483
+
484
+
485
def get_chat_messages(conn: sqlite3.Connection, chat_id: int) -> List[Dict]:
    """Return all messages of a chat as dicts, ordered by message row id.

    Each dict has the keys id, chat_id, message_id, role, content and
    created_at. Rows are converted with ``dict(row)``, so the connection
    needs a mapping-style row factory such as ``sqlite3.Row``.
    """
    sql = """
        SELECT id, chat_id, message_id, role, content, created_at
        FROM messages
        WHERE chat_id = ?
        ORDER BY id
        """
    message_rows = conn.execute(sql, (chat_id,)).fetchall()
    return list(map(dict, message_rows))
498
+
499
+
500
def get_chat_by_id(conn: sqlite3.Connection, chat_id: int) -> Optional[Dict]:
    """Look up one chat by its internal ID.

    Returns a dict with id, external_id, account, title, message_count,
    created_at, modified_at, status and merged_into_id, or None when no
    chat has that id. The row is converted with ``dict(row)``, so the
    connection needs a mapping-style row factory such as ``sqlite3.Row``.
    """
    match = conn.execute(
        """
        SELECT id, external_id, account, title, message_count,
               created_at, modified_at, status, merged_into_id
        FROM chats
        WHERE id = ?
        """,
        (chat_id,),
    ).fetchone()
    if not match:
        return None
    return dict(match)
514
+
515
+
516
def mark_chat_merged(conn: sqlite3.Connection, chat_id: int, merged_into_id: int) -> None:
    """Flag ``chat_id`` as merged and record the chat it was merged into.

    Sets ``status = 'merged'`` and ``merged_into_id`` on the chat row.
    Nothing happens if the chat does not exist; the caller is responsible
    for committing.
    """
    sql = """
        UPDATE chats
        SET status = 'merged', merged_into_id = ?
        WHERE id = ?
        """
    conn.execute(sql, (merged_into_id, chat_id))
527
+
528
+
529
def move_messages_to_chat(conn: sqlite3.Connection, source_id: int, target_id: int, message_ids: List[int]) -> int:
    """Move specific messages from source to target chat.

    Only messages whose row id is in ``message_ids`` and that currently
    belong to ``source_id`` are reassigned; ids belonging to any other
    chat are left untouched. ``message_ids`` may be any iterable of ints
    (list, tuple, range, ...); duplicates are ignored.

    Returns the number of messages moved (0 for an empty/None id list).
    The caller is responsible for committing.
    """
    # De-duplicate while preserving order so an id split across two
    # batches can never be counted twice.
    unique_ids = list(dict.fromkeys(message_ids or ()))
    if not unique_ids:
        return 0

    # Each statement also binds target_id and source_id; keeping batches
    # small stays under SQLite's bound-parameter limit (999 by default in
    # older builds) however many ids are passed.
    batch_size = 500
    moved = 0
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start : start + batch_size]
        placeholders = ",".join("?" for _ in batch)
        result = conn.execute(
            f"""
            UPDATE messages
            SET chat_id = ?
            WHERE id IN ({placeholders}) AND chat_id = ?
            """,
            [target_id, *batch, source_id],
        )
        moved += result.rowcount
    return moved
544
+
545
+
546
def update_chat_message_count(conn: sqlite3.Connection, chat_id: int) -> int:
    """Recompute a chat's ``message_count`` from the messages table.

    Counts the rows in ``messages`` for ``chat_id``, writes that number
    to the chat record and returns it. The caller is responsible for
    committing.
    """
    (total,) = conn.execute(
        "SELECT COUNT(*) FROM messages WHERE chat_id = ?",
        (chat_id,),
    ).fetchone()
    conn.execute(
        "UPDATE chats SET message_count = ? WHERE id = ?",
        (total, chat_id),
    )
    return total
559
+
560
+
561
+ def list_chats_simple(
562
+ conn: sqlite3.Connection,
563
+ account: Optional[str] = None,
564
+ limit: int = 50,
565
+ status: Optional[str | list[str]] = None,
566
+ ) -> List[Dict]:
567
+ """List chats with optional account filter, excludes merged by default.
568
+
569
+ Returns a flat list of chat dicts (unlike the paginated ``list_chats``
570
+ used by the read API).
571
+ """
572
+ cursor = conn.cursor()
573
+ conditions: list[str] = []
574
+ params: list = []
575
+ if status is None:
576
+ conditions.append("status != 'merged'")
577
+ elif status == "all":
578
+ pass
579
+ elif isinstance(status, list) and status:
580
+ placeholders = ",".join("?" for _ in status)
581
+ conditions.append(f"status IN ({placeholders})")
582
+ params.extend(status)
583
+ else:
584
+ conditions.append("status = ?")
585
+ params.append(status)
586
+ if account:
587
+ conditions.append("account = ?")
588
+ params.append(account)
589
+ where = "WHERE " + " AND ".join(conditions) if conditions else ""
590
+ params.append(limit)
591
+ cursor.execute(
592
+ f"""
593
+ SELECT id, external_id, account, title, message_count,
594
+ created_at, modified_at, status, merged_into_id
595
+ FROM chats
596
+ {where}
597
+ ORDER BY modified_at DESC
598
+ LIMIT ?
599
+ """,
600
+ params,
601
+ )
602
+ return [dict(row) for row in cursor.fetchall()]