footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
footprinter/db/chats.py
ADDED
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
"""Chat queries, write operations, and duplicate detection."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from difflib import SequenceMatcher
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from footprinter.db.sql_utils import build_status_filter, paginate, paginated_response
|
|
11
|
+
|
|
12
|
+
# Column names accepted as ``sort_by`` in ``list_chats``; any other value
# falls back to "modified_at" there, so only these identifiers are ever
# interpolated into the ORDER BY clause.
SORT_WHITELIST = {"title", "account", "message_count", "created_at", "modified_at"}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def list_chats(
    conn: sqlite3.Connection,
    *,
    account: Optional[str] = None,
    query: Optional[str] = None,
    sort_by: str = "modified_at",
    order: str = "desc",
    limit: int = 50,
    page: int = 1,
    status: Optional[str | list[str]] = None,
) -> dict:
    """Return one page of chats, optionally filtered and sorted.

    Parameters
    ----------
    conn : sqlite3.Connection
    account : when truthy, keep only chats whose ``account`` equals this value
    query : when truthy, keep only chats whose title matches ``LIKE %query%``
    sort_by : sort column; values outside ``SORT_WHITELIST`` fall back to
        ``modified_at``
    order : ``'asc'`` (case-insensitive) sorts ascending, anything else descending
    limit : rows per page, forwarded to ``paginate``
    page : page number, forwarded to ``paginate``
    status : forwarded to ``build_status_filter`` on ``chat.status`` with
        ``default_exclude=["merged", "removed"]``

    Returns
    -------
    Whatever ``paginated_response("chats", chats, pagination)`` builds, where
    ``chats`` is a list of plain dicts (one per fetched row).
    """
    # Only a whitelisted column name and a fixed keyword reach the SQL text.
    if sort_by not in SORT_WHITELIST:
        sort_by = "modified_at"
    direction = "ASC" if order.lower() == "asc" else "DESC"

    status_clauses, status_values = build_status_filter(
        status,
        column="chat.status",
        default_exclude=["merged", "removed"],
    )
    # Copy so the helper's return values are never mutated below.
    clauses: list[str] = list(status_clauses)
    bind_values: list = list(status_values)

    if account:
        clauses.append("chat.account = ?")
        bind_values.append(account)
    if query:
        clauses.append("chat.title LIKE ?")
        bind_values.append(f"%{query}%")

    where = ""
    if clauses:
        where = "WHERE " + " AND ".join(clauses)

    count_sql = f"SELECT COUNT(*) FROM chats chat {where}"
    fetch_sql = f"""
        SELECT chat.id, chat.external_id, chat.account, chat.title, chat.message_count,
               chat.created_at, chat.modified_at, chat.status, chat.merged_into_id,
               chat.mcp_view, chat.mcp_read
        FROM chats chat
        {where}
        ORDER BY chat.{sort_by} {direction}
        LIMIT ? OFFSET ?
    """
    rows, pagination = paginate(conn, count_sql, fetch_sql, bind_values, page=page, limit=limit)

    exported = (
        "id",
        "external_id",
        "account",
        "title",
        "message_count",
        "created_at",
        "modified_at",
        "status",
        "merged_into_id",
        "mcp_view",
        "mcp_read",
    )
    chats = [{field: record[field] for field in exported} for record in rows]
    return paginated_response("chats", chats, pagination)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_chat_detail(
    conn: sqlite3.Connection,
    chat_id: int,
) -> Optional[dict]:
    """Load one chat (with project/client names) together with its messages.

    Parameters
    ----------
    conn : sqlite3.Connection
        Rows are read by column name, so the connection must use a row
        factory that supports it (e.g. ``sqlite3.Row``).
    chat_id : internal integer ID

    Returns
    -------
    dict of chat fields plus a ``messages`` list ordered by message id, or
    None when no chat has that id. NULL ``mcp_view`` / ``mcp_read`` values
    are reported as ``"inherit"``.
    """
    chat_row = conn.execute(
        """
        SELECT chat.id, chat.external_id, chat.account, chat.title,
               chat.summary, chat.message_count,
               chat.created_at, chat.modified_at, chat.status, chat.merged_into_id,
               chat.client_id, chat.project_id,
               chat.mcp_view, chat.mcp_read,
               project.project_name, client.name AS client_name
        FROM chats chat
        LEFT JOIN projects project ON chat.project_id = project.id
        LEFT JOIN clients client ON chat.client_id = client.id
        WHERE chat.id = ?
        """,
        (chat_id,),
    ).fetchone()
    if chat_row is None:
        return None

    copied_fields = (
        "id",
        "external_id",
        "account",
        "title",
        "summary",
        "message_count",
        "created_at",
        "modified_at",
        "status",
        "merged_into_id",
        "client_id",
        "project_id",
        "project_name",
        "client_name",
    )
    detail = {field: chat_row[field] for field in copied_fields}
    # An unset (NULL/empty) policy means "inherit".
    for policy in ("mcp_view", "mcp_read"):
        detail[policy] = chat_row[policy] or "inherit"

    message_fields = ("id", "chat_id", "message_id", "role", "content", "created_at")
    message_rows = conn.execute(
        """
        SELECT id, chat_id, message_id, role, content, created_at
        FROM messages
        WHERE chat_id = ?
        ORDER BY id
        """,
        (chat_id,),
    )
    detail["messages"] = [
        {field: message[field] for field in message_fields} for message in message_rows
    ]
    return detail
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def update_chat_relationships(
    conn: sqlite3.Connection,
    chat_id: int,
    *,
    project_id: Optional[int] = None,
    client_id: Optional[int] = None,
) -> Optional[bool]:
    """Assign or clear the project and/or client linked to a chat.

    An argument left as None is not touched; ``0`` clears the column (sets it
    to NULL); any other value must be the id of an existing project/client.
    When at least one column changes, ``assignment_source`` is also set to
    ``'user'``; if the table has no such column the update is retried
    without it. The change is committed before returning.

    Returns True on success (including when there was nothing to change) and
    None if the chat does not exist. Raises ValueError when the referenced
    project or client id is unknown.
    """
    if conn.execute("SELECT id FROM chats WHERE id = ?", (chat_id,)).fetchone() is None:
        return None

    # (label used in errors, lookup table, chats column, requested value)
    requested = (
        ("project", "projects", "project_id", project_id),
        ("client", "clients", "client_id", client_id),
    )

    # Validate every real (non-clearing) target before writing anything.
    for label, table, _column, value in requested:
        if value is None or value == 0:
            continue
        match = conn.execute(f"SELECT id FROM {table} WHERE id = ?", (value,)).fetchone()
        if not match:
            raise ValueError(f"No {label} with id {value}")

    assignments: list[str] = []
    bind_values: list = []
    for _label, _table, column, value in requested:
        if value is None:
            continue
        if value == 0:
            assignments.append(f"{column} = NULL")
        else:
            assignments.append(f"{column} = ?")
            bind_values.append(value)

    if not assignments:
        return True

    bind_values.append(chat_id)
    set_clause = ", ".join(assignments)
    try:
        conn.execute(
            f"UPDATE chats SET {set_clause}, assignment_source = 'user' WHERE id = ?",
            bind_values,
        )
    except sqlite3.OperationalError as exc:
        if "no such column" not in str(exc):
            raise
        # assignment_source not present (tool-only DB): apply the plain update.
        conn.execute(f"UPDATE chats SET {set_clause} WHERE id = ?", bind_values)
    conn.commit()
    return True
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# ---------------------------------------------------------------------------
|
|
241
|
+
# Duplicate detection
|
|
242
|
+
# ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
# Default ``fuzzy_threshold`` for ``detect_duplicates``: the minimum
# SequenceMatcher ratio between two normalized titles for the pair to be
# reported as a fuzzy title match.
_FUZZY_THRESHOLD = 0.85
|
|
245
|
+
# Default ``overlap_threshold`` for ``detect_duplicates``: the minimum share
# of distinct message hashes two chats must have in common, measured against
# the chat with fewer distinct hashes, to be reported as a message overlap.
_MESSAGE_OVERLAP_THRESHOLD = 0.50
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _get_active_chats(conn: sqlite3.Connection) -> list[dict]:
    """Return the chats scanned for duplicates, as dicts ordered by id.

    Rows whose status is 'merged' or 'removed' are left out. Each row is
    converted with ``dict(row)``, so the connection needs a mapping-style row
    factory such as ``sqlite3.Row``.
    """
    scan_sql = (
        "SELECT id, external_id, account, title, message_count,"
        " created_at, modified_at"
        " FROM chats"
        " WHERE status NOT IN ('merged', 'removed')"
        " ORDER BY id"
    )
    cursor = conn.execute(scan_sql)
    return list(map(dict, cursor))
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _get_message_hashes(conn: sqlite3.Connection, chat_id: int) -> list[str]:
    """Hex SHA-256 digest of each message's content for one chat, in id order.

    NULL or empty content is hashed as the empty string. Content is read by
    column name, so the connection must use a name-addressable row factory.
    """
    cursor = conn.execute(
        "SELECT content FROM messages WHERE chat_id = ? ORDER BY id",
        (chat_id,),
    )
    digests: list[str] = []
    for record in cursor:
        text = record["content"] or ""
        digests.append(hashlib.sha256(text.encode("utf-8")).hexdigest())
    return digests
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _normalize_title(title: str | None) -> str:
|
|
270
|
+
if not title:
|
|
271
|
+
return ""
|
|
272
|
+
return title.strip().lower()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def detect_duplicates(
    conn: sqlite3.Connection,
    *,
    fuzzy_threshold: float = _FUZZY_THRESHOLD,
    overlap_threshold: float = _MESSAGE_OVERLAP_THRESHOLD,
) -> list[dict]:
    """Detect potential duplicate chats via three passes.

    1. Exact title match (normalized): every set of two or more chats that
       share a non-empty normalized title forms one group (confidence "high").
    2. Fuzzy title match: remaining pairs whose normalized titles have a
       SequenceMatcher ratio >= ``fuzzy_threshold`` (confidence "medium").
    3. Message content overlap: remaining pairs within the same account whose
       shared distinct message hashes, divided by the smaller chat's distinct
       hash count, reach ``overlap_threshold`` (confidence "high").

    A pair reported by an earlier pass is never reported again by a later one.
    Passes 2 and 3 compare chats pairwise, so cost grows quadratically with
    the number of active chats.

    Parameters
    ----------
    conn : sqlite3.Connection
    fuzzy_threshold : minimum title similarity ratio for pass 2
    overlap_threshold : minimum message-hash overlap fraction for pass 3

    Returns
    -------
    list of dicts with keys: reason, confidence, chats, detail.
    """
    from itertools import combinations  # local import: only needed here

    active_chats = _get_active_chats(conn)
    if len(active_chats) < 2:
        return []

    groups: list[dict] = []
    paired: set[tuple[int, int]] = set()
    hash_set_cache: dict[int, frozenset[str]] = {}

    def _pair_key(a: dict, b: dict) -> tuple[int, int]:
        # Order-independent identity of a pair of chats.
        first, second = a["id"], b["id"]
        return (min(first, second), max(first, second))

    def _hash_set(chat_id: int) -> frozenset[str]:
        # Hashes are loaded lazily and the *set* is cached, so it is built
        # once per chat rather than once per compared pair.
        cached = hash_set_cache.get(chat_id)
        if cached is None:
            cached = frozenset(_get_message_hashes(conn, chat_id))
            hash_set_cache[chat_id] = cached
        return cached

    # Normalize each title once; untitled chats take no part in passes 1-2.
    titled: list[tuple[dict, str]] = []
    for chat in active_chats:
        norm = _normalize_title(chat["title"])
        if norm:
            titled.append((chat, norm))

    # Pass 1: Exact title
    by_title: dict[str, list[dict]] = defaultdict(list)
    for chat, norm in titled:
        by_title[norm].append(chat)

    for convs in by_title.values():
        if len(convs) < 2:
            continue
        groups.append(
            {
                "reason": "exact_title",
                "confidence": "high",
                "chats": convs,
                "detail": f'Title: "{convs[0]["title"]}"',
            }
        )
        for a, b in combinations(convs, 2):
            paired.add(_pair_key(a, b))

    # Pass 2: Fuzzy title
    for (a, title_a), (b, title_b) in combinations(titled, 2):
        pair = _pair_key(a, b)
        if pair in paired:
            continue
        ratio = SequenceMatcher(None, title_a, title_b).ratio()
        if ratio >= fuzzy_threshold:
            groups.append(
                {
                    "reason": "fuzzy_title",
                    "confidence": "medium",
                    "chats": [a, b],
                    "detail": f'"{a["title"]}" ≈ "{b["title"]}" ({ratio:.0%})',
                }
            )
            paired.add(pair)

    # Pass 3: Message overlap (same account only)
    by_account: dict[str, list[dict]] = defaultdict(list)
    for chat in active_chats:
        by_account[chat["account"]].append(chat)

    for convs in by_account.values():
        for a, b in combinations(convs, 2):
            pair = _pair_key(a, b)
            if pair in paired:
                continue
            hashes_a = _hash_set(a["id"])
            hashes_b = _hash_set(b["id"])
            if not hashes_a or not hashes_b:
                # A chat without messages cannot overlap with anything.
                continue
            intersection = hashes_a & hashes_b
            overlap = len(intersection) / min(len(hashes_a), len(hashes_b))
            if overlap >= overlap_threshold:
                groups.append(
                    {
                        "reason": "message_overlap",
                        "confidence": "high",
                        "chats": [a, b],
                        "detail": (f"{len(intersection)} shared messages ({overlap:.0%} overlap)"),
                    }
                )
                paired.add(pair)

    return groups
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# ---------------------------------------------------------------------------
|
|
381
|
+
# Write operations
|
|
382
|
+
# ---------------------------------------------------------------------------
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def insert_chat(conn: sqlite3.Connection, conv_data: Dict[str, Any]) -> int:
    """Upsert a chat keyed on ``external_id`` and return its row id.

    A new row stores ``status`` (``'active'`` unless ``conv_data`` overrides
    it) and stamps ``indexed_at`` with the current timestamp. When a row with
    the same ``external_id`` already exists, its id, ``status`` and
    ``indexed_at`` are left alone; account, title, summary, created_at,
    modified_at, message_count and metadata are overwritten and
    ``updated_at`` is stamped. ``conv_data["updated_at"]`` is what gets
    stored in the ``modified_at`` column. ``external_id`` is required; every
    other key is optional. Nothing is committed here.
    """
    external_id = conv_data["external_id"]
    field = conv_data.get
    upsert_sql = """
        INSERT INTO chats
            (external_id, account, title, summary, created_at, modified_at,
             message_count, metadata, status, indexed_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ON CONFLICT(external_id) DO UPDATE SET
            account = excluded.account,
            title = excluded.title,
            summary = excluded.summary,
            created_at = excluded.created_at,
            modified_at = excluded.modified_at,
            message_count = excluded.message_count,
            metadata = excluded.metadata,
            updated_at = CURRENT_TIMESTAMP
    """
    values = (
        external_id,
        field("account"),
        field("title"),
        field("summary"),
        field("created_at"),
        field("updated_at"),
        field("message_count", 0),
        json.dumps(field("metadata", {})),
        field("status", "active"),
    )

    cur = conn.cursor()
    cur.execute(upsert_sql, values)
    # Look the id up explicitly: it is the same row id on insert and on conflict.
    cur.execute("SELECT id FROM chats WHERE external_id = ?", (external_id,))
    return cur.fetchone()[0]
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def insert_message(conn: sqlite3.Connection, msg_data: Dict[str, Any]) -> int:
    """Insert one message row and return its new row id.

    ``chat_id`` and ``role`` are required keys of ``msg_data``;
    ``message_id``, ``content`` and ``created_at`` default to NULL and
    ``metadata`` (default ``{}``) is stored as JSON text. ``indexed_at`` and
    ``updated_at`` are stamped with the current timestamp. Nothing is
    committed here.
    """
    optional = msg_data.get
    record = (
        msg_data["chat_id"],
        optional("message_id"),
        msg_data["role"],
        optional("content"),
        optional("created_at"),
        json.dumps(optional("metadata", {})),
    )
    cur = conn.cursor()
    cur.execute(
        """
        INSERT INTO messages
            (chat_id, message_id, role, content, created_at, metadata,
             indexed_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
        """,
        record,
    )
    return cur.lastrowid
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def get_chat_id_by_uuid(conn: sqlite3.Connection, chat_uuid: str) -> Optional[int]:
    """Return the internal id of the chat whose ``external_id`` is *chat_uuid*.

    Returns None when no chat has that external id. The single selected
    column is read by position, so this works with the default tuple rows as
    well as with ``sqlite3.Row`` (both support index access).
    """
    row = conn.execute(
        "SELECT id FROM chats WHERE external_id = ?",
        (chat_uuid,),
    ).fetchone()
    return None if row is None else row[0]
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def delete_chat_messages(conn: sqlite3.Connection, chat_id: int) -> int:
    """Remove every message of *chat_id* and return how many rows were deleted.

    Nothing is committed here.
    """
    deletion = conn.cursor()
    deletion.execute("DELETE FROM messages WHERE chat_id = ?", (chat_id,))
    removed = deletion.rowcount
    return removed
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def get_all_active_chats(conn: sqlite3.Connection) -> List[Dict]:
    """Return every chat whose status is not 'merged', as dicts ordered by id.

    Removed chats are included; ``detect_duplicates`` is the entry point for
    dedup scans. Rows are converted with ``dict(row)``, so the connection
    needs a mapping-style row factory such as ``sqlite3.Row``.
    """
    result_set = conn.cursor().execute(
        """
        SELECT id, external_id, account, title, message_count,
               created_at, modified_at
        FROM chats
        WHERE status != 'merged'
        ORDER BY id
        """
    )
    chats: List[Dict] = []
    for record in result_set:
        chats.append(dict(record))
    return chats
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def get_chat_message_hashes(conn: sqlite3.Connection, chat_id: int) -> List[str]:
    """Return the hex SHA-256 digest of each message's content, in id order.

    Used for overlap detection. NULL or empty content is hashed as the empty
    string. The single selected column is read by position, so the function
    works with default tuple rows as well as with ``sqlite3.Row``.
    """
    cursor = conn.execute(
        "SELECT content FROM messages WHERE chat_id = ? ORDER BY id",
        (chat_id,),
    )
    return [
        hashlib.sha256((content or "").encode("utf-8")).hexdigest()
        for (content,) in cursor
    ]
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def get_chat_messages(conn: sqlite3.Connection, chat_id: int) -> List[Dict]:
    """Return all messages of *chat_id* as dicts, ordered by message row id.

    Each dict has the keys id, chat_id, message_id, role, content and
    created_at. Rows are converted with ``dict(row)``, so the connection
    needs a mapping-style row factory such as ``sqlite3.Row``.
    """
    select_sql = """
        SELECT id, chat_id, message_id, role, content, created_at
        FROM messages
        WHERE chat_id = ?
        ORDER BY id
    """
    reader = conn.cursor()
    reader.execute(select_sql, (chat_id,))
    return list(map(dict, reader.fetchall()))
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def get_chat_by_id(conn: sqlite3.Connection, chat_id: int) -> Optional[Dict]:
    """Look up one chat by internal id; return it as a dict, or None if absent.

    The dict carries id, external_id, account, title, message_count,
    created_at, modified_at, status and merged_into_id. The row is converted
    with ``dict(row)``, so the connection needs a mapping-style row factory
    such as ``sqlite3.Row``.
    """
    lookup_sql = """
        SELECT id, external_id, account, title, message_count,
               created_at, modified_at, status, merged_into_id
        FROM chats
        WHERE id = ?
    """
    found = conn.cursor().execute(lookup_sql, (chat_id,)).fetchone()
    if not found:
        return None
    return dict(found)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def mark_chat_merged(conn: sqlite3.Connection, chat_id: int, merged_into_id: int) -> None:
    """Flag *chat_id* as merged and remember the chat it was folded into.

    Sets ``status = 'merged'`` and ``merged_into_id`` on the row; an unknown
    *chat_id* changes nothing. Nothing is committed here.
    """
    merge_sql = """
        UPDATE chats
        SET status = 'merged', merged_into_id = ?
        WHERE id = ?
    """
    bind_values = (merged_into_id, chat_id)
    conn.cursor().execute(merge_sql, bind_values)
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def move_messages_to_chat(conn: sqlite3.Connection, source_id: int, target_id: int, message_ids: List[int]) -> int:
    """Move specific messages from the source chat to the target chat.

    Only messages that currently belong to *source_id* are reassigned; ids
    that are unknown or belong to another chat are ignored. Nothing is
    committed here.

    Parameters
    ----------
    conn : sqlite3.Connection
    source_id : chat the messages must currently belong to
    target_id : chat the messages are moved to
    message_ids : message row ids to move; any iterable of ints is accepted
        and duplicate ids are only considered once

    Returns
    -------
    Number of message rows that were reassigned.
    """
    if not message_ids:
        return 0
    # Materialize once (accepts tuples/sets/generators) and drop duplicates so
    # batching can never count the same row twice.
    unique_ids = list(dict.fromkeys(message_ids))
    if not unique_ids:
        return 0

    # SQLite caps the number of bound parameters per statement (999 on builds
    # older than 3.32), so large moves are issued in batches.
    batch_size = 500
    moved = 0
    cursor = conn.cursor()
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start:start + batch_size]
        placeholders = ",".join("?" for _ in batch)
        cursor.execute(
            f"""
            UPDATE messages
            SET chat_id = ?
            WHERE id IN ({placeholders}) AND chat_id = ?
            """,
            [target_id, *batch, source_id],
        )
        moved += cursor.rowcount
    return moved
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def update_chat_message_count(conn: sqlite3.Connection, chat_id: int) -> int:
    """Recompute a chat's ``message_count`` from the messages table.

    Counts the rows in ``messages`` for *chat_id*, writes that number to the
    chat record, and returns it. Nothing is committed here.
    """
    worker = conn.cursor()
    (total,) = worker.execute(
        "SELECT COUNT(*) FROM messages WHERE chat_id = ?",
        (chat_id,),
    ).fetchone()
    worker.execute(
        "UPDATE chats SET message_count = ? WHERE id = ?",
        (total, chat_id),
    )
    return total
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def list_chats_simple(
|
|
570
|
+
conn: sqlite3.Connection,
|
|
571
|
+
account: Optional[str] = None,
|
|
572
|
+
limit: int = 50,
|
|
573
|
+
status: Optional[str | list[str]] = None,
|
|
574
|
+
) -> List[Dict]:
|
|
575
|
+
"""List chats with optional account filter, excludes merged by default.
|
|
576
|
+
|
|
577
|
+
Returns a flat list of chat dicts (unlike the paginated ``list_chats``
|
|
578
|
+
used by the read API).
|
|
579
|
+
"""
|
|
580
|
+
cursor = conn.cursor()
|
|
581
|
+
conditions: list[str] = []
|
|
582
|
+
params: list = []
|
|
583
|
+
if status is None:
|
|
584
|
+
conditions.append("status != 'merged'")
|
|
585
|
+
elif status == "all":
|
|
586
|
+
pass
|
|
587
|
+
elif isinstance(status, list) and status:
|
|
588
|
+
placeholders = ",".join("?" for _ in status)
|
|
589
|
+
conditions.append(f"status IN ({placeholders})")
|
|
590
|
+
params.extend(status)
|
|
591
|
+
else:
|
|
592
|
+
conditions.append("status = ?")
|
|
593
|
+
params.append(status)
|
|
594
|
+
if account:
|
|
595
|
+
conditions.append("account = ?")
|
|
596
|
+
params.append(account)
|
|
597
|
+
where = "WHERE " + " AND ".join(conditions) if conditions else ""
|
|
598
|
+
params.append(limit)
|
|
599
|
+
cursor.execute(
|
|
600
|
+
f"""
|
|
601
|
+
SELECT id, external_id, account, title, message_count,
|
|
602
|
+
created_at, modified_at, status, merged_into_id
|
|
603
|
+
FROM chats
|
|
604
|
+
{where}
|
|
605
|
+
ORDER BY modified_at DESC
|
|
606
|
+
LIMIT ?
|
|
607
|
+
""",
|
|
608
|
+
params,
|
|
609
|
+
)
|
|
610
|
+
return [dict(row) for row in cursor.fetchall()]
|