footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,515 @@
1
+ """
2
+ Chat indexer for importing and querying AI chat exports.
3
+
4
+ Usage:
5
+ python -m footprinter.ingest.chat_indexer upload ~/Downloads/claude-export.zip
6
+ python -m footprinter.ingest.chat_indexer stats
7
+ python -m footprinter.ingest.chat_indexer history
8
+ """
9
+
10
+ import argparse
11
+ import json
12
+ import logging
13
+ import sys
14
+ import tempfile
15
+ import zipfile
16
+ from contextlib import nullcontext
17
+ from pathlib import Path
18
+ from typing import Dict, Optional, Tuple
19
+
20
+ from rich.console import Console
21
+ from rich.progress import track
22
+
23
+ from footprinter.db import chats as chats_db
24
+ from footprinter.db import uploads as uploads_db
25
+ from footprinter.semantic.vector_store import _chat_vectorization_enabled
26
+
27
+ from ..utils.hash_utils import compute_sha256
28
+ from ..utils.time import utc_now_iso
29
+ from .chat_parsers import ChatGPTParser, ClaudeParser
30
+ from .database import Database
31
+
32
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Security limits applied to uploaded zip archives before extraction.
MAX_DECOMPRESSED_SIZE = 1024**3  # total uncompressed bytes allowed (1 GiB)
MAX_ZIP_ENTRIES = 10_000  # maximum number of archive members
MAX_COMPRESSION_RATIO = 100  # maximum uncompressed:compressed ratio (100:1)
38
+
39
+
40
class ChatIndexer:
    """Import AI chat exports into the database and report on stored chats.

    All SQL goes through ``self.db.conn``. Vectorization is best-effort:
    failures are logged at debug level and never abort an import.
    """

    def __init__(self, db: "Database"):
        """Store the database handle; the vector store is loaded lazily."""
        self.db = db
        # None = not loaded yet, False = loading failed (sentinel: don't retry).
        self._vector_store = None

    def _get_vector_store(self):
        """Return the shared ``VectorStore`` instance, or None if unavailable.

        The import is deferred to first use. If importing or instantiating
        fails, the failure is remembered so later calls return None
        immediately instead of retrying.
        """
        if self._vector_store is None:
            try:
                from footprinter.semantic.vector_store import VectorStore

                self._vector_store = VectorStore.get_instance()
            except Exception as exc:  # ImportError is an Exception subclass
                logger.debug(f"Vector store unavailable: {exc}")
                self._vector_store = False  # sentinel: don't retry
        return self._vector_store if self._vector_store is not False else None

    def _vectorize_message(self, msg_id, chat_id, msg, conv_data):
        """Index one message in the vector store and stamp ``vectorized_at``.

        Does nothing when chat vectorization is disabled, when the message
        row's metadata sets ``vectorize`` to 0, when no vector store is
        available, or when the message has no content.

        Args:
            msg_id: Row id of the message in the ``messages`` table.
            chat_id: Row id of the owning chat.
            msg: Parsed message dict (``content``, ``role``, ``created_at``).
            conv_data: Parsed chat dict (``source``, ``title``).
        """
        if not _chat_vectorization_enabled():
            return
        # Per-record opt-out: metadata.vectorize defaults to 1 when absent.
        row = self.db.conn.execute(
            "SELECT COALESCE(json_extract(metadata, '$.vectorize'), 1) as vec FROM messages WHERE id = ?",
            (msg_id,),
        ).fetchone()
        if row and row["vec"] == 0:
            return
        store = self._get_vector_store()
        if not store or not msg.get("content"):
            return
        try:
            metadata = {
                "source": conv_data.get("source", "unknown"),
                "role": msg.get("role", "unknown"),
                "chat_title": (conv_data.get("title") or "(untitled)")[:200],
                "created_at": msg.get("created_at", ""),
                "message_position": 0,
            }
            store.upsert_chat_message(
                message_id=msg_id,
                chat_id=chat_id,
                content=msg["content"],
                metadata=metadata,
            )
            self.db.conn.execute(
                "UPDATE messages SET vectorized_at = CURRENT_TIMESTAMP WHERE id = ?",
                (msg_id,),
            )
        except Exception as e:
            # Best-effort: a vectorization failure must not fail the import.
            logger.debug(f"Chat message vectorization skipped for msg {msg_id}: {e}")

    def _vectorize_chat_info(self, chat_id, conv_data):
        """Index chat-level info and stamp ``metadata_vectorized_at``.

        Skipped under the same conditions as message vectorization
        (feature disabled, per-record ``vectorize`` flag is 0, or no store).

        Args:
            chat_id: Row id of the chat in the ``chats`` table.
            conv_data: Parsed chat dict (``title``, ``summary``, ``source``,
                ``created_at``, ``message_count``).
        """
        if not _chat_vectorization_enabled():
            return
        # Per-record opt-out: metadata.vectorize defaults to 1 when absent.
        row = self.db.conn.execute(
            "SELECT COALESCE(json_extract(metadata, '$.vectorize'), 1) as vec FROM chats WHERE id = ?",
            (chat_id,),
        ).fetchone()
        if row and row["vec"] == 0:
            return
        store = self._get_vector_store()
        if not store:
            return
        try:
            store.index_chat_info(
                chat_id=chat_id,
                title=conv_data.get("title"),
                summary=conv_data.get("summary"),
                source=conv_data.get("source", "unknown"),
                created_at=conv_data.get("created_at", ""),
                message_count=conv_data.get("message_count", 0),
            )
            self.db.conn.execute(
                "UPDATE chats SET metadata_vectorized_at = CURRENT_TIMESTAMP WHERE id = ?",
                (chat_id,),
            )
        except Exception as e:
            # Best-effort: a vectorization failure must not fail the import.
            logger.debug(f"Chat info vectorization skipped for {chat_id}: {e}")

    def _validate_zip(self, zf: zipfile.ZipFile, extract_dir: Path) -> None:
        """Validate zip contents for path traversal, size, and compression ratio.

        Args:
            zf: Open archive to inspect (nothing is extracted here).
            extract_dir: Directory the archive is going to be extracted into.

        Raises:
            ValueError: If the archive has too many entries, contains an
                absolute path or a member that resolves outside
                ``extract_dir``, or exceeds the total decompressed size or
                overall compression ratio limits.
        """
        entries = zf.infolist()

        if len(entries) > MAX_ZIP_ENTRIES:
            raise ValueError(f"Zip contains {len(entries)} entries, exceeds limit of {MAX_ZIP_ENTRIES}")

        extract_root = extract_dir.resolve()
        total_decompressed = 0
        total_compressed = 0

        for info in entries:
            # Reject absolute paths
            if info.filename.startswith("/"):
                raise ValueError(f"Zip contains absolute path: {info.filename}")

            # Reject path traversal. Compare path components rather than
            # string prefixes: "<root>_evil/x" starts with "<root>" as a
            # string but is not inside the extraction directory.
            target = (extract_dir / info.filename).resolve()
            if target != extract_root and extract_root not in target.parents:
                raise ValueError(f"Zip contains path traversal: {info.filename}")

            total_decompressed += info.file_size
            total_compressed += info.compress_size

        if total_decompressed > MAX_DECOMPRESSED_SIZE:
            raise ValueError(
                f"Zip decompressed size {total_decompressed} bytes exceeds limit of {MAX_DECOMPRESSED_SIZE} bytes"
            )

        # An archive of only empty members has no meaningful ratio.
        if total_compressed > 0:
            ratio = total_decompressed / total_compressed
            if ratio > MAX_COMPRESSION_RATIO:
                raise ValueError(f"Zip compression ratio {ratio:.1f}:1 exceeds limit of {MAX_COMPRESSION_RATIO}:1")

    def upload(self, file_path: Path, console: Optional["Console"] = None) -> Dict:
        """Upload and import a chat export (zip or directory).

        A path ending in ``.zip`` is hashed first; if an upload with the same
        hash is already recorded, nothing is imported. Otherwise the archive
        is validated, extracted to a temporary directory, recorded as an
        upload and imported. Any other path is treated as an already
        extracted export directory and imported directly, without an upload
        record.

        Args:
            file_path: Path to .zip file or extracted directory.
            console: Optional rich Console for progress UI. None = silent.

        Returns:
            For a duplicate zip: ``status`` ("duplicate"), ``upload_id`` and
            ``previous_upload``. For a zip import: the import counters plus
            ``upload_id`` and ``status`` ("completed"). For a directory
            import: the import counters only.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the hash cannot be computed, the archive fails
                validation, or the export format is not recognised.
        """
        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        is_zip = file_path.suffix.lower() == ".zip"

        if is_zip:
            hash_ctx = console.status("Computing hash…") if console else nullcontext()
            with hash_ctx:
                file_hash = compute_sha256(str(file_path))
            if not file_hash:
                raise ValueError(f"Could not compute hash for {file_path}")

            existing = uploads_db.get_upload_by_hash(self.db.conn, file_hash)
            if existing:
                logger.info(f"File already uploaded on {existing['uploaded_at']}")
                return {
                    "status": "duplicate",
                    "upload_id": existing["id"],
                    "previous_upload": existing,
                }

            file_size = file_path.stat().st_size

            # Everything that reads the extracted files must stay inside this
            # ``with``: the temporary directory is removed on exit.
            with tempfile.TemporaryDirectory() as tmpdir:
                extract_dir = Path(tmpdir) / "extract"
                with zipfile.ZipFile(file_path, "r") as zf:
                    self._validate_zip(zf, extract_dir)
                    zf.extractall(extract_dir)

                source, extract_dir = self._detect_source(extract_dir)

                upload_id = uploads_db.create_upload(
                    self.db.conn,
                    {
                        "filename": file_path.name,
                        "file_hash": file_hash,
                        "file_size": file_size,
                        "type": "chat",
                        "source": source,
                        "status": "processing",
                    },
                )

                try:
                    result = self._import_with_dedup(extract_dir, source, console=console)
                    uploads_db.update_upload(
                        self.db.conn,
                        upload_id,
                        status="completed",
                        completed_at=utc_now_iso(),
                        items_added=result["chats_added"],
                        items_updated=result["chats_updated"],
                        items_total=result["chats_added"] + result["chats_updated"],
                    )
                    result["upload_id"] = upload_id
                    result["status"] = "completed"
                    return result
                except Exception as e:
                    # Record the failure on the upload row, then propagate.
                    uploads_db.update_upload(
                        self.db.conn,
                        upload_id,
                        status="failed",
                        error_message=str(e),
                        completed_at=utc_now_iso(),
                    )
                    raise
        else:
            # Directory import (alternative to single-file)
            source, file_path = self._detect_source(file_path)
            return self._import_with_dedup(file_path, source, console=console)

    def _detect_source(self, extract_dir: Path) -> Tuple[str, Path]:
        """Detect chat export source from directory contents.

        ``conversations.json`` is looked up in ``extract_dir`` itself and, if
        absent, in its immediate sub-directories (visited in sorted order so
        the choice is deterministic).

        Returns:
            Tuple of (source, resolved_dir) where source is "claude" or
            "chatgpt" and resolved_dir contains conversations.json.

        Raises:
            ValueError: If conversations.json is missing, is not a non-empty
                JSON list, or its first entry matches no known format.
        """
        conv_file = extract_dir / "conversations.json"
        if not conv_file.exists():
            # Search one level deep for subdirectory-wrapped exports
            for child in sorted(extract_dir.iterdir()):
                if child.is_dir() and (child / "conversations.json").exists():
                    conv_file = child / "conversations.json"
                    extract_dir = child
                    break
            else:
                raise ValueError("conversations.json not found in export")

        with open(conv_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, list) or len(data) == 0:
            raise ValueError("conversations.json is empty or invalid")

        first = data[0]
        # Only a mapping can carry the marker keys; guarding here keeps a
        # malformed entry (e.g. a number) from raising TypeError on ``in``.
        if isinstance(first, dict):
            if "uuid" in first and "chat_messages" in first:
                return ("claude", extract_dir)
            if "mapping" in first:
                return ("chatgpt", extract_dir)

        raise ValueError("Unknown export format")

    def _import_with_dedup(
        self, export_dir: Path, source: str, console: Optional["Console"] = None
    ) -> Dict:
        """Import chats with message deduplication.

        A chat whose external id is already stored has its messages (and,
        when a vector store is available, its vectors) deleted and is then
        re-imported, counting as "updated". A failure while importing one
        chat is logged and counted in ``errors``; the loop continues with
        the next chat.

        Args:
            export_dir: Directory containing the export's conversations.json.
            source: "claude" or "chatgpt".
            console: Optional rich Console; enables spinner and progress bar.

        Returns:
            Dict with ``chats_added``, ``chats_updated``,
            ``messages_imported`` and ``errors`` counters.

        Raises:
            ValueError: If ``source`` is not a known export source.
        """
        if source == "claude":
            parser = ClaudeParser(export_dir)
        elif source == "chatgpt":
            conv_file = export_dir / "conversations.json"
            parser = ChatGPTParser(conv_file)
        else:
            raise ValueError(f"Unknown source: {source}")

        stats = parser.get_stats()
        logger.info(f"{source.capitalize()} export contains:")
        logger.info(f"  {stats['total_chats']} chats")
        logger.info(f"  {stats['chats_with_messages']} with messages")
        logger.info(f"  {stats['total_messages']} total messages")
        logger.info(f"  Date range: {stats['earliest_chat']} to {stats['latest_chat']}")

        # Pre-warm the vector store so the sentence-transformer model load
        # happens here, under a visible spinner, not during the first chat's
        # vectorize step. Skip when vectorization is disabled — otherwise
        # chromadb + model init would fire despite config opting out.
        if _chat_vectorization_enabled():
            vector_ctx = console.status("Loading vector store…") if console else nullcontext()
            with vector_ctx:
                self._get_vector_store()

        chats_added = 0
        chats_updated = 0
        messages_imported = 0
        errors = 0

        chat_iter = parser.parse_chats()
        if console is not None:
            chat_iter = track(
                chat_iter,
                total=stats["total_chats"],
                description="Importing chats",
                console=console,
            )

        for conv_data in chat_iter:
            try:
                # Check if chat already exists
                existing_id = chats_db.get_chat_id_by_uuid(self.db.conn, conv_data["external_id"])

                if existing_id:
                    # Delete old messages and vectors before re-import
                    chats_db.delete_chat_messages(self.db.conn, existing_id)
                    store = self._get_vector_store()
                    if store:
                        try:
                            store.delete_chat(existing_id)
                        except Exception as exc:
                            # Best-effort cleanup: keep importing, but leave a trace.
                            logger.debug(f"Could not delete vectors for chat {existing_id}: {exc}")

                # Insert/replace chat
                chats_db.insert_chat(
                    self.db.conn,
                    {
                        "external_id": conv_data["external_id"],
                        "account": conv_data["source"],  # Map source → account
                        "title": conv_data["title"],
                        "summary": conv_data["summary"],
                        "created_at": conv_data["created_at"],
                        "updated_at": conv_data["updated_at"],
                        "message_count": conv_data["message_count"],
                        "metadata": conv_data.get("metadata", {}),
                    },
                )

                # Count only after successful insert
                if existing_id:
                    chats_updated += 1
                else:
                    chats_added += 1

                internal_id = chats_db.get_chat_id_by_uuid(self.db.conn, conv_data["external_id"])

                for msg in conv_data["messages"]:
                    msg_id = chats_db.insert_message(
                        self.db.conn,
                        {
                            "chat_id": internal_id,
                            "message_id": msg["message_id"],
                            "role": msg["role"],
                            "content": msg["content"],
                            "created_at": msg["created_at"],
                            "metadata": msg.get("metadata", {}),
                        },
                    )
                    self._vectorize_message(msg_id, internal_id, msg, conv_data)
                    messages_imported += 1

                self._vectorize_chat_info(internal_id, conv_data)

                # Commit per-chat: all messages + vectorization for this chat
                self.db.conn.commit()

                # Without a console there is no progress bar; log periodically.
                if console is None:
                    total = chats_added + chats_updated
                    if total % 100 == 0:
                        logger.info(f"Imported {total} chats...")

            except Exception as e:
                logger.error(f"Error importing chat {conv_data.get('external_id')}: {e}")
                errors += 1

        return {
            "chats_added": chats_added,
            "chats_updated": chats_updated,
            "messages_imported": messages_imported,
            "errors": errors,
        }

    def get_stats(self) -> Dict:
        """Get chat history statistics.

        Returns:
            Dict with ``total_chats``, ``total_messages``, ``by_account``
            (account -> chat count), ``earliest_chat`` / ``latest_chat``
            (min/max ``created_at`` over chats) and ``top_chats`` (up to ten
            chats with the highest ``message_count``, as dicts).
        """
        cursor = self.db.conn.cursor()

        # Chat stats
        cursor.execute("SELECT COUNT(*) as count FROM chats")
        chat_count = cursor.fetchone()["count"]

        cursor.execute("SELECT COUNT(*) as count FROM messages")
        msg_count = cursor.fetchone()["count"]

        # By account
        cursor.execute("""
            SELECT account, COUNT(*) as count
            FROM chats
            GROUP BY account
        """)
        by_account = {row["account"]: row["count"] for row in cursor.fetchall()}

        # Date range
        cursor.execute("""
            SELECT MIN(created_at) as earliest, MAX(created_at) as latest
            FROM chats
        """)
        dates = cursor.fetchone()

        # Top chats by message count
        cursor.execute("""
            SELECT title, message_count, created_at
            FROM chats
            ORDER BY message_count DESC
            LIMIT 10
        """)
        top_chats = [dict(row) for row in cursor.fetchall()]

        return {
            "total_chats": chat_count,
            "total_messages": msg_count,
            "by_account": by_account,
            "earliest_chat": dates["earliest"],
            "latest_chat": dates["latest"],
            "top_chats": top_chats,
        }
431
+
432
+
433
def main():
    """Command-line entry point: ``upload``, ``stats`` and ``history``.

    Prints help and exits with status 1 when no sub-command is given.
    Otherwise opens the database at the configured path, runs the selected
    command (reporting through the module logger) and always closes the
    database afterwards.
    """
    cli = argparse.ArgumentParser(description="Import and manage AI chat history")
    commands = cli.add_subparsers(dest="command", help="Commands")

    # upload command
    upload_cmd = commands.add_parser("upload", help="Upload and import chat export (zip or directory)")
    upload_cmd.add_argument("file_path", type=Path, help="Path to chat export zip file or directory")

    # stats command
    commands.add_parser("stats", help="Show chat history statistics")

    # history command
    history_cmd = commands.add_parser("history", help="Show recent upload history")
    history_cmd.add_argument("--limit", type=int, default=10, help="Number of uploads to show (default: 10)")

    args = cli.parse_args()
    command = args.command

    if not command:
        cli.print_help()
        sys.exit(1)

    # Imported late so that argument errors / help do not touch path setup.
    from footprinter.paths import get_db_path

    db = Database(str(get_db_path()))
    manager = ChatIndexer(db)
    rule = "=" * 60

    try:
        if command == "upload":
            logger.info(f"Uploading chat export from {args.file_path}")
            result = manager.upload(args.file_path)

            if result.get("status") != "duplicate":
                logger.info(rule)
                logger.info("Upload complete!")
                logger.info(f"  Upload ID: {result['upload_id']}")
                logger.info(f"  Chats: {result['chats_added']} added, {result['chats_updated']} updated")
                logger.info(f"  Messages: {result['messages_imported']}")
                if result["errors"]:
                    logger.warning(f"  Errors: {result['errors']}")
            else:
                prev = result["previous_upload"]
                logger.warning("This file was already uploaded.")
                logger.info(f"  Uploaded: {prev['uploaded_at']}")
                logger.info(f"  Items: {prev['items_added']} added, {prev['items_updated']} updated")

        elif command == "stats":
            stats = manager.get_stats()
            logger.info(rule)
            logger.info("CHAT HISTORY STATISTICS")
            logger.info(rule)
            logger.info(f"Total chats: {stats['total_chats']}")
            logger.info(f"Total messages: {stats['total_messages']}")
            logger.info("By account:")
            for account, count in stats.get("by_account", {}).items():
                logger.info(f"  {account}: {count}")
            logger.info(f"Date range: {stats['earliest_chat']} to {stats['latest_chat']}")
            logger.info("Top chats by message count:")
            for conv in stats.get("top_chats", []):
                raw_title = conv["title"]
                # Titles longer than 50 characters are shortened for display.
                if len(raw_title or "") > 50:
                    title = raw_title[:50] + "..."
                else:
                    title = raw_title
                logger.info(f"  {conv['message_count']:4d} msgs: {title}")

        elif command == "history":
            uploads = uploads_db.get_recent_uploads(db.conn, upload_type="chat", limit=args.limit)
            logger.info(rule)
            logger.info("CHAT UPLOAD HISTORY")
            logger.info(rule)
            if not uploads:
                logger.info("No uploads found.")
            for upload in uploads:
                logger.info(f"{upload['uploaded_at']} - {upload['filename']}")
                logger.info(f"  Source: {upload['source']}  Status: {upload['status']}")
                logger.info(f"  Added: {upload['items_added']}  Updated: {upload['items_updated']}")
                if upload.get("error_message"):
                    logger.info(f"  Error: {upload['error_message']}")

    finally:
        db.close()


if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@
1
+ """
2
+ Chat history parsers for various AI chat exports.
3
+ """
4
+
5
+ from .chatgpt_parser import ChatGPTParser
6
+ from .claude_parser import ClaudeParser
7
+
8
+ __all__ = ["ClaudeParser", "ChatGPTParser"]