footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,659 @@
1
+ """Folder queries and write operations."""
2
+
3
+ import sqlite3
4
+ from typing import Any, Dict
5
+
6
+ from footprinter.db.sql_utils import paginate, paginated_response
7
+
8
+
9
def list_folders(
    conn: sqlite3.Connection,
    *,
    project_id: int | None = None,
    depth: int | None = 1,
    include_hidden: bool = False,
    sort_by: str = "size",
    limit: int = 50,
    page: int = 1,
) -> dict:
    """List indexed folders together with their project name and live totals.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection to the index database.
    project_id : int or None
        Restrict to one project. ``0`` selects folders whose ``project_id``
        is NULL; ``None`` applies no project filter.
    depth : int or None
        Upper bound on ``(number of '/' in relative_path) - 1``.
        ``None`` disables the depth filter.
    include_hidden : bool
        When False, folders whose ``relative_path`` contains ``/.`` are
        left out.
    sort_by : str
        ``'size'`` (DESC), ``'files'`` (DESC) or ``'path'`` (ASC). Any other
        value falls back to the ``'size'`` ordering.
    limit : int
        Rows per page.
    page : int
        Page number, forwarded to ``paginate``.

    Returns
    -------
    dict
        Whatever ``paginated_response("folders", folders, pagination)``
        produces. Each folder entry carries ``direct_files`` and
        ``total_size_bytes`` computed from non-removed files: when *depth*
        is set these roll up every folder whose ``relative_path`` sits under
        the listed folder, otherwise they cover only files attached directly
        to it.
    """
    # "1=1" keeps the clause list non-empty so the join is always valid SQL.
    conditions = ["1=1"]
    bind_values: list = []

    if project_id == 0:
        conditions.append("folder.project_id IS NULL")
    elif project_id is not None:
        conditions.append("folder.project_id = ?")
        bind_values.append(project_id)

    roll_up = depth is not None
    if roll_up:
        conditions.append(
            "(LENGTH(folder.relative_path)"
            " - LENGTH(REPLACE(folder.relative_path, '/', '')) - 1) <= ?"
        )
        bind_values.append(depth)

    if not include_hidden:
        conditions.append("folder.relative_path NOT LIKE '%/.%'")

    where = " AND ".join(conditions)

    # Both aggregate sub-selects share the same row source; only the
    # aggregate expression differs.
    if roll_up:
        live_files = """files file
                JOIN folders ancestor_folder ON file.folder_id = ancestor_folder.id
                WHERE file.status != 'removed'
                  AND (ancestor_folder.id = folder_cte.id
                       OR ancestor_folder.relative_path LIKE folder_cte.relative_path || '/%')"""
    else:
        live_files = """files file
                WHERE file.folder_id = folder_cte.id AND file.status != 'removed'"""

    count_sub = f"""(
                SELECT COUNT(*) FROM {live_files}
            )"""
    sum_sub = f"""(
                SELECT COALESCE(SUM(file.size_bytes), 0) FROM {live_files}
            )"""

    orderings = {
        "size": "live_size_bytes DESC",
        "files": "live_file_count DESC",
        "path": "folder_cte.relative_path ASC",
    }
    order_clause = orderings.get(sort_by, "live_size_bytes DESC")

    count_sql = f"SELECT COUNT(*) FROM folders folder WHERE {where}"
    fetch_sql = f"""
        WITH folder_cte AS (
            SELECT folder.id, folder.path, folder.relative_path, folder.name, folder.source,
                   folder.project_id, folder.mcp_view, folder.mcp_read
            FROM folders folder
            WHERE {where}
        )
        SELECT
            folder_cte.*,
            project.project_name AS project_name,
            {count_sub} AS live_file_count,
            {sum_sub} AS live_size_bytes
        FROM folder_cte
        LEFT JOIN projects project ON folder_cte.project_id = project.id
        ORDER BY {order_clause}
        LIMIT ? OFFSET ?
    """
    rows, pagination = paginate(conn, count_sql, fetch_sql, bind_values, page=page, limit=limit)

    folders = []
    for row in rows:
        folders.append(
            {
                "id": row["id"],
                "path": row["path"],
                "relative_path": row["relative_path"],
                "name": row["name"],
                "source": row["source"] or "local",
                "direct_files": row["live_file_count"],
                "total_size_bytes": row["live_size_bytes"],
                "project_id": row["project_id"],
                "project_name": row["project_name"] or "",
                "mcp_view": row["mcp_view"],
                "mcp_read": row["mcp_read"],
            }
        )

    return paginated_response("folders", folders, pagination)
132
+
133
+
134
+ def get_folder_by_path(conn: sqlite3.Connection, path: str) -> dict | None:
135
+ """Look up a folder by exact path. Returns dict or None."""
136
+ row = conn.execute(
137
+ """SELECT id, path, relative_path, name, source,
138
+ direct_file_count, total_size_bytes, scanned_at,
139
+ project_id, external_id, account, mcp_view, mcp_read
140
+ FROM folders WHERE path = ?""",
141
+ (path,),
142
+ ).fetchone()
143
+ return dict(row) if row else None
144
+
145
+
146
def get_folder_navigation(conn: sqlite3.Connection, folder_id: int, path: str) -> dict:
    """Return navigation data for a folder: files, subfolders, recursive file count.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection with a mapping-style row factory (rows are passed to
        ``dict``).
    folder_id : int
        Row id of the folder being browsed.
    path : str
        The folder's ``path`` value; immediate subfolders are located by
        path prefix.

    Returns
    -------
    dict
        ``{"files": [...], "subfolders": [...], "recursive_file_count": int}``.
        ``files`` holds at most 200 non-removed files ordered by name.
        Both lists include ``mcp_view`` / ``mcp_read`` and are NOT filtered
        for hidden entries here; only ``recursive_file_count`` excludes
        files whose ``mcp_view`` is ``'hidden'``.
    """
    file_rows = conn.execute(
        """SELECT id, name, content_type, size_bytes, modified_at, source, status,
                  mcp_view, mcp_read
           FROM files
           WHERE folder_id = ? AND status != 'removed'
           ORDER BY name
           LIMIT 200""",
        (folder_id,),
    ).fetchall()

    # Escape LIKE metacharacters so that '%' and '_' inside the folder path
    # are matched literally; otherwise "/a_b" would also match "/axb/...".
    escaped = path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    # Immediate subfolders: below this path, but not two or more levels below.
    subfolder_rows = conn.execute(
        """SELECT id, path, relative_path, name, direct_file_count, total_size_bytes,
                  source, mcp_view, mcp_read
           FROM folders
           WHERE path LIKE ? ESCAPE '\\'
             AND path != ?
             AND path NOT LIKE ? ESCAPE '\\'""",
        (escaped + "/%", path, escaped + "/%/%"),
    ).fetchall()

    # UNION (not UNION ALL) de-duplicates visited ids, so the walk terminates
    # even if parent_folder_id links ever form a cycle.
    recursive = conn.execute(
        """WITH RECURSIVE descendants(id) AS (
               SELECT id FROM folders WHERE id = ?
               UNION
               SELECT f.id FROM folders f
               JOIN descendants d ON f.parent_folder_id = d.id
           )
           SELECT COUNT(*) as total
           FROM files
           WHERE folder_id IN (SELECT id FROM descendants)
             AND status != 'removed'
             AND COALESCE(mcp_view, 'inherit') != 'hidden'""",
        (folder_id,),
    ).fetchone()

    return {
        "files": [dict(row) for row in file_rows],
        "subfolders": [dict(row) for row in subfolder_rows],
        "recursive_file_count": recursive["total"],
    }
194
+
195
+
196
def resolve_folder(conn: sqlite3.Connection, identifier: str) -> int:
    """Resolve a folder ID or relative_path to a folder row ID.

    If *identifier* parses as an integer it is treated as a row id and
    looked up by id only; a numeric identifier that matches no row raises
    immediately and is not retried as a path. Any other identifier is
    matched against ``relative_path`` exactly.

    Returns the matching folder's ``id``.

    Raises ValueError if no folder matches.
    """
    # Keep the try body to the int() call alone: int()'s own error message
    # echoes the identifier, so it must not be confused with our
    # "No folder ..." errors (e.g. for a path such as "/No folder here").
    try:
        folder_id = int(identifier)
    except ValueError:
        folder_id = None

    if folder_id is not None:
        row = conn.execute("SELECT id FROM folders WHERE id = ?", (folder_id,)).fetchone()
        if row is None:
            raise ValueError(f"No folder with id {folder_id}")
        return row["id"]

    row = conn.execute(
        "SELECT id FROM folders WHERE relative_path = ?", (identifier,)
    ).fetchone()
    if row is None:
        raise ValueError(f"No folder matching '{identifier}'")
    return row["id"]
220
+
221
+
222
+ def get_folder(conn: sqlite3.Connection, folder_id: int) -> dict | None:
223
+ """Return folder detail with child files and project info.
224
+
225
+ Returns None if the folder does not exist.
226
+ """
227
+ row = conn.execute(
228
+ """
229
+ SELECT
230
+ folder.id, folder.path, folder.relative_path, folder.name, folder.source,
231
+ folder.project_id, folder.mcp_view, folder.mcp_read,
232
+ project.project_name,
233
+ (SELECT COUNT(*) FROM files file
234
+ WHERE file.folder_id = folder.id AND file.status != 'removed'
235
+ ) AS live_file_count,
236
+ (SELECT COALESCE(SUM(file.size_bytes), 0) FROM files file
237
+ WHERE file.folder_id = folder.id AND file.status != 'removed'
238
+ ) AS live_size_bytes
239
+ FROM folders folder
240
+ LEFT JOIN projects project ON folder.project_id = project.id
241
+ WHERE folder.id = ?
242
+ """,
243
+ (folder_id,),
244
+ ).fetchone()
245
+
246
+ if not row:
247
+ return None
248
+
249
+ # Child files (limit 20)
250
+ child_files = conn.execute(
251
+ """
252
+ SELECT id, name, content_type, size_bytes
253
+ FROM files
254
+ WHERE folder_id = ? AND status != 'removed'
255
+ LIMIT 20
256
+ """,
257
+ (folder_id,),
258
+ ).fetchall()
259
+
260
+ return {
261
+ "id": row["id"],
262
+ "path": row["path"],
263
+ "relative_path": row["relative_path"],
264
+ "name": row["name"],
265
+ "source": row["source"] or "local",
266
+ "direct_files": row["live_file_count"],
267
+ "total_size_bytes": row["live_size_bytes"],
268
+ "project_id": row["project_id"],
269
+ "project": {
270
+ "id": row["project_id"],
271
+ "name": row["project_name"] or "",
272
+ }
273
+ if row["project_id"]
274
+ else None,
275
+ "mcp_view": row["mcp_view"],
276
+ "mcp_read": row["mcp_read"],
277
+ "files": [
278
+ {
279
+ "id": a["id"],
280
+ "name": a["name"],
281
+ "content_type": a["content_type"] or "",
282
+ "size_bytes": a["size_bytes"] or 0,
283
+ }
284
+ for a in child_files
285
+ ],
286
+ }
287
+
288
+
289
+ def cascade_project_id(
290
+ conn: sqlite3.Connection,
291
+ folder_id: int,
292
+ project_id: int | None,
293
+ *,
294
+ clear: bool = False,
295
+ ) -> dict:
296
+ """Walk the folder tree from *folder_id* and set/clear project_id.
297
+
298
+ Uses a recursive CTE on ``parent_folder_id`` to find all descendant
299
+ folders, then updates both folders and their files.
300
+
301
+ If *clear* is True, sets ``project_id = NULL`` on all descendants
302
+ (the *project_id* argument is ignored).
303
+
304
+ Returns ``{"folders_updated": int, "files_updated": int}``.
305
+ """
306
+ cursor = conn.cursor()
307
+ value = None if clear else project_id
308
+
309
+ # Validate project exists (when setting, not clearing)
310
+ if not clear:
311
+ row = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
312
+ if not row:
313
+ raise ValueError(f"No project with id {project_id}")
314
+
315
+ # Find all descendant folders (including the root itself)
316
+ descendants_cte = """
317
+ WITH RECURSIVE descendants(id) AS (
318
+ SELECT id FROM folders WHERE id = ?
319
+ UNION ALL
320
+ SELECT folder.id FROM folders folder
321
+ JOIN descendants descendant ON folder.parent_folder_id = descendant.id
322
+ )
323
+ """
324
+
325
+ cursor.execute(
326
+ f"{descendants_cte} SELECT id FROM descendants",
327
+ (folder_id,),
328
+ )
329
+ desc_ids = [row["id"] for row in cursor.fetchall()]
330
+
331
+ if not desc_ids:
332
+ return {"folders_updated": 0, "files_updated": 0}
333
+
334
+ ph = ",".join("?" * len(desc_ids))
335
+
336
+ # Update folders
337
+ cursor.execute(
338
+ f"UPDATE folders SET project_id = ? WHERE id IN ({ph})",
339
+ [value] + desc_ids,
340
+ )
341
+ folders_updated = cursor.rowcount
342
+
343
+ # Update files (skip removed)
344
+ cursor.execute(
345
+ f"UPDATE files SET project_id = ? WHERE folder_id IN ({ph}) AND status != 'removed'",
346
+ [value] + desc_ids,
347
+ )
348
+ files_updated = cursor.rowcount
349
+ conn.commit()
350
+
351
+ return {
352
+ "folders_updated": folders_updated,
353
+ "files_updated": files_updated,
354
+ }
355
+
356
+
357
+ def update_folder_relationships(
358
+ conn: sqlite3.Connection,
359
+ folder_id: int,
360
+ *,
361
+ project_id: int | None = None,
362
+ client_id: int | None = None,
363
+ ) -> bool | None:
364
+ """Update project and/or client assignment on a single folder (no cascade).
365
+
366
+ Only updates fields that are passed (not None). Pass ``0`` to clear
367
+ a field (set to NULL). Stamps ``assignment_source = 'user'``
368
+ when the column exists (app-scope DBs only), so auto-detection
369
+ won't overwrite manual assignments.
370
+ Returns True on success, or None if the folder does not exist.
371
+ Raises ValueError if *project_id* is given (and not 0) but doesn't exist.
372
+ """
373
+ row = conn.execute("SELECT id FROM folders WHERE id = ?", (folder_id,)).fetchone()
374
+ if not row:
375
+ return None
376
+
377
+ if project_id is not None and project_id != 0:
378
+ proj = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
379
+ if not proj:
380
+ raise ValueError(f"No project with id {project_id}")
381
+
382
+ sets: list[str] = []
383
+ params: list = []
384
+ if project_id is not None:
385
+ if project_id == 0:
386
+ sets.append("project_id = NULL")
387
+ else:
388
+ sets.append("project_id = ?")
389
+ params.append(project_id)
390
+ if client_id is not None:
391
+ if client_id == 0:
392
+ sets.append("client_id = NULL")
393
+ else:
394
+ sets.append("client_id = ?")
395
+ params.append(client_id)
396
+ if not sets:
397
+ return True
398
+
399
+ sets.append("assignment_source = 'user'")
400
+ params.append(folder_id)
401
+ try:
402
+ conn.execute(f"UPDATE folders SET {', '.join(sets)} WHERE id = ?", params)
403
+ except sqlite3.OperationalError as e:
404
+ if "no such column" not in str(e):
405
+ raise
406
+ # assignment_source not present (tool-only DB)
407
+ sets.pop()
408
+ conn.execute(f"UPDATE folders SET {', '.join(sets)} WHERE id = ?", params)
409
+ conn.commit()
410
+ return True
411
+
412
+
413
+ def cascade_client_id(
414
+ conn: sqlite3.Connection,
415
+ folder_id: int,
416
+ client_id: int | None,
417
+ *,
418
+ clear: bool = False,
419
+ ) -> dict:
420
+ """Walk the folder tree from *folder_id* and set/clear client_id.
421
+
422
+ Uses a recursive CTE on ``parent_folder_id`` to find all descendant
423
+ folders, then updates both folders and their files.
424
+
425
+ If *clear* is True, sets ``client_id = NULL`` on all descendants
426
+ (the *client_id* argument is ignored). Pass ``client_id=0`` as a
427
+ sentinel to clear (equivalent to ``clear=True``).
428
+
429
+ Returns ``{"folders_updated": int, "files_updated": int}``.
430
+ """
431
+ cursor = conn.cursor()
432
+
433
+ # Treat 0 as a clear sentinel
434
+ if client_id == 0:
435
+ clear = True
436
+
437
+ value = None if clear else client_id
438
+
439
+ # Validate client exists (when setting, not clearing)
440
+ if not clear:
441
+ row = conn.execute("SELECT id FROM clients WHERE id = ?", (client_id,)).fetchone()
442
+ if not row:
443
+ raise ValueError(f"No client with id {client_id}")
444
+
445
+ # Find all descendant folders (including the root itself)
446
+ descendants_cte = """
447
+ WITH RECURSIVE descendants(id) AS (
448
+ SELECT id FROM folders WHERE id = ?
449
+ UNION ALL
450
+ SELECT folder.id FROM folders folder
451
+ JOIN descendants descendant ON folder.parent_folder_id = descendant.id
452
+ )
453
+ """
454
+
455
+ cursor.execute(
456
+ f"{descendants_cte} SELECT id FROM descendants",
457
+ (folder_id,),
458
+ )
459
+ desc_ids = [row["id"] for row in cursor.fetchall()]
460
+
461
+ if not desc_ids:
462
+ return {"folders_updated": 0, "files_updated": 0}
463
+
464
+ ph = ",".join("?" * len(desc_ids))
465
+
466
+ # Update folders
467
+ cursor.execute(
468
+ f"UPDATE folders SET client_id = ? WHERE id IN ({ph})",
469
+ [value] + desc_ids,
470
+ )
471
+ folders_updated = cursor.rowcount
472
+
473
+ # Update files (skip removed)
474
+ cursor.execute(
475
+ f"UPDATE files SET client_id = ? WHERE folder_id IN ({ph}) AND status != 'removed'",
476
+ [value] + desc_ids,
477
+ )
478
+ files_updated = cursor.rowcount
479
+ conn.commit()
480
+
481
+ return {
482
+ "folders_updated": folders_updated,
483
+ "files_updated": files_updated,
484
+ }
485
+
486
+
487
+ # ---------------------------------------------------------------------------
488
+ # Write operations
489
+ # ---------------------------------------------------------------------------
490
+
491
+
492
def insert_drive_folder(conn: sqlite3.Connection, data: Dict[str, Any]) -> tuple:
    """Create or refresh the ``folders`` row for one Drive folder.

    The row is identified by ``(data["source"], data["external_id"])``. When
    it already exists its path, relative_path, name, account and web_link
    are overwritten and ``scanned_at`` / ``updated_at`` are set to the
    current timestamp; otherwise a new row is inserted with all timestamp
    columns set to the current timestamp. This function does not commit.

    Returns:
        Tuple of (result_type, folder_id) where result_type is 'inserted' or 'updated'
    """
    cursor = conn.cursor()

    existing = cursor.execute(
        "SELECT id FROM folders WHERE source = ? AND external_id = ?",
        (data["source"], data["external_id"]),
    ).fetchone()

    if not existing:
        new_values = (
            data["source"],
            data["external_id"],
            data["account"],
            data["path"],
            data["relative_path"],
            data["name"],
            data["web_link"],
        )
        cursor.execute(
            """
            INSERT INTO folders (
                source, external_id, account,
                path, relative_path, name,
                web_link, scanned_at, created_at,
                indexed_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP,
                      CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
            """,
            new_values,
        )
        return "inserted", cursor.lastrowid

    refreshed_values = (
        data["path"],
        data["relative_path"],
        data["name"],
        data["account"],
        data["web_link"],
        existing["id"],
    )
    cursor.execute(
        """
        UPDATE folders SET
            path = ?,
            relative_path = ?,
            name = ?,
            account = ?,
            web_link = ?,
            scanned_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """,
        refreshed_values,
    )
    return "updated", existing["id"]
551
+
552
+
553
def update_drive_folder_parents(conn: sqlite3.Connection, source: str, folder_map: Dict[str, str]) -> int:
    """Update parent_folder_id links for Drive folders.

    Args:
        conn: Connection whose rows support access by column name.
        source: Value of ``folders.source`` that both child and parent rows
            must carry.
        folder_map: Mapping of child ``external_id`` to parent
            ``external_id``.

    Entries whose parent is not present in ``folders`` are skipped, as are
    entries that map a folder to itself. All changes are committed once at
    the end.

    Returns:
        Number of folder rows whose parent link was actually written
    """
    cursor = conn.cursor()
    updated = 0

    for folder_ext_id, parent_ext_id in folder_map.items():
        # A folder must never be its own parent: that would create a cycle
        # in the parent_folder_id chain that the tree walks in this module
        # follow.
        if folder_ext_id == parent_ext_id:
            continue

        parent_row = cursor.execute(
            "SELECT id FROM folders WHERE source = ? AND external_id = ?",
            (source, parent_ext_id),
        ).fetchone()
        if parent_row is None:
            continue

        cursor.execute(
            """
            UPDATE folders
            SET parent_folder_id = ?
            WHERE source = ? AND external_id = ?
            """,
            (parent_row["id"], source, folder_ext_id),
        )
        # Count rows really touched; the child itself may not be indexed.
        updated += cursor.rowcount

    conn.commit()
    return updated
582
+
583
+
584
def refresh_folder_counts(conn: sqlite3.Connection) -> dict:
    """Recompute the stored file-count and size columns of every folder.

    Steps, each followed by a commit:

    1. ``direct_file_count`` and ``total_size_bytes`` are reset from the
       non-removed files attached directly to each folder (via ``folder_id``).
    2. ``total_file_count`` is seeded with ``direct_file_count``.
    3. Folders are visited deepest-first (ordered by the number of ``/`` in
       ``path``) and each folder's running totals are added to its parent's,
       so counts and sizes accumulate up the ``parent_folder_id`` chain; the
       results are written to ``total_file_count`` / ``total_size_bytes``.

    Returns ``{"folders_updated", "total_direct_files", "max_folder_total"}``:
    the number of folders processed, the sum of ``direct_file_count`` and
    the largest ``total_file_count``.
    """
    cursor = conn.cursor()

    live_files = "FROM files file WHERE file.folder_id = folders.id AND file.status != 'removed'"
    cursor.execute(
        f"""
        UPDATE folders
        SET direct_file_count = COALESCE((
                SELECT COUNT(*) {live_files}
            ), 0),
            total_size_bytes = COALESCE((
                SELECT SUM(file.size_bytes) {live_files}
            ), 0)
        """
    )
    conn.commit()

    cursor.execute("UPDATE folders SET total_file_count = direct_file_count")
    conn.commit()

    deepest_first = cursor.execute(
        """
        SELECT id, parent_folder_id, direct_file_count, total_size_bytes
        FROM folders
        ORDER BY LENGTH(path) - LENGTH(REPLACE(path, '/', '')) DESC
        """
    ).fetchall()

    # id -> [file count, size in bytes]; starts with each folder's own
    # figures and grows as children are folded in.
    totals = {
        entry["id"]: [entry["direct_file_count"] or 0, entry["total_size_bytes"] or 0]
        for entry in deepest_first
    }

    for entry in deepest_first:
        parent_id = entry["parent_folder_id"]
        if not parent_id or parent_id not in totals:
            continue
        child_count, child_size = totals[entry["id"]]
        parent_totals = totals[parent_id]
        parent_totals[0] += child_count
        parent_totals[1] += child_size

    cursor.executemany(
        """
        UPDATE folders
        SET total_file_count = ?,
            total_size_bytes = ?
        WHERE id = ?
        """,
        [(count, size, folder_id) for folder_id, (count, size) in totals.items()],
    )
    conn.commit()

    summary = cursor.execute(
        """
        SELECT
            COUNT(*) as folders,
            SUM(direct_file_count) as total_direct,
            MAX(total_file_count) as max_total
        FROM folders
        """
    ).fetchone()

    return {
        "folders_updated": len(deepest_first),
        "total_direct_files": summary["total_direct"] or 0,
        "max_folder_total": summary["max_total"] or 0,
    }
659
+ }