footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
"""Folder queries and write operations."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
from footprinter.db.sql_utils import paginate, paginated_response
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def list_folders(
    conn: sqlite3.Connection,
    *,
    project_id: int | None = None,
    depth: int | None = 1,
    include_hidden: bool = False,
    sort_by: str = "size",
    limit: int = 50,
    page: int = 1,
) -> dict:
    """Return one page of indexed folders with project info and live file stats.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows support access by column name.
    project_id : int or None
        Filter by project. ``0`` means 'no project assigned'; ``None``
        disables the filter.
    depth : int or None
        Max path depth, computed as the number of ``/`` characters in
        ``relative_path`` minus one. ``None`` = no depth filter.
        When a depth is given, file counts and sizes are rolled up over the
        folder itself and every folder whose ``relative_path`` starts with
        this folder's ``relative_path`` followed by ``/``; otherwise only
        files directly in the folder are counted.
    include_hidden : bool
        If False, exclude folders whose ``relative_path`` contains ``/.``.
    sort_by : str
        ``'size'`` (DESC), ``'files'`` (DESC), or ``'path'`` (ASC).
        Unknown values fall back to size ordering.
    limit : int
        Maximum rows per page (default 50).
    page : int
        1-based page number (default 1).

    Returns
    -------
    dict
        The value produced by ``paginated_response("folders", ...)``.
        Each folder entry carries ``direct_files`` and ``total_size_bytes``
        computed live from non-removed rows of ``files`` (rolled up when
        *depth* is set, see above).
    """
    where = "1=1"
    params: list = []

    if project_id is not None:
        if project_id == 0:
            where += " AND folder.project_id IS NULL"
        else:
            where += " AND folder.project_id = ?"
            params.append(project_id)

    if depth is not None:
        # Depth = (number of '/' in relative_path) - 1.
        where += " AND (LENGTH(folder.relative_path) - LENGTH(REPLACE(folder.relative_path, '/', '')) - 1) <= ?"
        params.append(depth)

    if not include_hidden:
        where += " AND folder.relative_path NOT LIKE '%/.%'"

    # When depth filtering is active, roll up descendant files.
    # Otherwise count only direct children (folder_id match).
    if depth is not None:
        # Descendants are matched with a literal prefix comparison rather than
        # LIKE: LIKE would treat '_' and '%' inside folder names as wildcards
        # and compares ASCII case-insensitively by default, so e.g. "my_app"
        # would also roll up files stored under "my-app/".
        live_files = """FROM files file
                JOIN folders ancestor_folder ON file.folder_id = ancestor_folder.id
                WHERE file.status != 'removed'
                  AND (ancestor_folder.id = folder_cte.id
                       OR substr(ancestor_folder.relative_path, 1,
                                 length(folder_cte.relative_path) + 1)
                          = folder_cte.relative_path || '/')"""
    else:
        live_files = """FROM files file
                WHERE file.folder_id = folder_cte.id AND file.status != 'removed'"""

    count_sub = f"(SELECT COUNT(*) {live_files})"
    sum_sub = f"(SELECT COALESCE(SUM(file.size_bytes), 0) {live_files})"

    sort_map = {
        "size": "live_size_bytes DESC",
        "files": "live_file_count DESC",
        "path": "folder_cte.relative_path ASC",
    }
    # Only whitelisted ORDER BY fragments are interpolated into the SQL.
    order_clause = sort_map.get(sort_by, "live_size_bytes DESC")

    count_sql = f"SELECT COUNT(*) FROM folders folder WHERE {where}"
    fetch_sql = f"""
        WITH folder_cte AS (
            SELECT folder.id, folder.path, folder.relative_path, folder.name, folder.source,
                   folder.project_id, folder.mcp_view, folder.mcp_read
            FROM folders folder
            WHERE {where}
        )
        SELECT
            folder_cte.*,
            project.project_name AS project_name,
            {count_sub} AS live_file_count,
            {sum_sub} AS live_size_bytes
        FROM folder_cte
        LEFT JOIN projects project ON folder_cte.project_id = project.id
        ORDER BY {order_clause}
        LIMIT ? OFFSET ?
    """
    # NOTE(review): ``params`` holds only the WHERE values; paginate() is
    # presumably responsible for binding the LIMIT/OFFSET placeholders.
    rows, pagination = paginate(conn, count_sql, fetch_sql, params, page=page, limit=limit)

    folders = [
        {
            "id": row["id"],
            "path": row["path"],
            "relative_path": row["relative_path"],
            "name": row["name"],
            "source": row["source"] or "local",
            "direct_files": row["live_file_count"],
            "total_size_bytes": row["live_size_bytes"],
            "project_id": row["project_id"],
            "project_name": row["project_name"] or "",
            "mcp_view": row["mcp_view"],
            "mcp_read": row["mcp_read"],
        }
        for row in rows
    ]

    return paginated_response("folders", folders, pagination)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def get_folder_by_path(conn: sqlite3.Connection, path: str) -> dict | None:
|
|
135
|
+
"""Look up a folder by exact path. Returns dict or None."""
|
|
136
|
+
row = conn.execute(
|
|
137
|
+
"""SELECT id, path, relative_path, name, source,
|
|
138
|
+
direct_file_count, total_size_bytes, scanned_at,
|
|
139
|
+
project_id, external_id, account, mcp_view, mcp_read
|
|
140
|
+
FROM folders WHERE path = ?""",
|
|
141
|
+
(path,),
|
|
142
|
+
).fetchone()
|
|
143
|
+
return dict(row) if row else None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def get_folder_navigation(conn: sqlite3.Connection, folder_id: int, path: str) -> dict:
    """Return navigation data for a folder: files, subfolders, recursive file count.

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection whose rows can be converted with ``dict(row)``.
    folder_id : int
        Row id of the folder; used for the direct file listing and as the
        root of the recursive descendant walk over ``parent_folder_id``.
    path : str
        The folder's ``path`` value; immediate subfolders are found by
        path prefix.

    Returns
    -------
    dict
        ``{"files": [...], "subfolders": [...], "recursive_file_count": int}``.
        File and subfolder entries include ``mcp_view`` so the service layer
        can filter by visibility.
    """
    # Files in this folder (limit 200, hidden NOT pre-filtered — service does it)
    files = conn.execute(
        """SELECT id, name, content_type, size_bytes, modified_at, source, status,
                  mcp_view, mcp_read
           FROM files
           WHERE folder_id = ? AND status != 'removed'
           ORDER BY name
           LIMIT 200""",
        (folder_id,),
    ).fetchall()
    file_results = [dict(r) for r in files]

    # Immediate subfolders (one level deeper).
    # The path is user data, so LIKE metacharacters in it must be escaped;
    # otherwise "/r/a_b" would also list children of "/r/axb".
    escaped = path.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    subfolders = conn.execute(
        """SELECT id, path, relative_path, name, direct_file_count, total_size_bytes,
                  source, mcp_view, mcp_read
           FROM folders
           WHERE path LIKE ? ESCAPE '!' AND path != ? AND path NOT LIKE ? ESCAPE '!'""",
        (escaped + "/%", path, escaped + "/%/%"),
    ).fetchall()
    subfolder_results = [dict(sf) for sf in subfolders]

    # Recursive file count across all descendants (excludes hidden files)
    recursive = conn.execute(
        """WITH RECURSIVE descendants(id) AS (
               SELECT id FROM folders WHERE id = ?
               UNION ALL
               SELECT f.id FROM folders f
               JOIN descendants d ON f.parent_folder_id = d.id
           )
           SELECT COUNT(*) as total
           FROM files
           WHERE folder_id IN (SELECT id FROM descendants)
             AND status != 'removed'
             AND COALESCE(mcp_view, 'inherit') != 'hidden'""",
        (folder_id,),
    ).fetchone()

    return {
        "files": file_results,
        "subfolders": subfolder_results,
        "recursive_file_count": recursive["total"],
    }
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def resolve_folder(conn: sqlite3.Connection, identifier: str) -> int:
    """Resolve a folder ID or relative_path to the folder's row ID.

    If *identifier* parses as an integer it is first looked up as a row id.
    In every case an exact ``relative_path`` match is then attempted, so a
    folder whose relative path is itself numeric (e.g. ``"2024"``) can still
    be resolved.

    Raises
    ------
    ValueError
        ``"No folder with id N"`` when a numeric identifier matches neither
        an id nor a relative_path; ``"No folder matching '...'"`` for a
        non-numeric identifier with no relative_path match.
    """
    # Keep the try body minimal: only the int() parse may legitimately fail.
    try:
        folder_id = int(identifier)
    except ValueError:
        folder_id = None

    if folder_id is not None:
        row = conn.execute("SELECT id FROM folders WHERE id = ?", (folder_id,)).fetchone()
        if row:
            return row["id"]

    # Fall back to relative_path
    row = conn.execute("SELECT id FROM folders WHERE relative_path = ?", (identifier,)).fetchone()
    if row:
        return row["id"]

    if folder_id is not None:
        raise ValueError(f"No folder with id {folder_id}")
    raise ValueError(f"No folder matching '{identifier}'")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def get_folder(conn: sqlite3.Connection, folder_id: int) -> dict | None:
|
|
223
|
+
"""Return folder detail with child files and project info.
|
|
224
|
+
|
|
225
|
+
Returns None if the folder does not exist.
|
|
226
|
+
"""
|
|
227
|
+
row = conn.execute(
|
|
228
|
+
"""
|
|
229
|
+
SELECT
|
|
230
|
+
folder.id, folder.path, folder.relative_path, folder.name, folder.source,
|
|
231
|
+
folder.project_id, folder.mcp_view, folder.mcp_read,
|
|
232
|
+
project.project_name,
|
|
233
|
+
(SELECT COUNT(*) FROM files file
|
|
234
|
+
WHERE file.folder_id = folder.id AND file.status != 'removed'
|
|
235
|
+
) AS live_file_count,
|
|
236
|
+
(SELECT COALESCE(SUM(file.size_bytes), 0) FROM files file
|
|
237
|
+
WHERE file.folder_id = folder.id AND file.status != 'removed'
|
|
238
|
+
) AS live_size_bytes
|
|
239
|
+
FROM folders folder
|
|
240
|
+
LEFT JOIN projects project ON folder.project_id = project.id
|
|
241
|
+
WHERE folder.id = ?
|
|
242
|
+
""",
|
|
243
|
+
(folder_id,),
|
|
244
|
+
).fetchone()
|
|
245
|
+
|
|
246
|
+
if not row:
|
|
247
|
+
return None
|
|
248
|
+
|
|
249
|
+
# Child files (limit 20)
|
|
250
|
+
child_files = conn.execute(
|
|
251
|
+
"""
|
|
252
|
+
SELECT id, name, content_type, size_bytes
|
|
253
|
+
FROM files
|
|
254
|
+
WHERE folder_id = ? AND status != 'removed'
|
|
255
|
+
LIMIT 20
|
|
256
|
+
""",
|
|
257
|
+
(folder_id,),
|
|
258
|
+
).fetchall()
|
|
259
|
+
|
|
260
|
+
return {
|
|
261
|
+
"id": row["id"],
|
|
262
|
+
"path": row["path"],
|
|
263
|
+
"relative_path": row["relative_path"],
|
|
264
|
+
"name": row["name"],
|
|
265
|
+
"source": row["source"] or "local",
|
|
266
|
+
"direct_files": row["live_file_count"],
|
|
267
|
+
"total_size_bytes": row["live_size_bytes"],
|
|
268
|
+
"project_id": row["project_id"],
|
|
269
|
+
"project": {
|
|
270
|
+
"id": row["project_id"],
|
|
271
|
+
"name": row["project_name"] or "",
|
|
272
|
+
}
|
|
273
|
+
if row["project_id"]
|
|
274
|
+
else None,
|
|
275
|
+
"mcp_view": row["mcp_view"],
|
|
276
|
+
"mcp_read": row["mcp_read"],
|
|
277
|
+
"files": [
|
|
278
|
+
{
|
|
279
|
+
"id": a["id"],
|
|
280
|
+
"name": a["name"],
|
|
281
|
+
"content_type": a["content_type"] or "",
|
|
282
|
+
"size_bytes": a["size_bytes"] or 0,
|
|
283
|
+
}
|
|
284
|
+
for a in child_files
|
|
285
|
+
],
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def cascade_project_id(
|
|
290
|
+
conn: sqlite3.Connection,
|
|
291
|
+
folder_id: int,
|
|
292
|
+
project_id: int | None,
|
|
293
|
+
*,
|
|
294
|
+
clear: bool = False,
|
|
295
|
+
) -> dict:
|
|
296
|
+
"""Walk the folder tree from *folder_id* and set/clear project_id.
|
|
297
|
+
|
|
298
|
+
Uses a recursive CTE on ``parent_folder_id`` to find all descendant
|
|
299
|
+
folders, then updates both folders and their files.
|
|
300
|
+
|
|
301
|
+
If *clear* is True, sets ``project_id = NULL`` on all descendants
|
|
302
|
+
(the *project_id* argument is ignored).
|
|
303
|
+
|
|
304
|
+
Returns ``{"folders_updated": int, "files_updated": int}``.
|
|
305
|
+
"""
|
|
306
|
+
cursor = conn.cursor()
|
|
307
|
+
value = None if clear else project_id
|
|
308
|
+
|
|
309
|
+
# Validate project exists (when setting, not clearing)
|
|
310
|
+
if not clear:
|
|
311
|
+
row = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
|
|
312
|
+
if not row:
|
|
313
|
+
raise ValueError(f"No project with id {project_id}")
|
|
314
|
+
|
|
315
|
+
# Find all descendant folders (including the root itself)
|
|
316
|
+
descendants_cte = """
|
|
317
|
+
WITH RECURSIVE descendants(id) AS (
|
|
318
|
+
SELECT id FROM folders WHERE id = ?
|
|
319
|
+
UNION ALL
|
|
320
|
+
SELECT folder.id FROM folders folder
|
|
321
|
+
JOIN descendants descendant ON folder.parent_folder_id = descendant.id
|
|
322
|
+
)
|
|
323
|
+
"""
|
|
324
|
+
|
|
325
|
+
cursor.execute(
|
|
326
|
+
f"{descendants_cte} SELECT id FROM descendants",
|
|
327
|
+
(folder_id,),
|
|
328
|
+
)
|
|
329
|
+
desc_ids = [row["id"] for row in cursor.fetchall()]
|
|
330
|
+
|
|
331
|
+
if not desc_ids:
|
|
332
|
+
return {"folders_updated": 0, "files_updated": 0}
|
|
333
|
+
|
|
334
|
+
ph = ",".join("?" * len(desc_ids))
|
|
335
|
+
|
|
336
|
+
# Update folders
|
|
337
|
+
cursor.execute(
|
|
338
|
+
f"UPDATE folders SET project_id = ? WHERE id IN ({ph})",
|
|
339
|
+
[value] + desc_ids,
|
|
340
|
+
)
|
|
341
|
+
folders_updated = cursor.rowcount
|
|
342
|
+
|
|
343
|
+
# Update files (skip removed)
|
|
344
|
+
cursor.execute(
|
|
345
|
+
f"UPDATE files SET project_id = ? WHERE folder_id IN ({ph}) AND status != 'removed'",
|
|
346
|
+
[value] + desc_ids,
|
|
347
|
+
)
|
|
348
|
+
files_updated = cursor.rowcount
|
|
349
|
+
conn.commit()
|
|
350
|
+
|
|
351
|
+
return {
|
|
352
|
+
"folders_updated": folders_updated,
|
|
353
|
+
"files_updated": files_updated,
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def update_folder_relationships(
|
|
358
|
+
conn: sqlite3.Connection,
|
|
359
|
+
folder_id: int,
|
|
360
|
+
*,
|
|
361
|
+
project_id: int | None = None,
|
|
362
|
+
client_id: int | None = None,
|
|
363
|
+
) -> bool | None:
|
|
364
|
+
"""Update project and/or client assignment on a single folder (no cascade).
|
|
365
|
+
|
|
366
|
+
Only updates fields that are passed (not None). Pass ``0`` to clear
|
|
367
|
+
a field (set to NULL). Stamps ``assignment_source = 'user'``
|
|
368
|
+
when the column exists (app-scope DBs only), so auto-detection
|
|
369
|
+
won't overwrite manual assignments.
|
|
370
|
+
Returns True on success, or None if the folder does not exist.
|
|
371
|
+
Raises ValueError if *project_id* is given (and not 0) but doesn't exist.
|
|
372
|
+
"""
|
|
373
|
+
row = conn.execute("SELECT id FROM folders WHERE id = ?", (folder_id,)).fetchone()
|
|
374
|
+
if not row:
|
|
375
|
+
return None
|
|
376
|
+
|
|
377
|
+
if project_id is not None and project_id != 0:
|
|
378
|
+
proj = conn.execute("SELECT id FROM projects WHERE id = ?", (project_id,)).fetchone()
|
|
379
|
+
if not proj:
|
|
380
|
+
raise ValueError(f"No project with id {project_id}")
|
|
381
|
+
|
|
382
|
+
sets: list[str] = []
|
|
383
|
+
params: list = []
|
|
384
|
+
if project_id is not None:
|
|
385
|
+
if project_id == 0:
|
|
386
|
+
sets.append("project_id = NULL")
|
|
387
|
+
else:
|
|
388
|
+
sets.append("project_id = ?")
|
|
389
|
+
params.append(project_id)
|
|
390
|
+
if client_id is not None:
|
|
391
|
+
if client_id == 0:
|
|
392
|
+
sets.append("client_id = NULL")
|
|
393
|
+
else:
|
|
394
|
+
sets.append("client_id = ?")
|
|
395
|
+
params.append(client_id)
|
|
396
|
+
if not sets:
|
|
397
|
+
return True
|
|
398
|
+
|
|
399
|
+
sets.append("assignment_source = 'user'")
|
|
400
|
+
params.append(folder_id)
|
|
401
|
+
try:
|
|
402
|
+
conn.execute(f"UPDATE folders SET {', '.join(sets)} WHERE id = ?", params)
|
|
403
|
+
except sqlite3.OperationalError as e:
|
|
404
|
+
if "no such column" not in str(e):
|
|
405
|
+
raise
|
|
406
|
+
# assignment_source not present (tool-only DB)
|
|
407
|
+
sets.pop()
|
|
408
|
+
conn.execute(f"UPDATE folders SET {', '.join(sets)} WHERE id = ?", params)
|
|
409
|
+
conn.commit()
|
|
410
|
+
return True
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def cascade_client_id(
|
|
414
|
+
conn: sqlite3.Connection,
|
|
415
|
+
folder_id: int,
|
|
416
|
+
client_id: int | None,
|
|
417
|
+
*,
|
|
418
|
+
clear: bool = False,
|
|
419
|
+
) -> dict:
|
|
420
|
+
"""Walk the folder tree from *folder_id* and set/clear client_id.
|
|
421
|
+
|
|
422
|
+
Uses a recursive CTE on ``parent_folder_id`` to find all descendant
|
|
423
|
+
folders, then updates both folders and their files.
|
|
424
|
+
|
|
425
|
+
If *clear* is True, sets ``client_id = NULL`` on all descendants
|
|
426
|
+
(the *client_id* argument is ignored). Pass ``client_id=0`` as a
|
|
427
|
+
sentinel to clear (equivalent to ``clear=True``).
|
|
428
|
+
|
|
429
|
+
Returns ``{"folders_updated": int, "files_updated": int}``.
|
|
430
|
+
"""
|
|
431
|
+
cursor = conn.cursor()
|
|
432
|
+
|
|
433
|
+
# Treat 0 as a clear sentinel
|
|
434
|
+
if client_id == 0:
|
|
435
|
+
clear = True
|
|
436
|
+
|
|
437
|
+
value = None if clear else client_id
|
|
438
|
+
|
|
439
|
+
# Validate client exists (when setting, not clearing)
|
|
440
|
+
if not clear:
|
|
441
|
+
row = conn.execute("SELECT id FROM clients WHERE id = ?", (client_id,)).fetchone()
|
|
442
|
+
if not row:
|
|
443
|
+
raise ValueError(f"No client with id {client_id}")
|
|
444
|
+
|
|
445
|
+
# Find all descendant folders (including the root itself)
|
|
446
|
+
descendants_cte = """
|
|
447
|
+
WITH RECURSIVE descendants(id) AS (
|
|
448
|
+
SELECT id FROM folders WHERE id = ?
|
|
449
|
+
UNION ALL
|
|
450
|
+
SELECT folder.id FROM folders folder
|
|
451
|
+
JOIN descendants descendant ON folder.parent_folder_id = descendant.id
|
|
452
|
+
)
|
|
453
|
+
"""
|
|
454
|
+
|
|
455
|
+
cursor.execute(
|
|
456
|
+
f"{descendants_cte} SELECT id FROM descendants",
|
|
457
|
+
(folder_id,),
|
|
458
|
+
)
|
|
459
|
+
desc_ids = [row["id"] for row in cursor.fetchall()]
|
|
460
|
+
|
|
461
|
+
if not desc_ids:
|
|
462
|
+
return {"folders_updated": 0, "files_updated": 0}
|
|
463
|
+
|
|
464
|
+
ph = ",".join("?" * len(desc_ids))
|
|
465
|
+
|
|
466
|
+
# Update folders
|
|
467
|
+
cursor.execute(
|
|
468
|
+
f"UPDATE folders SET client_id = ? WHERE id IN ({ph})",
|
|
469
|
+
[value] + desc_ids,
|
|
470
|
+
)
|
|
471
|
+
folders_updated = cursor.rowcount
|
|
472
|
+
|
|
473
|
+
# Update files (skip removed)
|
|
474
|
+
cursor.execute(
|
|
475
|
+
f"UPDATE files SET client_id = ? WHERE folder_id IN ({ph}) AND status != 'removed'",
|
|
476
|
+
[value] + desc_ids,
|
|
477
|
+
)
|
|
478
|
+
files_updated = cursor.rowcount
|
|
479
|
+
conn.commit()
|
|
480
|
+
|
|
481
|
+
return {
|
|
482
|
+
"folders_updated": folders_updated,
|
|
483
|
+
"files_updated": files_updated,
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
# ---------------------------------------------------------------------------
|
|
488
|
+
# Write operations
|
|
489
|
+
# ---------------------------------------------------------------------------
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def insert_drive_folder(conn: sqlite3.Connection, data: Dict[str, Any]) -> tuple:
    """Create or refresh the ``folders`` row for a Drive folder.

    The row is identified by ``(data["source"], data["external_id"])``.
    An existing row gets its path, relative_path, name, account and
    web_link overwritten and its ``scanned_at``/``updated_at`` stamped;
    otherwise a new row is inserted with all timestamp columns set to
    ``CURRENT_TIMESTAMP``. This function does not commit.

    Returns:
        Tuple of (result_type, folder_id) where result_type is 'inserted' or 'updated'
    """
    cur = conn.cursor()

    cur.execute(
        "SELECT id FROM folders WHERE source = ? AND external_id = ?",
        (data["source"], data["external_id"]),
    )
    match = cur.fetchone()

    if not match:
        insert_keys = (
            "source", "external_id", "account",
            "path", "relative_path", "name", "web_link",
        )
        cur.execute(
            """
            INSERT INTO folders (
                source, external_id, account,
                path, relative_path, name,
                web_link, scanned_at, created_at,
                indexed_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP,
                      CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
            """,
            tuple(data[key] for key in insert_keys),
        )
        return "inserted", cur.lastrowid

    update_keys = ("path", "relative_path", "name", "account", "web_link")
    new_values = [data[key] for key in update_keys]
    existing_id = match["id"]
    cur.execute(
        """
        UPDATE folders SET
            path = ?,
            relative_path = ?,
            name = ?,
            account = ?,
            web_link = ?,
            scanned_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """,
        (*new_values, existing_id),
    )
    return "updated", existing_id
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def update_drive_folder_parents(conn: sqlite3.Connection, source: str, folder_map: Dict[str, str]) -> int:
    """Update parent_folder_id links for Drive folders.

    *folder_map* maps a folder's ``external_id`` to its parent's
    ``external_id``; both are looked up within *source*. Entries whose
    parent is not present in ``folders`` are skipped. Commits once after
    all entries are processed.

    Returns:
        Number of folder rows actually updated (entries whose child folder
        is not present contribute 0).
    """
    cursor = conn.cursor()
    updated = 0

    for folder_ext_id, parent_ext_id in folder_map.items():
        cursor.execute(
            "SELECT id FROM folders WHERE source = ? AND external_id = ?",
            (source, parent_ext_id),
        )
        parent_row = cursor.fetchone()

        if parent_row:
            cursor.execute(
                """
                UPDATE folders
                SET parent_folder_id = ?
                WHERE source = ? AND external_id = ?
                """,
                (parent_row["id"], source, folder_ext_id),
            )
            # Count rows the UPDATE really touched, so a child missing from
            # the table is not reported as updated.
            updated += cursor.rowcount

    conn.commit()
    return updated
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def refresh_folder_counts(conn: sqlite3.Connection) -> dict:
    """Recompute the stored file counts and sizes of every folder.

    Step 1 sets each folder's ``direct_file_count`` and ``total_size_bytes``
    from its own non-removed files (matched by ``folder_id``). Step 2 seeds
    ``total_file_count`` with the direct count. Step 3 visits folders from
    the deepest ``path`` (most ``/`` characters) to the shallowest and adds
    each folder's running totals onto its ``parent_folder_id``, then writes
    the accumulated totals back. Each step is committed.

    Returns ``{"folders_updated", "total_direct_files", "max_folder_total"}``.
    """
    cur = conn.cursor()

    # Step 1: direct (non-recursive) figures straight from the files table.
    cur.execute(
        """
        UPDATE folders
        SET direct_file_count = COALESCE((
                SELECT COUNT(*) FROM files file
                WHERE file.folder_id = folders.id AND file.status != 'removed'
            ), 0),
            total_size_bytes = COALESCE((
                SELECT SUM(file.size_bytes) FROM files file
                WHERE file.folder_id = folders.id AND file.status != 'removed'
            ), 0)
        """
    )
    conn.commit()

    # Step 2: totals start out equal to the direct counts.
    cur.execute("UPDATE folders SET total_file_count = direct_file_count")
    conn.commit()

    # Step 3: deepest paths first, so a child's totals are complete before
    # they are added to its parent.
    cur.execute(
        """
        SELECT id, parent_folder_id, direct_file_count, total_size_bytes
        FROM folders
        ORDER BY LENGTH(path) - LENGTH(REPLACE(path, '/', '')) DESC
        """
    )
    deepest_first = cur.fetchall()

    running_counts: dict = {}
    running_sizes: dict = {}
    for rec in deepest_first:
        running_counts[rec["id"]] = rec["direct_file_count"] or 0
        running_sizes[rec["id"]] = rec["total_size_bytes"] or 0

    for rec in deepest_first:
        parent = rec["parent_folder_id"]
        if not parent or parent not in running_counts:
            continue
        child = rec["id"]
        running_counts[parent] += running_counts[child]
        running_sizes[parent] += running_sizes[child]

    cur.executemany(
        """
        UPDATE folders
        SET total_file_count = ?,
            total_size_bytes = ?
        WHERE id = ?
        """,
        [
            (total, running_sizes.get(fid, 0), fid)
            for fid, total in running_counts.items()
        ],
    )

    conn.commit()

    cur.execute(
        """
        SELECT
            COUNT(*) as folders,
            SUM(direct_file_count) as total_direct,
            MAX(total_file_count) as max_total
        FROM folders
        """
    )
    summary = cur.fetchone()

    stats = {"folders_updated": len(deepest_first)}
    stats["total_direct_files"] = summary["total_direct"] or 0
    stats["max_folder_total"] = summary["max_total"] or 0
    return stats
|