footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,879 @@
1
+ """fp data — export, template, and import commands for entity CSV data.
2
+
3
+ Export current data:
4
+ ``fp data export clients``
5
+ ``fp data export files --status active --limit 100``
6
+
7
+ Generate import-compatible templates:
8
+ ``fp data template clients``
9
+ ``fp data template files --file template.csv``
10
+
11
+ Import metadata corrections for data-source entities:
12
+ ``fp data import files corrections.csv``
13
+ ``fp data import files corrections.csv --commit``
14
+ """
15
+
16
+ import csv
17
+ import sys
18
+ from dataclasses import dataclass, field
19
+
20
+ from footprinter.cli._common import FORMATTER, add_json_flag, console, open_db, output_json
21
+ from footprinter.cli.upsert import CSV_COLUMNS
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Export column specs — derived from CSV_COLUMNS (import column spec)
25
+ # ---------------------------------------------------------------------------
26
+
27
#: Column order used when exporting clients and projects. Each list is the
#: required columns followed by the optional columns of the import column
#: spec (``CSV_COLUMNS``); ``client_id`` is dropped for projects because it is
#: an internal DB ID, not user-facing.
EXPORT_COLUMNS: dict[str, list[str]] = {
    "client": CSV_COLUMNS["client"][0] + CSV_COLUMNS["client"][1],
    "project": [
        column
        for group in (CSV_COLUMNS["project"][0], CSV_COLUMNS["project"][1])
        for column in group
        if column != "client_id"
    ],
}
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Export SQL queries (clients/projects only — data-source uses registry)
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
+ def _export_query(entity_type: str, status_filter: str | None) -> tuple[str, list]:
40
+ """Return (sql, params) for an unbounded export query."""
41
+ params: list = []
42
+ if entity_type == "client":
43
+ sql = "SELECT name, client_type, slug, path_pattern, status FROM clients"
44
+ if status_filter:
45
+ sql += " WHERE status = ?"
46
+ params.append(status_filter)
47
+ else:
48
+ sql += " WHERE status != 'removed'"
49
+ sql += " ORDER BY name"
50
+ else:
51
+ sql = (
52
+ "SELECT p.project_name, p.root_path, "
53
+ "COALESCE(c.name, '') AS client, "
54
+ "p.project_type, p.description, p.github_url, p.status "
55
+ "FROM projects p LEFT JOIN clients c ON p.client_id = c.id"
56
+ )
57
+ if status_filter:
58
+ sql += " WHERE p.status = ?"
59
+ params.append(status_filter)
60
+ else:
61
+ sql += " WHERE p.status != 'removed'"
62
+ sql += " ORDER BY p.project_name"
63
+ return sql, params
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Template example rows (clients/projects only)
68
+ # ---------------------------------------------------------------------------
69
+
70
# Example rows written by ``fp data template clients|projects``. Each row is
# built by pairing the column names with one tuple of sample values, so the
# key order of every row follows the column tuple.
TEMPLATE_ROWS: dict[str, list[dict]] = {
    "client": [
        dict(zip(("name", "client_type", "slug", "path_pattern", "status"), sample))
        for sample in (
            ("Acme Corp", "external", "", "~/Work/clients/acme/", "active"),
            ("Internal Tools", "internal", "", "", "active"),
            ("Side Project", "personal", "", "", "active"),
        )
    ],
    "project": [
        dict(
            zip(
                (
                    "project_name",
                    "root_path",
                    "client",
                    "project_type",
                    "description",
                    "github_url",
                    "status",
                ),
                sample,
            )
        )
        for sample in (
            ("My Web App", "~/Work/projects/my-app", "Acme Corp", "python", "A web application", "", "active"),
            ("Documentation", "~/Work/docs", "", "docs", "Internal documentation", "", "active"),
            ("Mobile App", "~/Work/mobile", "Internal Tools", "typescript", "Mobile app", "", "active"),
        )
    ],
}

# Human-readable lists of accepted values per column. They are printed as
# hints after a template and parsed (comma-split) to validate ``--status``.
VALID_VALUES_NOTES: dict[str, dict[str, str]] = {
    "client": {"client_type": "external, internal, personal", "status": "active, hidden, removed"},
    "project": {"status": "active, paused, completed, abandoned, removed"},
}
122
+
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # Data-source entity registry
126
+ # ---------------------------------------------------------------------------
127
+
128
+
129
@dataclass
class DataSourceSpec:
    """Describe how one data-source table is exported, templated, and imported as CSV."""

    # Name of the database table, interpolated into the generated SQL.
    table: str
    # Columns selected on export, in CSV column order.
    export_columns: list[str]
    # Subset of columns an import is allowed to update.
    writable_columns: list[str]
    # Expression placed after ORDER BY in the export query.
    order_by: str
    # Whether the table has a ``status`` column to filter on.
    has_status: bool
    # Example rows emitted by ``fp data template``.
    template_rows: list[dict] = field(default_factory=list)
    # Column name -> comma-separated list of accepted values.
    valid_values: dict[str, str] = field(default_factory=dict)
140
+
141
+
142
def _data_source_spec(
    table: str,
    columns: tuple[str, ...],
    writable: tuple[str, ...],
    samples: tuple[tuple[str, ...], ...],
    valid_values: dict[str, str],
    *,
    has_status: bool = True,
) -> DataSourceSpec:
    """Assemble a ``DataSourceSpec`` ordered by ``id``.

    ``samples`` holds one tuple of values per template row, positionally
    aligned with ``columns``. Fresh lists are created for every spec so that
    no two specs share a mutable column list.
    """
    return DataSourceSpec(
        table=table,
        export_columns=list(columns),
        writable_columns=list(writable),
        order_by="id",
        has_status=has_status,
        template_rows=[dict(zip(columns, sample)) for sample in samples],
        valid_values=valid_values,
    )


# Registry of data-source entities, keyed by the CLI noun.
DATA_SOURCE_SPECS: dict[str, DataSourceSpec] = {
    "files": _data_source_spec(
        "files",
        columns=(
            "id",
            "name",
            "path",
            "source",
            "status",
            "content_type",
            "size_bytes",
            "modified_at",
            "project_id",
            "client_id",
            "mcp_view",
            "mcp_read",
        ),
        writable=("status", "project_id", "client_id", "mcp_view", "mcp_read"),
        samples=(
            (
                "1",
                "readme.md",
                "/Users/me/Work/readme.md",
                "local",
                "active",
                "markdown",
                "1024",
                "2026-01-15T10:00:00Z",
                "1",
                "1",
                "visible",
                "allow",
            ),
            (
                "2",
                "notes.txt",
                "/Users/me/Work/notes.txt",
                "local",
                "hidden",
                "text",
                "512",
                "2026-02-01T10:00:00Z",
                "",
                "",
                "inherit",
                "inherit",
            ),
        ),
        valid_values={
            "status": "active, hidden, removed",
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
    ),
    "folders": _data_source_spec(
        "folders",
        columns=(
            "id",
            "path",
            "relative_path",
            "name",
            "source",
            "status",
            "project_id",
            "client_id",
            "mcp_view",
            "mcp_read",
        ),
        writable=("status", "project_id", "client_id", "mcp_view", "mcp_read"),
        samples=(
            ("1", "/Users/me/Work", "Work", "Work", "local", "active", "1", "", "visible", "allow"),
            ("2", "/Users/me/Personal", "Personal", "Personal", "local", "active", "", "", "inherit", "inherit"),
        ),
        valid_values={
            "status": "active, hidden, removed",
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
    ),
    "emails": _data_source_spec(
        "emails",
        columns=(
            "id",
            "message_id",
            "account",
            "subject",
            "from_address",
            "received_at",
            "status",
            "project_id",
            "client_id",
            "mcp_view",
            "mcp_read",
        ),
        writable=("status", "project_id", "client_id", "mcp_view", "mcp_read"),
        samples=(
            (
                "1",
                "msg-001@example.com",
                "work",
                "Project Update",
                "sender@example.com",
                "2026-02-01T09:00:00Z",
                "active",
                "1",
                "1",
                "visible",
                "allow",
            ),
            (
                "2",
                "msg-002@example.com",
                "personal",
                "Newsletter",
                "news@example.com",
                "2026-02-02T09:00:00Z",
                "active",
                "",
                "",
                "inherit",
                "inherit",
            ),
        ),
        valid_values={
            "status": "active, hidden, removed",
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
    ),
    "chats": _data_source_spec(
        "chats",
        columns=(
            "id",
            "external_id",
            "account",
            "title",
            "message_count",
            "status",
            "created_at",
            "updated_at",
            "project_id",
            "client_id",
            "mcp_view",
            "mcp_read",
        ),
        writable=("status", "project_id", "client_id", "mcp_view", "mcp_read"),
        samples=(
            (
                "1",
                "conv-001",
                "personal",
                "Architecture Chat",
                "5",
                "active",
                "2026-01-10T08:00:00Z",
                "2026-01-10T09:00:00Z",
                "1",
                "1",
                "visible",
                "allow",
            ),
            (
                "2",
                "conv-002",
                "personal",
                "Random Chat",
                "3",
                "active",
                "2026-01-11T08:00:00Z",
                "2026-01-11T09:00:00Z",
                "",
                "",
                "inherit",
                "inherit",
            ),
        ),
        valid_values={
            "status": "active, hidden, removed, merged",
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
    ),
    "messages": _data_source_spec(
        "messages",
        columns=("id", "chat_id", "message_id", "role", "created_at", "mcp_view", "mcp_read"),
        writable=("mcp_view", "mcp_read"),
        samples=(
            ("1", "1", "msg-1", "user", "2026-01-10T08:01:00Z", "visible", "allow"),
            ("2", "1", "msg-2", "assistant", "2026-01-10T08:02:00Z", "visible", "allow"),
        ),
        valid_values={
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
        has_status=False,
    ),
    "visits": _data_source_spec(
        "visits",
        columns=(
            "id",
            "url",
            "title",
            "visit_time",
            "browser",
            "status",
            "project_id",
            "client_id",
            "mcp_view",
            "mcp_read",
        ),
        writable=("status", "project_id", "client_id", "mcp_view", "mcp_read"),
        samples=(
            (
                "1",
                "https://example.com",
                "Example",
                "2026-03-01T12:00:00Z",
                "safari",
                "active",
                "1",
                "1",
                "visible",
                "allow",
            ),
            (
                "2",
                "https://news.com",
                "News",
                "2026-03-02T12:00:00Z",
                "chrome",
                "active",
                "",
                "",
                "inherit",
                "inherit",
            ),
        ),
        valid_values={
            "status": "active, hidden, removed",
            "mcp_view": "hidden, opaque, visible, inherit",
            "mcp_read": "allow, deny, inherit",
        },
    ),
}

#: Every noun accepted by export/template: clients and projects, followed by
#: the data-source entities in registry order.
ALL_EXPORT_NOUNS = ["clients", "projects", *DATA_SOURCE_SPECS]

#: Nouns accepted by import (data-source entities only).
IMPORT_NOUNS = list(DATA_SOURCE_SPECS)
449
+
450
+
451
+ # ---------------------------------------------------------------------------
452
+ # Handlers
453
+ # ---------------------------------------------------------------------------
454
+
455
+
456
+ def _write_csv(columns: list[str], rows: list[dict], file_path: str | None) -> None:
457
+ """Write CSV to file or stdout."""
458
+ if file_path:
459
+ with open(file_path, "w", newline="") as f:
460
+ writer = csv.DictWriter(f, fieldnames=columns, extrasaction="ignore")
461
+ writer.writeheader()
462
+ for row in rows:
463
+ writer.writerow({k: v if (v := row.get(k)) is not None else "" for k in columns})
464
+ else:
465
+ writer = csv.DictWriter(sys.stdout, fieldnames=columns, extrasaction="ignore")
466
+ writer.writeheader()
467
+ for row in rows:
468
+ writer.writerow({k: v if (v := row.get(k)) is not None else "" for k in columns})
469
+
470
+
471
def _handle_export(args) -> None:
    """Handle ``fp data export <noun>``.

    Data-source nouns are delegated to ``_handle_export_data_source``. For
    clients and projects this validates ``--status`` against the documented
    values (exiting with status 1 on an unknown one), runs the export query
    with any ``--limit``/``--offset``, and writes the rows as CSV to
    ``--file`` or stdout.
    """
    if args.noun in DATA_SOURCE_SPECS:
        _handle_export_data_source(args)
        return

    # Anything that is not "clients" is exported as projects.
    kind = "client" if args.noun == "clients" else "project"
    header = EXPORT_COLUMNS[kind]

    wanted_status = getattr(args, "status", None)
    if wanted_status:
        documented = VALID_VALUES_NOTES.get(kind, {}).get("status", "")
        known = {item.strip() for item in documented.split(",")} if documented else set()
        if known and wanted_status not in known:
            print(
                f"Unknown status '{wanted_status}'. Valid values: {', '.join(sorted(known))}",
                file=sys.stderr,
            )
            sys.exit(1)

    query, bindings = _export_query(kind, wanted_status)

    row_limit = getattr(args, "limit", None)
    row_offset = getattr(args, "offset", None)
    if row_limit is None and row_offset is not None:
        # SQLite only accepts OFFSET after a LIMIT; -1 means "no limit".
        query += " LIMIT -1"
    elif row_limit is not None:
        query += " LIMIT ?"
        bindings.append(row_limit)
    if row_offset is not None:
        query += " OFFSET ?"
        bindings.append(row_offset)

    with open_db() as conn:
        records = [dict(record) for record in conn.execute(query, bindings).fetchall()]

    _write_csv(header, records, getattr(args, "file", None))
514
+
515
+
516
def _handle_export_data_source(args) -> None:
    """Export one data-source entity described by ``DATA_SOURCE_SPECS``.

    A ``--status`` filter is rejected (exit status 1) when the entity has no
    status column or when the value is not among the spec's documented
    statuses. Without a filter, entities that have a status column omit rows
    marked ``'removed'``. Rows are ordered by the spec's ``order_by`` and
    written as CSV to ``--file`` or stdout.
    """
    entity = args.noun
    spec = DATA_SOURCE_SPECS[entity]
    header = spec.export_columns
    wanted_status = getattr(args, "status", None)

    if wanted_status:
        if not spec.has_status:
            print(
                f"Entity '{entity}' does not have a status column.",
                file=sys.stderr,
            )
            sys.exit(1)
        documented = spec.valid_values.get("status", "")
        known = {item.strip() for item in documented.split(",")} if documented else set()
        if known and wanted_status not in known:
            print(
                f"Unknown status '{wanted_status}'. Valid values: {', '.join(sorted(known))}",
                file=sys.stderr,
            )
            sys.exit(1)

    # Column and table names come from the in-module registry, not from user
    # input; only the values are bound as parameters.
    pieces = [f"SELECT {', '.join(header)} FROM {spec.table}"]  # noqa: S608
    bindings: list = []

    if spec.has_status:
        if wanted_status:
            pieces.append("WHERE status = ?")
            bindings.append(wanted_status)
        else:
            pieces.append("WHERE status != 'removed'")

    pieces.append(f"ORDER BY {spec.order_by}")

    row_limit = getattr(args, "limit", None)
    row_offset = getattr(args, "offset", None)
    if row_limit is None and row_offset is not None:
        # SQLite only accepts OFFSET after a LIMIT; -1 means "no limit".
        pieces.append("LIMIT -1")
    elif row_limit is not None:
        pieces.append("LIMIT ?")
        bindings.append(row_limit)
    if row_offset is not None:
        pieces.append("OFFSET ?")
        bindings.append(row_offset)

    with open_db() as conn:
        records = [dict(record) for record in conn.execute(" ".join(pieces), bindings).fetchall()]

    _write_csv(header, records, getattr(args, "file", None))
571
+
572
+
573
def _handle_template(args) -> None:
    """Handle ``fp data template <noun>``.

    Writes the entity's header and example rows as CSV to ``--file`` or
    stdout, then prints the accepted values for constrained columns to
    stderr so they never end up inside the CSV output.
    """
    entity = args.noun
    destination = getattr(args, "file", None)

    if entity in DATA_SOURCE_SPECS:
        # Data-source entities carry their template in the registry.
        spec = DATA_SOURCE_SPECS[entity]
        header = spec.export_columns
        examples = spec.template_rows
        hints = spec.valid_values
    else:
        # Clients and projects use the module-level tables.
        kind = "client" if entity == "clients" else "project"
        header = EXPORT_COLUMNS[kind]
        examples = TEMPLATE_ROWS[kind]
        hints = VALID_VALUES_NOTES.get(kind, {})

    _write_csv(header, examples, destination)

    if not hints:
        return
    print("\nValid values:", file=sys.stderr)
    for column, choices in hints.items():
        print(f"  {column}: {choices}", file=sys.stderr)
601
+
602
+
603
def _allowed_import_values(spec) -> dict[str, set[str]]:
    """Parse a spec's comma-separated valid-value notes into lookup sets."""
    return {
        column: {choice.strip() for choice in choices.split(",") if choice.strip()}
        for column, choices in spec.valid_values.items()
    }


def _plan_row_update(row: dict, csv_writable: list[str], allowed: dict[str, set[str]]):
    """Translate one CSV row into UPDATE fragments.

    Returns ``(set_parts, set_params, error)``. ``error`` is ``None`` on
    success; otherwise it is a message and the row must not be applied.
    """
    set_parts: list[str] = []
    set_params: list = []
    for col in csv_writable:
        # DictReader yields None for cells missing from a short row; treat
        # that like an empty cell so the existing value is preserved instead
        # of being overwritten with NULL.
        val = (row.get(col) or "").strip()
        if not val:
            continue  # Empty = skip (preserve existing)
        if col in ("project_id", "client_id"):
            if val == "0":
                bound = None  # Sentinel: "0" clears the reference to NULL
            else:
                try:
                    bound = int(val)
                except ValueError:
                    return [], [], f"Invalid {col}: {val!r}"
        elif col in allowed and val not in allowed[col]:
            choices = ", ".join(sorted(allowed[col]))
            return [], [], f"Invalid {col}: {val!r} (valid: {choices})"
        else:
            bound = val
        set_parts.append(f"{col} = ?")
        set_params.append(bound)
    return set_parts, set_params, None


def _print_import_summary(noun: str, summary: dict, dry_run: bool) -> None:
    """Render the import result table and any per-row errors to the console."""
    from rich.markup import escape
    from rich.table import Table

    table = Table(title=f"Dry run — import {noun}" if dry_run else f"Import {noun}")
    table.add_column("Metric", style="cyan")
    table.add_column("Count", justify="right")
    table.add_row("Would update" if dry_run else "Updated", str(summary["updated"]))
    table.add_row("Skipped (no changes)", str(summary["skipped"]))
    table.add_row("Errors", str(summary["errors"]))
    table.add_row("Total rows", str(summary["total"]))
    console.print(table)
    for detail in summary.get("error_details", []):
        # Error text echoes CSV cell values; escape it so stray brackets are
        # not interpreted as console markup.
        console.print(f"  [red]Row {detail['row']}: {escape(detail['error'])}[/red]")
    if dry_run:
        console.print("[dim]Pass --commit to apply these changes.[/dim]")


def _handle_import(args) -> None:
    """Handle ``fp data import <noun> <file>``.

    Reads a CSV that must contain an ``id`` column and updates the spec's
    writable columns on the matching records. Empty cells leave the stored
    value untouched; ``0`` in ``project_id``/``client_id`` clears it to NULL.
    Rows with a missing/invalid/unknown id, a non-integer project or client
    id, or a value outside the spec's documented valid values are counted as
    errors and not applied.

    The default is a dry run that only reports what would change; pass
    ``--commit`` to write. Exits with status 1 when the flags conflict, the
    file is missing, the CSV has no header or no ``id`` column, or none of
    the writable columns are present.
    """
    from pathlib import Path

    from rich.markup import escape

    noun = args.noun
    spec = DATA_SOURCE_SPECS[noun]
    csv_path = Path(args.file)
    has_dry_run = getattr(args, "dry_run", False)
    has_commit = getattr(args, "commit", False)
    if has_dry_run and has_commit:
        console.print("[red]Cannot use --dry-run and --commit together.[/red]")
        sys.exit(1)
    dry_run = not has_commit

    # is_file() also rejects directories, which would otherwise crash open().
    if not csv_path.is_file():
        console.print(f"[red]File not found: {escape(str(csv_path))}[/red]")
        sys.exit(1)

    # utf-8-sig strips the BOM that spreadsheet tools prepend; without it the
    # first header would read "\ufeffid" and fail the 'id' check below. Files
    # without a BOM decode exactly as plain UTF-8.
    with open(csv_path, encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        if reader.fieldnames is None:
            console.print("[red]Empty or invalid CSV file.[/red]")
            sys.exit(1)
        fieldnames = list(reader.fieldnames)
        if "id" not in fieldnames:
            console.print("[red]CSV must contain an 'id' column.[/red]")
            sys.exit(1)
        rows = list(reader)

    if not rows:
        if getattr(args, "json", False):
            output_json({"total": 0, "updated": 0, "skipped": 0, "errors": 0})
        else:
            console.print("[dim]No rows in CSV — nothing to do.[/dim]")
        return

    # Only writable columns that actually appear in the CSV are considered.
    csv_writable = [c for c in spec.writable_columns if c in fieldnames]
    if not csv_writable:
        console.print(
            f"[red]No writable columns found in CSV. "
            f"Writable columns for {noun}: {', '.join(spec.writable_columns)}[/red]"
        )
        sys.exit(1)

    allowed = _allowed_import_values(spec)
    updated = 0
    skipped = 0
    errors = 0
    error_details: list[dict] = []

    with open_db() as conn:
        # Begin ingest tracking before data writes (matches upsert.py pattern)
        ingest_svc = None
        ingest_id = None
        if not dry_run:
            from footprinter.services.ingest_service import IngestService

            ingest_svc = IngestService(conn)
            ingest_id = ingest_svc.begin(
                f"import_{noun}",
                mode="bulk",
                trigger="cli:data:import",
            )

        for i, row in enumerate(rows, 1):
            # A short row has None for missing cells, hence the "or".
            row_id = (row.get("id") or "").strip()
            if not row_id:
                errors += 1
                error_details.append({"row": i, "error": "Missing id value"})
                continue

            try:
                row_id_int = int(row_id)
            except ValueError:
                errors += 1
                error_details.append({"row": i, "error": f"Invalid id: {row_id!r}"})
                continue

            # The table name comes from the in-module registry, not the CSV.
            existing = conn.execute(
                f"SELECT id FROM {spec.table} WHERE id = ?",  # noqa: S608
                (row_id_int,),
            ).fetchone()
            if existing is None:
                errors += 1
                error_details.append({"row": i, "error": f"ID {row_id_int} not found"})
                continue

            set_parts, set_params, problem = _plan_row_update(row, csv_writable, allowed)
            if problem is not None:
                errors += 1
                error_details.append({"row": i, "error": problem})
                continue

            if not set_parts:
                skipped += 1
                continue

            if not dry_run:
                update_sql = (
                    f"UPDATE {spec.table} SET {', '.join(set_parts)} "  # noqa: S608
                    f"WHERE id = ?"
                )
                conn.execute(update_sql, [*set_params, row_id_int])
            updated += 1

        # Complete ingest tracking inside the same connection context as the
        # updates.
        if ingest_svc is not None and ingest_id is not None:
            ingest_svc.complete(
                ingest_id,
                result={
                    "items_processed": updated + skipped + errors,
                    "items_updated": updated,
                    "items_skipped": skipped,
                    "errors": errors,
                },
                metadata={"error_details": error_details} if error_details else None,
            )

    summary: dict = {
        "total": updated + skipped + errors,
        "updated": updated,
        "skipped": skipped,
        "errors": errors,
    }
    if error_details:
        summary["error_details"] = error_details

    if getattr(args, "json", False):
        output_json(summary)
    else:
        _print_import_summary(noun, summary, dry_run)
774
+
775
+
776
+ # ---------------------------------------------------------------------------
777
+ # Registration
778
+ # ---------------------------------------------------------------------------
779
+
780
+
781
def register(subparsers) -> None:
    """Attach the ``fp data`` command and its three actions to ``subparsers``.

    ``export`` and ``template`` accept every noun in ``ALL_EXPORT_NOUNS``;
    ``import`` accepts only ``IMPORT_NOUNS``. Running ``fp data`` without an
    action prints the command's help.
    """
    data_cmd = subparsers.add_parser(
        "data",
        help="Export data, generate templates, or import metadata corrections",
        formatter_class=FORMATTER,
    )

    def show_help(args) -> None:
        # Fallback when no ACTION is given.
        data_cmd.print_help()

    data_cmd.set_defaults(func=show_help)
    actions = data_cmd.add_subparsers(dest="data_action", metavar="ACTION")

    # -- fp data export ---------------------------------------------------
    export_cmd = actions.add_parser(
        "export",
        help="Export entity data as CSV",
        formatter_class=FORMATTER,
    )
    export_cmd.set_defaults(func=_handle_export)
    export_cmd.add_argument("noun", choices=ALL_EXPORT_NOUNS, help="Entity type to export")
    export_cmd.add_argument("--file", default=None, help="Write output to file instead of stdout")
    export_cmd.add_argument("--status", default=None, help="Filter by status (e.g., active)")
    for flag, text in (
        ("--limit", "Maximum number of rows to export"),
        ("--offset", "Number of rows to skip before exporting"),
    ):
        export_cmd.add_argument(flag, type=int, default=None, help=text)

    # -- fp data template -------------------------------------------------
    template_cmd = actions.add_parser(
        "template",
        help="Generate an import-compatible CSV template",
        formatter_class=FORMATTER,
    )
    template_cmd.set_defaults(func=_handle_template)
    template_cmd.add_argument("noun", choices=ALL_EXPORT_NOUNS, help="Entity type for template")
    template_cmd.add_argument("--file", default=None, help="Write template to file instead of stdout")

    # -- fp data import ---------------------------------------------------
    import_description = "\n".join(
        [
            "Import metadata corrections for data-source entities.",
            "",
            "Reads a CSV file with an 'id' column and updates writable metadata",
            "columns. Pipeline-managed fields (path, external_id, etc.) are",
            "read-only and ignored during import.",
            "",
            "Default mode is dry-run (preview only). Pass --commit to apply.",
        ]
    )
    import_cmd = actions.add_parser(
        "import",
        help="Import metadata corrections from CSV",
        description=import_description,
        formatter_class=FORMATTER,
    )
    import_cmd.add_argument("noun", choices=IMPORT_NOUNS, help="Entity type to import")
    import_cmd.add_argument("file", help="Path to CSV file")
    for flag, text in (
        ("--dry-run", "Preview changes without writing (default behavior)"),
        ("--commit", "Apply changes to the database"),
    ):
        import_cmd.add_argument(flag, action="store_true", default=False, help=text)
    add_json_flag(import_parser := import_cmd)
    import_parser.set_defaults(func=_handle_import)