footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,794 @@
1
+ """fp upsert — create, update, or assign entity records.
2
+
3
+ Create/update (routes through ``service.upsert()``):
4
+ ``fp upsert client --name Acme --type external``
5
+ ``fp upsert clients data.csv`` (bulk CSV, tracked via IngestService)
6
+
7
+ Assign relationships (routes through ``service.assign()``):
8
+ ``fp upsert file 42 --project-id 3``
9
+ ``fp upsert files --folder /path --project-id 3`` (bulk path)
10
+ """
11
+
12
+ import csv
13
+ import os
14
+ import sqlite3
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from rich.table import Table
19
+
20
+ from footprinter.cli._common import (
21
+ FORMATTER,
22
+ add_json_flag,
23
+ console,
24
+ open_db,
25
+ output_json,
26
+ )
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Entity dispatch table
30
+ # ---------------------------------------------------------------------------
31
+
32
#: Dispatch table keyed by the CLI noun.
#:
#: Each value is ``(service module name, entity type, handler mode)``:
#:
#: * ``single``      - one client/project record built from CLI flags
#: * ``bulk``        - CSV import of clients/projects
#: * ``assign``      - attach one data entity (by ID) to a project/client
#: * ``bulk_assign`` - path-based assignment for files/folders
ENTITY_MAP: dict[str, tuple[str, str, str]] = {
    # --- create/update, singular noun -------------------------------------
    "client": ("client_service", "client", "single"),
    "project": ("project_service", "project", "single"),
    # --- create/update, plural noun (CSV) ---------------------------------
    "clients": ("client_service", "client", "bulk"),
    "projects": ("project_service", "project", "bulk"),
    # --- relationship assignment, singular data-entity noun ---------------
    "file": ("file_service", "file", "assign"),
    "email": ("email_service", "email", "assign"),
    "chat": ("chat_service", "chat", "assign"),
    "visit": ("visit_service", "visit", "assign"),
    "folder": ("folder_service", "folder", "assign"),
    # --- relationship assignment, plural data-entity noun (by path) -------
    "files": ("file_service", "file", "bulk_assign"),
    "folders": ("folder_service", "folder", "bulk_assign"),
}
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Status validation — imported from db layer (single source of truth)
53
+ # ---------------------------------------------------------------------------
54
+
55
+ from footprinter.db.clients import VALID_STATUSES as VALID_CLIENT_STATUSES
56
+ from footprinter.db.projects import VALID_STATUSES as VALID_PROJECT_STATUSES
57
+
58
#: Allowed ``--status`` values per entity type, re-exported from the db layer
#: so the CLI and the database share one definition.
VALID_STATUSES_BY_ENTITY: dict[str, frozenset[str]] = dict(
    client=VALID_CLIENT_STATUSES,
    project=VALID_PROJECT_STATUSES,
)
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Per-entity argument specs for single mode
65
+ # ---------------------------------------------------------------------------
66
+
67
#: Flag specifications for the singular ``client`` / ``project`` nouns.
#:
#: Every entry is ``(cli_flag, argparse_kwargs, service_kwarg_name)``.  The
#: third element is both the attribute read back from the parsed namespace and
#: the keyword passed to ``service.upsert()``, so any ``dest`` given in the
#: argparse kwargs has to agree with it.
SINGLE_ARGS: dict[str, list[tuple[str, dict, str]]] = {
    "client": [
        (
            "--name",
            {"required": True, "help": "Client name"},
            "name",
        ),
        (
            "--type",
            {"required": True, "help": "Client type (external, internal, personal)", "dest": "client_type"},
            "client_type",
        ),
        (
            "--path-pattern",
            {"default": None, "help": "Path pattern for client files"},
            "path_pattern",
        ),
        (
            "--status",
            {"default": None, "help": "Client status (active, hidden, removed)"},
            "status",
        ),
    ],
    "project": [
        (
            "--name",
            {"required": True, "help": "Project name", "dest": "project_name"},
            "project_name",
        ),
        (
            "--root-path",
            {"default": None, "help": "Project root path"},
            "root_path",
        ),
        (
            "--client-id",
            {"default": None, "type": int, "help": "Client ID"},
            "client_id",
        ),
        (
            "--project-type",
            {"default": None, "help": "Project type (python, node, etc.)"},
            "project_type",
        ),
        (
            "--description",
            {"default": None, "help": "Project description"},
            "description",
        ),
        (
            "--github-url",
            {"default": None, "help": "GitHub repository URL"},
            "github_url",
        ),
        (
            "--status",
            {
                "default": None,
                "help": "Project status (active, hidden, removed, paused, completed, abandoned, archived, merged)",
            },
            "status",
        ),
    ],
}
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Per-entity CSV column specs for bulk mode
99
+ # ---------------------------------------------------------------------------
100
+
101
#: CSV schema per entity type for bulk mode, as
#: ``(required_columns, optional_columns, int_columns)``.
#: Columns named in ``int_columns`` are converted with ``int()`` before the
#: row is handed to the service.
CSV_COLUMNS: dict[str, tuple[list[str], list[str], list[str]]] = {
    "client": (
        ["name", "client_type"],  # required
        ["slug", "path_pattern", "status"],  # optional
        [],  # integer-valued
    ),
    "project": (
        ["project_name"],  # required
        ["root_path", "client_id", "client", "project_type", "description", "github_url", "status"],  # optional
        ["client_id"],  # integer-valued
    ),
}
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Service resolution
117
+ # ---------------------------------------------------------------------------
118
+
119
+
120
def _get_service(service_name: str):
    """Look up *service_name* as an attribute of ``footprinter.services``.

    The services package is imported inside the function, so it is only
    loaded once a handler actually needs a service.
    """
    from footprinter import services

    return getattr(services, service_name)
125
+
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # Handlers
129
+ # ---------------------------------------------------------------------------
130
+
131
+
132
def _handle_single(args) -> None:
    """Create or update one client/project from CLI flags.

    Example: ``fp upsert client --name X --type Y``.

    Flags left unset (``None``) are not forwarded to ``service.upsert()``.
    An unknown ``--status`` or a ``ValueError`` from the service prints the
    message in red and exits with status 1.
    """
    from footprinter.services.roles import Role

    service_name, entity_type, _ = ENTITY_MAP[args.noun]
    service = _get_service(service_name)

    # Forward only the flags the user actually supplied.
    supplied = ((name, getattr(args, name, None)) for _, _, name in SINGLE_ARGS[entity_type])
    kwargs: dict = {name: value for name, value in supplied if value is not None}

    status = kwargs.get("status")
    if status is not None:
        allowed = VALID_STATUSES_BY_ENTITY.get(entity_type)
        if allowed and status not in allowed:
            console.print(f"[red]Invalid status '{status}' for {entity_type}. Valid: {', '.join(sorted(allowed))}[/red]")
            sys.exit(1)
        if status == "removed":
            # Record where the removal came from so the audit trail survives.
            kwargs["status_reason"] = "cli:upsert"

    with open_db() as conn:
        try:
            result = service.upsert(conn, role=Role.ADMIN, **kwargs)
        except ValueError as exc:
            console.print(f"[red]{exc}[/red]")
            sys.exit(1)

    if getattr(args, "json", False):
        output_json(result)
        return

    action = result.get("action", "done")
    console.print(f"[green]{entity_type.title()} {result['id']} {action}.[/green]")
172
+
173
+
174
+ def _validate_and_read_csv(
175
+ csv_path: Path,
176
+ required_cols: list[str],
177
+ ) -> list[dict]:
178
+ """Read and validate CSV structure. Returns rows or exits on error."""
179
+ if not csv_path.exists():
180
+ console.print(f"[red]File not found: {csv_path}[/red]")
181
+ sys.exit(1)
182
+
183
+ with open(csv_path, encoding="utf-8", newline="") as f:
184
+ reader = csv.DictReader(f)
185
+ if reader.fieldnames is None:
186
+ console.print("[red]Empty or invalid CSV file.[/red]")
187
+ sys.exit(1)
188
+
189
+ missing = set(required_cols) - set(reader.fieldnames)
190
+ if missing:
191
+ console.print(f"[red]Missing required columns: {', '.join(sorted(missing))}[/red]")
192
+ sys.exit(1)
193
+
194
+ return list(reader)
195
+
196
+
197
def _process_csv_rows(
    conn,
    rows: list[dict],
    service,
    entity_type: str,
    required_cols: list[str],
    optional_cols: list[str],
    int_cols: list[str],
) -> tuple[int, int, int, list[dict]]:
    """Upsert each CSV row through ``service.upsert()``.

    A row is skipped and reported (never raised) when an integer column does
    not parse, a required value is blank, a project's ``client`` name cannot
    be resolved, or the service raises ``ValueError``.

    Returns:
        ``(created, updated, errors, error_details)``.  Each entry of
        ``error_details`` is ``{"row": <1-based row number>, "error": <text>}``.
    """
    from footprinter.services.roles import Role

    n_created = 0
    n_updated = 0
    failures: list[dict] = []
    columns = required_cols + optional_cols

    def fail(row_no: int, message: str) -> None:
        failures.append({"row": row_no, "error": message})

    for row_no, row in enumerate(rows, 1):
        # Keep only recognised columns that carry a non-blank value.
        kwargs: dict = {}
        for col in columns:
            cell = row.get(col)
            if cell is None or cell == "":
                continue
            kwargs[col] = cell

        # Convert integer columns; the first unparsable one rejects the row.
        bad_int_col = None
        for col in int_cols:
            if col not in kwargs:
                continue
            try:
                kwargs[col] = int(kwargs[col])
            except (ValueError, TypeError):
                bad_int_col = col
                break
        if bad_int_col is not None:
            fail(row_no, f"Invalid {bad_int_col}: {kwargs[bad_int_col]!r}")
            continue

        # Blank cells were dropped above, so a blank required cell shows up here.
        absent = [col for col in required_cols if col not in kwargs]
        if absent:
            fail(row_no, f"Missing required values: {', '.join(absent)}")
            continue

        # Projects may name their client instead of giving its id.
        if entity_type == "project" and "client" in kwargs and "client_id" not in kwargs:
            from footprinter.db.clients import find_client_id_by_name

            client_name = kwargs.pop("client")
            client_id = find_client_id_by_name(conn, client_name)
            if client_id is None:
                fail(row_no, f"Client not found: {client_name!r}")
                continue
            kwargs["client_id"] = client_id

        # When both client and client_id were given, client_id wins.
        kwargs.pop("client", None)

        try:
            result = service.upsert(conn, role=Role.ADMIN, **kwargs)
            if result["action"] == "created":
                n_created += 1
            else:
                n_updated += 1
        except ValueError as exc:
            fail(row_no, str(exc))

    # Every failure is recorded exactly once, so the list length is the count.
    return n_created, n_updated, len(failures), failures
287
+
288
+
289
+ def _check_exists(conn, entity_type: str, kwargs: dict) -> bool:
290
+ """Check whether a record matching *kwargs* already exists."""
291
+ if entity_type == "client":
292
+ from footprinter.db.clients import find_client_id_by_name
293
+
294
+ return find_client_id_by_name(conn, kwargs.get("name", "")) is not None
295
+ if entity_type == "project":
296
+ from footprinter.db.projects import find_project_id_by_key
297
+
298
+ return (
299
+ find_project_id_by_key(
300
+ conn,
301
+ root_path=kwargs.get("root_path"),
302
+ project_name=kwargs.get("project_name"),
303
+ )
304
+ is not None
305
+ )
306
+ return False
307
+
308
+
309
def _dry_run_csv_rows(
    conn,
    rows: list[dict],
    service,
    entity_type: str,
    required_cols: list[str],
    optional_cols: list[str],
    int_cols: list[str],
) -> tuple[int, int, int, list[dict]]:
    """Preview a bulk import: validate rows without calling ``service.upsert()``.

    Applies the same per-row checks as the commit path: integer coercion,
    required values, and (for projects) resolution of a ``client`` name.
    Client rows are additionally checked against ``VALID_CLIENT_TYPES``.
    Rows that pass are classified by probing for an existing record.

    Args:
        conn: Database connection used for the lookups.
        rows: Parsed CSV rows.
        service: Unused here; kept so the signature mirrors
            ``_process_csv_rows``.
        entity_type: ``"client"`` or ``"project"``.
        required_cols: Columns that must hold a non-blank value.
        optional_cols: Further columns that are read when present.
        int_cols: Columns converted with ``int()``.

    Returns:
        ``(would_create, would_update, errors, error_details)``.  Each entry
        of ``error_details`` is ``{"row": <1-based row number>, "error": <text>}``.
    """
    from footprinter.db.clients import VALID_CLIENT_TYPES, find_client_id_by_name

    would_create = 0
    would_update = 0
    errors = 0
    error_details: list[dict] = []

    for i, row in enumerate(rows, 1):
        # Keep only recognised columns that carry a non-blank value.
        kwargs: dict = {}
        for col in required_cols + optional_cols:
            val = row.get(col)
            if val is not None and val != "":
                kwargs[col] = val

        # Coerce int columns; the first unparsable one rejects the row.
        row_bad = False
        for col in int_cols:
            if col in kwargs:
                try:
                    kwargs[col] = int(kwargs[col])
                except (ValueError, TypeError):
                    errors += 1
                    error_details.append(
                        {
                            "row": i,
                            "error": f"Invalid {col}: {kwargs[col]!r}",
                        }
                    )
                    row_bad = True
                    break
        if row_bad:
            continue

        # Check required values (blank cells were dropped above).
        missing_vals = [c for c in required_cols if c not in kwargs]
        if missing_vals:
            errors += 1
            error_details.append(
                {
                    "row": i,
                    "error": f"Missing required values: {', '.join(missing_vals)}",
                }
            )
            continue

        # Validate controlled values.
        if entity_type == "client":
            ct = kwargs.get("client_type", "")
            if ct not in VALID_CLIENT_TYPES:
                errors += 1
                error_details.append(
                    {
                        "row": i,
                        "error": (
                            f"Invalid client_type: {ct!r}."
                            f" Must be one of: {', '.join(sorted(VALID_CLIENT_TYPES))}"
                        ),
                    }
                )
                continue

        # The commit path rejects a project whose client name does not
        # resolve; report the same error here so the preview matches what
        # --commit will do.
        if entity_type == "project" and "client" in kwargs and "client_id" not in kwargs:
            client_name = kwargs["client"]
            if find_client_id_by_name(conn, client_name) is None:
                errors += 1
                error_details.append(
                    {
                        "row": i,
                        "error": f"Client not found: {client_name!r}",
                    }
                )
                continue

        # Probe existence to classify the row.
        if _check_exists(conn, entity_type, kwargs):
            would_update += 1
        else:
            would_create += 1

    return would_create, would_update, errors, error_details
387
+
388
+
389
def _handle_bulk(args) -> None:
    """Import clients/projects from a CSV file (``fp upsert clients data.csv``).

    Without ``--commit`` the rows are only validated and a preview is shown.
    With ``--commit`` the rows go through the service layer and the run is
    recorded via ``IngestService`` (begin, then complete or fail).
    Passing ``--dry-run`` together with ``--commit`` is rejected.
    """
    from footprinter.services.ingest_service import IngestService

    noun = args.noun
    service_name, entity_type, _ = ENTITY_MAP[noun]
    service = _get_service(service_name)
    required_cols, optional_cols, int_cols = CSV_COLUMNS[entity_type]
    csv_path = Path(args.file)
    want_json = getattr(args, "json", False)

    commit = getattr(args, "commit", False)
    if getattr(args, "dry_run", False) and commit:
        console.print("[red]Cannot use --dry-run and --commit together.[/red]")
        sys.exit(1)

    def report(summary: dict, title: str, metrics: list) -> None:
        """Emit *summary* as JSON, or *metrics* as a two-column table."""
        if want_json:
            output_json(summary)
            return
        table = Table(title=title)
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right")
        for label, count in metrics:
            table.add_row(label, str(count))
        console.print(table)

    rows = _validate_and_read_csv(csv_path, required_cols)

    if not rows:
        if want_json:
            output_json({"total": 0, "created": 0, "updated": 0, "errors": 0})
        else:
            console.print("[dim]No rows in CSV — nothing to do.[/dim]")
        return

    if not commit:
        # Preview only: validate and classify, nothing goes through upsert().
        with open_db() as conn:
            would_create, would_update, errors, error_details = _dry_run_csv_rows(
                conn,
                rows,
                service,
                entity_type,
                required_cols,
                optional_cols,
                int_cols,
            )

        total = would_create + would_update + errors
        summary = {
            "total": total,
            "would_create": would_create,
            "would_update": would_update,
            "errors": errors,
        }
        if error_details:
            summary["error_details"] = error_details

        report(
            summary,
            f"Dry run — {noun}",
            [
                ("Would create", would_create),
                ("Would update", would_update),
                ("Errors", errors),
                ("Total", total),
            ],
        )
        if not want_json:
            console.print("[dim]Pass --commit to apply these changes.[/dim]")
        return

    # Commit mode: write through the service layer with ingest tracking.
    with open_db() as conn:
        tracker = IngestService(conn)
        ingest_id = tracker.begin(f"upsert_{entity_type}", mode="bulk", trigger="cli:upsert")

        try:
            created, updated, errors, error_details = _process_csv_rows(
                conn,
                rows,
                service,
                entity_type,
                required_cols,
                optional_cols,
                int_cols,
            )

            tracker.complete(
                ingest_id,
                result={
                    "items_processed": created + updated + errors,
                    "items_new": created,
                    "items_updated": updated,
                    "errors": errors,
                },
                metadata={"error_details": error_details} if error_details else None,
            )

        except Exception as exc:
            # Mark the tracked run as failed before bailing out.
            tracker.fail(ingest_id, error=str(exc))
            console.print(f"[red]Bulk upsert failed: {exc}[/red]")
            sys.exit(1)

    total = created + updated + errors
    summary = {
        "total": total,
        "created": created,
        "updated": updated,
        "errors": errors,
    }
    if error_details:
        summary["error_details"] = error_details

    report(
        summary,
        f"Upsert {noun}",
        [
            ("Created", created),
            ("Updated", updated),
            ("Errors", errors),
            ("Total", total),
        ],
    )
507
+
508
+
509
def _handle_assign(args) -> None:
    """Attach one data entity to a project and/or client by ID.

    Example: ``fp upsert file 42 --project-id 3``.

    Exits with status 1 when neither target flag is given, when the service
    raises ``ValueError`` / ``PermissionError``, or when ``service.assign()``
    returns ``None`` (reported as "not found").
    """
    from footprinter.services.roles import Role

    service_name, entity_type, _ = ENTITY_MAP[args.noun]
    service = _get_service(service_name)

    entity_id = args.id
    project_id = getattr(args, "project_id", None)
    client_id = getattr(args, "client_id", None)

    if project_id is None and client_id is None:
        console.print("[red]At least one of --project-id or --client-id is required.[/red]")
        sys.exit(1)

    with open_db() as conn:
        try:
            result = service.assign(
                conn,
                entity_id,
                role=Role.ADMIN,
                project_id=project_id,
                client_id=client_id,
            )
        except (ValueError, PermissionError) as exc:
            console.print(f"[red]{exc}[/red]")
            sys.exit(1)

    if result is None:
        console.print(f"[red]{entity_type.title()} {entity_id} not found.[/red]")
        sys.exit(1)

    if getattr(args, "json", False):
        output_json(result)
        return

    # Name only the targets that were actually requested.
    targets = [
        f"{label} {value}"
        for label, value in (("project", project_id), ("client", client_id))
        if value is not None
    ]
    console.print(f"[green]{entity_type.title()} {entity_id} assigned to {' and '.join(targets)}.[/green]")
551
+
552
+
553
def _handle_bulk_assign(args) -> None:
    """Assign everything under a folder path to a project and/or client.

    ``files``: looks up the file IDs under the path and calls
    ``service.assign()`` once per file.
    ``folders``: resolves the folder row by path, then calls the db-layer
    ``cascade_project_id`` / ``cascade_client_id`` helpers.

    Exits with status 1 when neither target flag is given, when the folder
    row is not found, or on ``ValueError`` / ``PermissionError`` /
    ``sqlite3.OperationalError``.
    """
    from footprinter.services.roles import Role

    service_name, entity_type, _ = ENTITY_MAP[args.noun]
    service = _get_service(service_name)

    folder_path = os.path.expanduser(args.folder).rstrip("/")
    project_id = getattr(args, "project_id", None)
    client_id = getattr(args, "client_id", None)
    wants_project = project_id is not None
    wants_client = client_id is not None

    if not (wants_project or wants_client):
        console.print("[red]At least one of --project-id or --client-id is required.[/red]")
        sys.exit(1)

    project_assigned = 0
    client_assigned = 0
    files_touched = 0

    with open_db() as conn:
        try:
            if entity_type == "file":
                from footprinter.db.files import list_file_ids_under_path

                for file_id in list_file_ids_under_path(conn, folder_path):
                    outcome = service.assign(
                        conn,
                        file_id,
                        role=Role.ADMIN,
                        project_id=project_id,
                        client_id=client_id,
                    )
                    if outcome is None:
                        continue
                    files_touched += 1
                # One assign() call sets both fields on a file, so each
                # requested target has the same count as the files touched.
                if wants_project:
                    project_assigned = files_touched
                if wants_client:
                    client_assigned = files_touched
            elif entity_type == "folder":
                from footprinter.db.folders import (
                    cascade_client_id,
                    cascade_project_id,
                    get_folder_by_path,
                )

                folder_row = get_folder_by_path(conn, folder_path)
                if folder_row is None:
                    console.print(f"[red]Folder not found: {folder_path}[/red]")
                    sys.exit(1)
                folder_id = folder_row["id"]

                if wants_project:
                    cascaded = cascade_project_id(conn, folder_id, project_id)
                    project_assigned = cascaded["folders_updated"] + cascaded["files_updated"]
                if wants_client:
                    cascaded = cascade_client_id(conn, folder_id, client_id)
                    client_assigned = cascaded["folders_updated"] + cascaded["files_updated"]
        except (ValueError, PermissionError, sqlite3.OperationalError) as exc:
            console.print(f"[red]{exc}[/red]")
            sys.exit(1)

    # "assigned" counts entities touched, not field writes.  For folders the
    # larger of the two cascade counts is reported instead of their sum, so
    # an entity updated by both cascades is not counted twice.
    if entity_type == "file":
        assigned = files_touched
    else:
        assigned = max(project_assigned, client_assigned)

    summary: dict = {"assigned": assigned}
    if wants_project:
        summary["project_assigned"] = project_assigned
    if wants_client:
        summary["client_assigned"] = client_assigned

    if getattr(args, "json", False):
        output_json(summary)
    elif wants_project and wants_client:
        console.print(
            f"[green]Project assigned to {project_assigned} {entity_type}(s). "
            f"Client assigned to {client_assigned} {entity_type}(s).[/green]"
        )
    else:
        console.print(f"[green]{assigned} {entity_type}(s) assigned.[/green]")
643
+
644
+
645
+ # ---------------------------------------------------------------------------
646
+ # Registration
647
+ # ---------------------------------------------------------------------------
648
+
649
+
650
def register(subparsers) -> None:
    """Attach the ``upsert`` command and one sub-parser per noun.

    Nouns are registered in four groups, each bound to its own handler:
    ``client``/``project`` (flags from ``SINGLE_ARGS``), ``clients``/
    ``projects`` (CSV file plus ``--dry-run``/``--commit``), the singular
    data entities (ID plus target flags) and ``files``/``folders``
    (``--folder`` plus target flags).  Running ``fp upsert`` with no noun
    prints the command help.
    """
    description = "\n".join(
        (
            "Create or update records, or assign relationships.",
            "",
            "Single: fp upsert client --name Acme --type external",
            "Bulk: fp upsert clients data.csv",
            "Assign: fp upsert file 42 --project-id 3",
            "Bulk assign: fp upsert files --folder ~/Work/acme/ --project-id 3",
        )
    )
    epilog = "\n".join(
        (
            "examples:",
            " fp upsert client --name Acme --type external Create or update a client",
            " fp upsert project --name my-proj Create or update a project",
            " fp upsert clients data.csv Bulk import clients from CSV",
            " fp upsert file 42 --project-id 3 Assign file to project",
            " fp upsert email 10 --client-id 1 Assign email to client",
            " fp upsert files --folder ~/Work/acme --project-id 3 Assign files under folder",
            "",
            "entity nouns:",
            " create/update: client, project, clients, projects",
            " assign: file, email, chat, visit, folder",
            " bulk assign: files, folders",
            "",
            "tip: use 'fp upsert <noun> --help' for details on any noun.",
        )
    )

    parser = subparsers.add_parser(
        "upsert",
        help="Create or update entity records",
        description=description,
        epilog=epilog,
        formatter_class=FORMATTER,
    )
    noun_subs = parser.add_subparsers(
        dest="noun",
        metavar="NOUN",
        title="entity nouns (one required)",
    )
    # No noun given: fall back to showing the command help.
    parser.set_defaults(func=lambda args: parser.print_help())

    def add_target_flags(sub) -> None:
        """Add the --project-id / --client-id pair shared by the assign nouns."""
        sub.add_argument("--project-id", type=int, default=None, dest="project_id", help="Project ID to assign")
        sub.add_argument("--client-id", type=int, default=None, dest="client_id", help="Client ID to assign")

    # --- Singular nouns: per-entity CLI flags ------------------------------
    for noun in ("client", "project"):
        entity_type = ENTITY_MAP[noun][1]
        sub = noun_subs.add_parser(
            noun,
            help=f"Create or update a single {entity_type}",
            description=f"Upsert a single {entity_type} record from CLI flags.",
            formatter_class=FORMATTER,
        )
        for flag, ap_kwargs, _ in SINGLE_ARGS[entity_type]:
            sub.add_argument(flag, **ap_kwargs)
        add_json_flag(sub)
        sub.set_defaults(func=_handle_single)

    # --- Plural nouns: CSV file argument -----------------------------------
    # NOTE(review): the project status list below omits "merged", which the
    # singular --status help mentions; confirm against the db-layer constants.
    bulk_epilogs = {
        "clients": "\n".join(
            (
                "CSV columns:",
                " required: name, client_type",
                " optional: slug, path_pattern, status",
                "",
                " client_type values: external, internal, personal",
                " status values: active (default), hidden, removed",
                "",
                "example CSV:",
                " name,client_type,path_pattern",
                " Acme Corp,external,/Work/acme",
                " Internal Tools,internal,",
                "",
                "modes:",
                " Default is dry-run (validate only). Pass --commit to write.",
                " Existing records (matched by name) are updated, new ones created.",
            )
        ),
        "projects": "\n".join(
            (
                "CSV columns:",
                " required: project_name",
                " optional: root_path, client_id, client, project_type,",
                " description, github_url, status",
                "",
                " client: client name (resolved to client_id)",
                " status values: active (default), paused, completed, abandoned,",
                " archived, hidden, removed",
                "",
                "example CSV:",
                " project_name,client,project_type,root_path",
                " my-api,Acme Corp,python,/Work/acme/api",
                " docs-site,,node,/Work/docs",
                "",
                "modes:",
                " Default is dry-run (validate only). Pass --commit to write.",
                " Existing records (matched by root_path or project_name) are",
                " updated, new ones created.",
            )
        ),
    }
    for noun in ("clients", "projects"):
        sub = noun_subs.add_parser(
            noun,
            help=f"Bulk import {noun} from CSV",
            description=f"Bulk import {noun} from a CSV file.",
            epilog=bulk_epilogs[noun],
            formatter_class=FORMATTER,
        )
        sub.add_argument("file", help="Path to CSV file")
        sub.add_argument(
            "--dry-run",
            action="store_true",
            default=False,
            help="Validate and preview changes without writing (default behavior)",
        )
        sub.add_argument(
            "--commit",
            action="store_true",
            default=False,
            help="Apply validated changes to the database",
        )
        add_json_flag(sub)
        sub.set_defaults(func=_handle_bulk)

    # --- Singular data-entity nouns: relationship assignment ---------------
    for noun in ("file", "email", "chat", "visit", "folder"):
        entity_type = ENTITY_MAP[noun][1]
        sub = noun_subs.add_parser(
            noun,
            help=f"Assign a {entity_type} to a project or client",
            description=f"Assign a {entity_type} to a project and/or client by ID.",
            formatter_class=FORMATTER,
        )
        sub.add_argument("id", type=int, help=f"{entity_type.title()} ID")
        add_target_flags(sub)
        add_json_flag(sub)
        sub.set_defaults(func=_handle_assign)

    # --- Plural data-entity nouns: bulk path assignment --------------------
    for noun in ("files", "folders"):
        sub = noun_subs.add_parser(
            noun,
            help=f"Bulk assign {noun} under a folder",
            description=f"Assign all {noun} under a folder to a project and/or client.",
            formatter_class=FORMATTER,
        )
        sub.add_argument("--folder", required=True, help="Folder path to assign under")
        add_target_flags(sub)
        add_json_flag(sub)
        sub.set_defaults(func=_handle_bulk_assign)