footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ """fp delete — soft-delete entity records via the service layer.
2
+
3
+ Routes ``fp delete client 42`` through the service layer's ``delete()``
4
+ function, which sets ``status='removed'``. Requires confirmation unless
5
+ ``--yes`` is passed.
6
+ """
7
+
8
+ import sys
9
+
10
+ from footprinter.cli._common import (
11
+ FORMATTER,
12
+ add_json_flag,
13
+ console,
14
+ open_db,
15
+ output_json,
16
+ )
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Entity dispatch table
20
+ # ---------------------------------------------------------------------------
21
+
22
#: Dispatch table for ``fp delete``. Each deletable noun maps to a pair:
#: (attribute name looked up on ``footprinter.services``,
#:  key of the fetched record that holds the display name).
DELETABLE_ENTITIES: dict[str, tuple[str, str]] = dict(
    client=("client_service", "name"),
    project=("project_service", "project_name"),
)
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Service resolution
30
+ # ---------------------------------------------------------------------------
31
+
32
+
33
def _get_service(service_name: str):
    """Look up *service_name* as an attribute of ``footprinter.services``.

    The package is imported at call time, not at module import time.
    Raises ``AttributeError`` if the package exposes no such attribute.
    """
    from footprinter import services

    return getattr(services, service_name)
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Handler
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def _handle_delete(args) -> None:
    """Run ``fp delete <noun> <id>``.

    Reads ``args.noun``, ``args.id``, ``args.yes`` and (optionally)
    ``args.json``. Exits with status 1 when the ID is not an integer or the
    record cannot be fetched, and with status 0 when the user declines the
    confirmation prompt.
    """
    from footprinter.services.roles import Role

    noun = args.noun
    service_attr, label_key = DELETABLE_ENTITIES[noun]
    service = _get_service(service_attr)

    try:
        entity_id = int(args.id)
    except ValueError:
        console.print(f"[red]Invalid ID: {args.id!r} — expected an integer.[/red]")
        sys.exit(1)

    with open_db() as conn:
        # Fetch first so the prompt and the final message can show the name.
        record = service.get(conn, entity_id, role=Role.ADMIN)
        if record is None:
            console.print(f"[red]{noun.title()} {args.id} not found.[/red]")
            sys.exit(1)

        entity_name = record.get(label_key, "")

        confirmed = args.yes
        if not confirmed:
            from footprinter.cli._prompt import SafeConfirm

            confirmed = SafeConfirm.ask(
                f"Delete {noun} #{entity_id} ({entity_name})?",
                default=False,
            )
        if not confirmed:
            console.print("[dim]Cancelled.[/dim]")
            sys.exit(0)

        result = service.delete(conn, entity_id, role=Role.ADMIN)

    if not getattr(args, "json", False):
        console.print(f"Deleted {noun} #{entity_id} ({entity_name}).")
        return
    output_json(result)
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Registration
88
+ # ---------------------------------------------------------------------------
89
+
90
+
91
def register(subparsers) -> None:
    """Add the ``delete`` command and one sub-command per deletable noun.

    Invoking ``delete`` without a noun prints the command's help; each noun
    sub-command dispatches to ``_handle_delete``.
    """
    usage_examples = (
        "examples:\n"
        " fp delete client 42 Delete client #42\n"
        " fp delete project 7 --yes Skip confirmation\n"
        " fp delete client 1 --json JSON output\n"
    )
    parser = subparsers.add_parser(
        "delete",
        help="Soft-delete a record",
        description="Soft-delete a record by setting status to 'removed'.",
        epilog=usage_examples,
        formatter_class=FORMATTER,
    )
    noun_subs = parser.add_subparsers(
        dest="noun",
        metavar="NOUN",
        title="entity nouns (one required)",
    )

    def _show_help(args):
        """Fallback handler used when no noun was given."""
        return parser.print_help()

    parser.set_defaults(func=_show_help)

    for noun in DELETABLE_ENTITIES:
        noun_parser = noun_subs.add_parser(
            noun,
            help=f"Delete a {noun}",
            description=f"Soft-delete a {noun} record by ID.",
            formatter_class=FORMATTER,
        )
        noun_parser.add_argument("id", help=f"{noun.title()} ID")
        noun_parser.add_argument(
            "--yes",
            "-y",
            action="store_true",
            default=False,
            help="Skip confirmation prompt",
        )
        add_json_flag(noun_parser)
        noun_parser.set_defaults(func=_handle_delete)
@@ -0,0 +1,579 @@
1
+ """fp ingest — pipeline execution, import, and refresh.
2
+
3
+ Thin routing layer that delegates to existing orchestrator/analysis classes.
4
+ All heavy imports are deferred inside handler functions to keep ``fp --help`` fast.
5
+ """
6
+
7
+ import sys
8
+
9
+ from footprinter.cli._common import FORMATTER, add_json_flag, console, output_json
10
+
11
+ # ---------------------------------------------------------------------------
12
+ # argparse registration
13
+ # ---------------------------------------------------------------------------
14
+
15
+
16
def _build_parser(subparsers, name):
    """Build and return the ingest parser.

    Args:
        subparsers: The argparse sub-parsers action to attach the command to.
        name: Command name to register (e.g. ``"ingest"``).

    Returns:
        The newly created parser. Pipeline flags live on it directly and the
        ``status`` / ``import`` / ``refresh`` actions are nested sub-commands
        stored in ``ingest_action``.

    Note:
        ``import --quiet`` and ``refresh --full`` repeat destinations that the
        parent parser also defines. argparse copies every attribute of a
        sub-parser's namespace over the parent namespace, so a plain
        ``store_true`` default (``False``) on the sub-parser would silently
        erase ``fp ingest -q import ...`` or ``fp ingest --full refresh ...``.
        Those duplicates therefore use ``default=argparse.SUPPRESS``: the
        sub-parser only writes the attribute when the flag is actually given.
    """
    import argparse

    parser = subparsers.add_parser(
        name,
        help="Run the data ingest pipeline",
        description=(
            "Execute the data pipeline or manage pipeline operations.\n\n"
            "By default, runs all sources incrementally (new/updated only).\n"
            "Use --full to re-process everything. Use 'refresh <source>'\n"
            "to run a single source. The --pipe flag is available for\n"
            "power users who need to target specific internal pipes."
        ),
        epilog=(
            "examples:\n"
            " fp ingest All sources (incremental)\n"
            " fp ingest --full All sources (full re-process)\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh all --full Re-scan all sources (full)\n"
            " fp ingest --pipe local_files,browser Specific internal pipes\n"
            " fp ingest --rebuild-vectors Rebuild vectors (incremental)\n"
            " fp ingest --rebuild-vectors full Rebuild vectors (full reset)\n"
            " fp ingest status Show pipeline diagnostics\n"
            " fp ingest import export.zip Import a chat export"
        ),
        formatter_class=FORMATTER,
    )

    # Pipeline flags (on the parent parser, not sub-subparsers)
    parser.add_argument(
        "--pipe",
        "-s",
        type=str,
        metavar="PIPE",
        help="Comma-separated pipes to run (e.g. local_files,browser)",
    )
    parser.add_argument(
        "--full",
        "-f",
        action="store_true",
        help="Full mode: re-process everything (default: incremental)",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress Rich output (for scripts and cron)",
    )
    parser.add_argument(
        "--rebuild-vectors",
        nargs="?",
        const="incremental",
        default=None,
        choices=["incremental", "sync", "full"],
        metavar="MODE",
        help=(
            "Rebuild the vector store. Modes: incremental (default, "
            "process new/modified/removed only), sync (incremental + "
            "verify counts), full (delete and rebuild everything)"
        ),
    )
    parser.add_argument(
        "--vector-source",
        choices=["files", "chats", "all"],
        default="all",
        help="Which vectors to rebuild (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--phase",
        choices=["files", "messages", "chat_info"],
        default=None,
        help="Run a single rebuild phase (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--repair-fts",
        action="store_true",
        help="Drop and rebuild FTS search indexes",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Verbose logging to file",
    )

    # Sub-subparsers for ingest actions
    subs = parser.add_subparsers(dest="ingest_action", metavar="COMMAND", title="commands (one required)")

    # status
    status_p = subs.add_parser(
        "status",
        help="Show pipeline diagnostics",
        description="Show data counts and pipeline health diagnostics.",
        formatter_class=FORMATTER,
    )
    add_json_flag(status_p)

    # import
    import_p = subs.add_parser(
        "import",
        help="Import a chat export",
        description=(
            "Import a Claude or ChatGPT chat export.\n\n"
            "Accepts .zip files or extracted directories. Duplicate\n"
            "imports are detected and skipped."
        ),
        epilog=("examples:\n fp ingest import ~/Downloads/claude-export.zip\n fp ingest import ./extracted-chats/"),
        formatter_class=FORMATTER,
    )
    import_p.add_argument("path", help="Path to .zip file or extracted directory")
    import_p.add_argument(
        "--quiet",
        action="store_true",
        # SUPPRESS keeps a parent-level -q/--quiet from being reset to False.
        default=argparse.SUPPRESS,
        help="Suppress progress UI and summary output",
    )

    # refresh
    refresh_p = subs.add_parser(
        "refresh",
        help="Re-scan a data source (default: incremental)",
        description=(
            "Re-scan a data source, processing new and updated entries.\n\n"
            "Valid sources: local, browser, chat, and connector sources.\n"
            "Some sources require connectors. See fp connect list.\n"
            "Use --full to re-process everything."
        ),
        epilog=(
            "examples:\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh local --full Re-scan local files (full)\n"
            " fp ingest refresh browser Re-scan browser history\n"
            " fp ingest refresh all Re-scan everything"
        ),
        formatter_class=FORMATTER,
    )
    refresh_p.add_argument("source", help="Source to refresh (e.g. local, browser, chat, all)")
    refresh_p.add_argument(
        "--full",
        "-f",
        action="store_true",
        # SUPPRESS keeps a parent-level -f/--full from being reset to False.
        default=argparse.SUPPRESS,
        help="Full mode: re-process everything (default: incremental)",
    )

    return parser
156
+
157
+
158
def register(subparsers) -> None:
    """Attach the ``ingest`` command to *subparsers* and bind its dispatcher."""
    _build_parser(subparsers, "ingest").set_defaults(func=_handle_ingest)
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Dispatchers
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
def _handle_ingest(args) -> None:
    """Dispatch a parsed ``fp ingest`` invocation to its handler.

    Precedence: ``--repair-fts`` first, then ``--rebuild-vectors``; only when
    neither is set is the sub-command (or the bare pipeline run) considered.
    An unrecognised sub-command name is ignored.
    """
    if getattr(args, "repair_fts", False):
        from footprinter.ingest.cli import _repair_fts

        _repair_fts(quiet=getattr(args, "quiet", False))
        return

    vector_mode = getattr(args, "rebuild_vectors", None)
    if vector_mode:
        from footprinter.ingest.cli import _rebuild_vectors

        _rebuild_vectors(
            quiet=getattr(args, "quiet", False),
            source=getattr(args, "vector_source", "all"),
            phase=getattr(args, "phase", None),
            mode=vector_mode,
        )
        return

    action = getattr(args, "ingest_action", None)

    # Bare `fp ingest` or `fp ingest --pipe/--full`
    if action is None:
        _ingest_pipeline(args)
    elif action == "status":
        _ingest_status(args)
    elif action == "import":
        _ingest_import(args)
    elif action == "refresh":
        _ingest_refresh(args)
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Handlers
209
+ # ---------------------------------------------------------------------------
210
+
211
+
212
+ def _print_source_banner(config: dict, *, quiet: bool = False, console=None):
213
+ """Print a startup banner listing active and inactive data sources."""
214
+ if quiet:
215
+ return
216
+
217
+ if console is None:
218
+ from footprinter.cli._common import console as _console
219
+
220
+ console = _console
221
+
222
+ from footprinter.connectors import discover_connectors, is_configured, is_installed
223
+
224
+ console.print("[bold]Sources:[/bold]")
225
+ if config.get("directories"):
226
+ console.print(" [green]\u2713[/green] Local files")
227
+ else:
228
+ console.print(" [dim]\u2022 Local files (no directories configured)[/dim]")
229
+ if config.get("browsers"):
230
+ console.print(" [green]\u2713[/green] Browser history")
231
+ else:
232
+ console.print(" [dim]\u2022 Browser history (no browsers configured)[/dim]")
233
+
234
+ for name, spec in discover_connectors().items():
235
+ if is_installed(spec) and is_configured(spec, config):
236
+ console.print(f" [green]\u2713[/green] {spec.description}")
237
+ else:
238
+ console.print(f" [dim]\u2022 {spec.description} (fp connect install {name})[/dim]")
239
+
240
+ console.print()
241
+
242
+
243
def _run_with_logging(
    orchestrator,
    *,
    pipes=None,
    refresh_source=None,
    mode,
    quiet=False,
    verbose=False,
    header="Footprinter Data Pipeline",
    show_banner=False,
    show_next_steps=True,
):
    """Shared run helper: run lock, Rich Progress, file logging, run record, cleanup.

    Dispatch target:
    - ``refresh_source`` set → orchestrator.run_refresh(refresh_source)
    - non-empty ``pipes`` → orchestrator.run_pipes(pipes)
    - neither → orchestrator.run_pipeline("all")

    Shows a stage counter ("Stage 2/5: local_files") and intra-stage
    progress counts for adapters that report them via on_progress.

    Args:
        orchestrator: Pipeline orchestrator; it is always closed on exit.
        pipes: Optional list of pipe names to run.
        refresh_source: Optional refresh source name; wins over ``pipes``.
        mode: Label written to the log, the header and the run record.
        quiet: Suppress the header, progress display and per-stage lines.
        verbose: Log at DEBUG instead of INFO in the run log file.
        header: Title printed above the run.
        show_banner: Print the source banner before starting (non-quiet only).
        show_next_steps: Forwarded to ``print_results``.

    Raises:
        SystemExit: With status 1 if another run already holds the lock.
        KeyboardInterrupt: Re-raised after an ``interrupted`` run record is
            saved.
        Exception: Anything raised by the orchestrator propagates. The
            progress display is stopped first, whatever the exception type.
    """
    import fcntl
    import logging
    from datetime import datetime, timezone

    from footprinter.ingest.run_record import save_run_record
    from footprinter.ingest.status import print_results
    from footprinter.paths import get_run_lock_path, get_run_logs_dir, prune_run_logs
    from footprinter.utils.logging_config import add_file_handler

    # Acquire run lock (prevents concurrent fp ingest)
    lock_path = get_run_lock_path()
    lock_fd = open(lock_path, "w")
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        lock_fd.close()
        console.print("[red]Error:[/red] Another fp ingest is already in progress.")
        sys.exit(1)

    logger = logging.getLogger("footprinter")
    started_at = datetime.now(timezone.utc)
    results = []
    progress = None
    file_handler = None

    def stop_progress():
        """Stop the progress display at most once (safe to call repeatedly)."""
        nonlocal progress
        if progress is not None:
            display, progress = progress, None
            display.stop()

    try:
        # Prune old run logs before creating a new one
        prune_run_logs()

        # Set up file logging
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_path = get_run_logs_dir() / f"run_{timestamp}.log"
        file_handler = add_file_handler(
            log_path,
            level=logging.DEBUG if verbose else logging.INFO,
        )
        logger.info(
            "Pipeline started: mode=%s, header=%s",
            mode,
            header,
        )

        # Resolve stage list for counter display. The conditions mirror the
        # dispatch below so the displayed total matches what actually runs.
        if refresh_source is not None:
            stage_list = orchestrator.refresh_pipes.get(refresh_source, [])
        elif pipes:
            stage_list = pipes
        else:
            stage_list = orchestrator.runner.pipelines.get("all", [])
        total_stages = len(stage_list)
        stage_index = [0]  # mutable counter for closures
        current_task = [None]  # track active progress task

        # Rich Progress (unless quiet)
        if not quiet:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            console.print()
            console.print(f"[bold]{header}[/bold] [dim]({mode})[/dim]")
            console.print()

            if show_banner:
                _print_source_banner(orchestrator.config, console=console)

            progress = Progress(
                SpinnerColumn(),
                TextColumn("{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                console=console,
                transient=True,
            )
            progress.start()

        def on_start(stage):
            """Log and show progress task when a pipeline stage begins."""
            stage_index[0] += 1
            logger.info("Stage started: %s", stage)
            if progress is not None:
                label = f"Stage {stage_index[0]}/{total_stages}: [cyan]{stage}[/cyan]"
                current_task[0] = progress.add_task(label, total=None)

        def on_progress(count):
            """Update intra-stage progress count."""
            if progress is not None and current_task[0] is not None:
                progress.update(current_task[0], completed=count)

        def on_end(stage, result):
            """Log result, remove progress task, and print result line."""
            from footprinter.ingest.status import _stage_detail_string

            results.append(result)
            status = result.get("status", "unknown")
            elapsed = result.get("elapsed_seconds", 0)
            logger.info(
                "Stage ended: %s status=%s elapsed=%.1fs",
                stage,
                status,
                elapsed,
            )

            # Remove progress task before printing result line
            if progress is not None and current_task[0] is not None:
                progress.remove_task(current_task[0])
                current_task[0] = None

            if status in ("completed", "info"):
                icon = "[green]\u2713[/green]"
            elif status == "completed_with_errors":
                icon = "[yellow]\u26a0[/yellow]"
            elif status == "error":
                icon = "[red]\u2717[/red]"
            elif status == "skipped":
                icon = "[dim]\u25cb[/dim]"
            else:
                icon = "[dim]?[/dim]"

            if not quiet:
                details = _stage_detail_string(result)
                detail_part = f" {details}" if details else ""
                console.print(f" {icon} {stage}{detail_part} [dim]({elapsed:.1f}s)[/dim]")

        if refresh_source is not None:
            orchestrator.run_refresh(
                refresh_source,
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )
        elif pipes:
            orchestrator.run_pipes(
                pipes,
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )
        else:
            orchestrator.run_pipeline(
                "all",
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )

        stop_progress()

        # Save run record
        record_path = save_run_record(results, mode=mode, started_at=started_at)
        logger.info("Run record saved to %s", record_path)

        print_results(results, quiet=quiet, show_next_steps=show_next_steps)

        if not quiet:
            console.print(f"[dim]Log: {log_path}[/dim]")

    except KeyboardInterrupt:
        # Stop the live display before recording the partial run.
        stop_progress()
        record_path = save_run_record(results, mode=mode, started_at=started_at, interrupted=True)
        logger.info("Run record saved to %s", record_path)
        raise
    finally:
        try:
            # Covers every other exception type (not just ValueError), so a
            # failing stage never leaves the live display running.
            stop_progress()
        finally:
            lock_fd.close()
            if file_handler:
                logging.root.removeHandler(file_handler)
                file_handler.close()
            orchestrator.close()
442
+
443
+
444
def _ingest_pipeline(args) -> None:
    """Run the pipeline for bare ``fp ingest``, ``--pipe`` and ``--full``.

    ``--pipe`` is split on commas and each name is stripped; the names are
    validated before the run starts. A ``ValueError`` from validation or from
    the run exits with status 1; an interrupt during the run exits with 130.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    raw_pipes = getattr(args, "pipe", None)
    pipes = None
    if raw_pipes:
        pipes = [name.strip() for name in raw_pipes.split(",")]

    orchestrator = DataPipelineOrchestrator()
    orchestrator.full_mode = getattr(args, "full", False)
    quiet = getattr(args, "quiet", False)
    verbose = getattr(args, "verbose", False)
    mode_str = "incremental" if not orchestrator.full_mode else "full"

    def _abort(exc):
        """Print *exc* as an error line and exit with status 1."""
        console.print(f"[red]Error:[/red] {exc}")
        sys.exit(1)

    if pipes is not None:
        try:
            orchestrator.runner.validate_pipes(pipes)
        except ValueError as e:
            _abort(e)

    try:
        _run_with_logging(
            orchestrator,
            pipes=pipes,
            mode=mode_str,
            quiet=quiet,
            verbose=verbose,
            show_banner=True,
        )
    except ValueError as e:
        _abort(e)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)
479
+
480
+
481
def _ingest_status(args) -> None:
    """Report pipeline diagnostics as JSON or as formatted text.

    When the database file does not exist yet, emits an empty JSON object
    (``--json``) or a hint to run ``fp ingest``, and returns.
    """
    from footprinter.paths import get_db_path

    db_path = get_db_path()
    as_json = getattr(args, "json", False)

    if not db_path.exists():
        if as_json:
            output_json({})
        else:
            console.print("[dim]No database found. Run [bold]fp ingest[/bold] to start indexing.[/dim]")
        return

    # Deferred until a database is known to exist.
    from footprinter.ingest.status import get_status, print_status

    status = get_status(str(db_path))

    if as_json:
        output_json(status)
        return
    print_status(status, quiet=getattr(args, "quiet", False))
501
+
502
+
503
def _ingest_import(args) -> None:
    """Import a chat export file.

    Reads ``args.path`` and the optional ``args.quiet`` flag. With ``quiet``
    set, no console is handed to the indexer and the success summary is
    skipped. A failure is always reported before exiting with status 1, so a
    scripted ``--quiet`` run never fails silently; this matches the other
    ingest handlers, which print errors regardless of ``--quiet``.
    """
    from pathlib import Path

    from footprinter.ingest.chat_indexer import ChatIndexer
    from footprinter.ingest.database import Database
    from footprinter.paths import get_db_path

    quiet = getattr(args, "quiet", False)

    try:
        db = Database(str(get_db_path()))
        manager = ChatIndexer(db)
        result = manager.upload(Path(args.path), console=None if quiet else console)

        status = result.get("status", "unknown")
        if not quiet:
            if status == "duplicate":
                prev = result.get("previous_upload", {})
                console.print(
                    f"[yellow]Already imported[/yellow] (uploaded {prev.get('uploaded_at', 'unknown')})"
                )
            else:
                added = result.get("chats_added", 0)
                updated = result.get("chats_updated", 0)
                messages = result.get("messages_imported", 0)
                errors = result.get("errors", 0)
                console.print(
                    f"[green]Imported[/green] {added + updated} chats ({added} new, {updated} updated), {messages} messages"
                )
                if errors:
                    console.print(f"[yellow]Warning:[/yellow] {errors} chats failed to import")
    except Exception as e:
        # Top-level CLI boundary: report the failure even under --quiet,
        # which only suppresses progress UI and summary output.
        console.print(f"[red]Import failed:[/red] {e}")
        sys.exit(1)
539
+
540
+
541
def _ingest_refresh(args) -> None:
    """Re-scan the data source named by ``args.source``.

    The source is checked against the orchestrator's ``refresh_pipes`` before
    any lock or log setup; an unknown source lists the valid ones and exits
    with status 1. A ``ValueError`` from the run exits with status 1 and an
    interrupt exits with 130.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    orchestrator = DataPipelineOrchestrator()
    refresh_pipes = orchestrator.refresh_pipes
    source = args.source

    # Early source validation (before lock/log setup) for a clean exit on bad input.
    if source not in refresh_pipes:
        known = ", ".join(refresh_pipes.keys())
        console.print(f"[red]Error:[/red] Unknown refresh source: {source}")
        console.print(f"Valid sources: {known}")
        sys.exit(1)

    stages = refresh_pipes[source]
    orchestrator.full_mode = getattr(args, "full", False)
    mode_str = "incremental" if not orchestrator.full_mode else "full"
    run_options = {
        "refresh_source": source,
        "mode": mode_str,
        "quiet": getattr(args, "quiet", False),
        "verbose": getattr(args, "verbose", False),
        "header": f"Footprinter Refresh source={source}, {len(stages)} stages",
    }

    try:
        # Route through orchestrator.run_refresh so access_resolution (a POST_PIPE)
        # runs inline — run_pipes rejects POST_PIPES by design.
        _run_with_logging(orchestrator, **run_options)
    except ValueError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)