footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ """fp delete — soft-delete entity records via the service layer.
2
+
3
+ Routes ``fp delete client 42`` through the service layer's ``delete()``
4
+ function, which sets ``status='removed'``. Requires confirmation unless
5
+ ``--yes`` is passed.
6
+ """
7
+
8
+ import sys
9
+
10
+ from footprinter.cli._common import (
11
+ FORMATTER,
12
+ add_json_flag,
13
+ console,
14
+ open_db,
15
+ output_json,
16
+ )
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Entity dispatch table
20
+ # ---------------------------------------------------------------------------
21
+
22
#: Dispatch table for ``fp delete``. Each deletable noun maps to a
#: ``(service_module, name_key)`` pair: the attribute looked up on
#: ``footprinter.services`` and the record key read for the display name.
DELETABLE_ENTITIES: dict[str, tuple[str, str]] = dict(
    client=("client_service", "name"),
    project=("project_service", "project_name"),
)
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Service resolution
30
+ # ---------------------------------------------------------------------------
31
+
32
+
33
def _get_service(service_name: str):
    """Return the attribute ``service_name`` of the ``footprinter.services`` package.

    The package is imported at call time rather than at module import time.
    Raises ``AttributeError`` when the package exposes no such attribute.
    """
    from footprinter import services

    return getattr(services, service_name)
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Handler
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def _handle_delete(args) -> None:
    """Run ``fp delete <noun> <id>``.

    Resolves the service for ``args.noun``, validates ``args.id`` as an
    integer, looks the record up with the admin role, asks for confirmation
    unless ``args.yes`` is set, and then calls the service's ``delete()``.

    Exits with status 1 for a non-integer ID or a missing record, and with
    status 0 when the user declines the confirmation prompt.
    """
    from footprinter.services.roles import Role

    kind = args.noun
    module_name, label_key = DELETABLE_ENTITIES[kind]
    svc = _get_service(module_name)

    raw_id = args.id
    try:
        record_id = int(raw_id)
    except ValueError:
        console.print(f"[red]Invalid ID: {raw_id!r} — expected an integer.[/red]")
        sys.exit(1)

    with open_db() as conn:
        existing = svc.get(conn, record_id, role=Role.ADMIN)
        if existing is None:
            console.print(f"[red]{kind.title()} {raw_id} not found.[/red]")
            sys.exit(1)

        # Display name used in both the prompt and the success message.
        label = existing.get(label_key, "")

        if not args.yes:
            from footprinter.cli._prompt import SafeConfirm

            confirmed = SafeConfirm.ask(
                f"Delete {kind} #{record_id} ({label})?",
                default=False,
            )
            if not confirmed:
                console.print("[dim]Cancelled.[/dim]")
                sys.exit(0)

        outcome = svc.delete(conn, record_id, role=Role.ADMIN)

        wants_json = getattr(args, "json", False)
        if wants_json:
            output_json(outcome)
        else:
            console.print(f"Deleted {kind} #{record_id} ({label}).")
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Registration
88
+ # ---------------------------------------------------------------------------
89
+
90
+
91
def register(subparsers) -> None:
    """Add the ``delete`` command and one sub-command per deletable noun.

    Running ``fp delete`` without a noun prints the command's help. Each noun
    sub-command takes a positional ``id``, a ``--yes``/``-y`` flag, the shared
    JSON flag, and dispatches to ``_handle_delete``.
    """
    usage_examples = (
        "examples:\n"
        " fp delete client 42 Delete client #42\n"
        " fp delete project 7 --yes Skip confirmation\n"
        " fp delete client 1 --json JSON output\n"
    )
    delete_parser = subparsers.add_parser(
        "delete",
        help="Soft-delete a record",
        description="Soft-delete a record by setting status to 'removed'.",
        epilog=usage_examples,
        formatter_class=FORMATTER,
    )

    def _show_help(args):
        """Fallback handler used when no noun is supplied."""
        return delete_parser.print_help()

    delete_parser.set_defaults(func=_show_help)

    noun_parsers = delete_parser.add_subparsers(
        dest="noun",
        metavar="NOUN",
        title="entity nouns (one required)",
    )

    for entity in DELETABLE_ENTITIES:
        entity_parser = noun_parsers.add_parser(
            entity,
            help=f"Delete a {entity}",
            description=f"Soft-delete a {entity} record by ID.",
            formatter_class=FORMATTER,
        )
        entity_parser.add_argument("id", help=f"{entity.title()} ID")
        # store_true already defaults to False.
        entity_parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompt")
        add_json_flag(entity_parser)
        entity_parser.set_defaults(func=_handle_delete)
@@ -0,0 +1,543 @@
1
+ """fp ingest — pipeline execution, import, and refresh.
2
+
3
+ Thin routing layer that delegates to existing orchestrator/analysis classes.
4
+ All heavy imports are deferred inside handler functions to keep ``fp --help`` fast.
5
+ """
6
+
7
+ import sys
8
+
9
+ from footprinter.cli._common import FORMATTER, add_json_flag, console, output_json
10
+
11
+ # ---------------------------------------------------------------------------
12
+ # argparse registration
13
+ # ---------------------------------------------------------------------------
14
+
15
+
16
def _build_parser(subparsers, name):
    """Create the ingest command parser and return it.

    Args:
        subparsers: The argparse sub-parsers action to attach the command to.
        name: Command name to register (``register`` passes ``"ingest"``).

    Returns:
        The parser for the command. Pipeline flags live on this parser; the
        ``status``, ``import`` and ``refresh`` actions are sub-parsers whose
        name is stored in ``ingest_action``.
    """
    import argparse

    parser = subparsers.add_parser(
        name,
        help="Run the data ingest pipeline",
        description=(
            "Execute the data pipeline or manage pipeline operations.\n\n"
            "By default, runs all sources incrementally (new/updated only).\n"
            "Use --full to re-process everything. Use 'refresh <source>'\n"
            "to run a single source. The --pipe flag is available for\n"
            "power users who need to target specific internal pipes."
        ),
        epilog=(
            "examples:\n"
            " fp ingest All sources (incremental)\n"
            " fp ingest --full All sources (full re-process)\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh all --full Re-scan all sources (full)\n"
            " fp ingest --pipe local_files,browser Specific internal pipes\n"
            " fp ingest --rebuild-vectors Rebuild vectors (incremental)\n"
            " fp ingest --rebuild-vectors full Rebuild vectors (full reset)\n"
            " fp ingest status Show pipeline diagnostics\n"
            " fp ingest import export.zip Import a chat export"
        ),
        formatter_class=FORMATTER,
    )

    # Pipeline flags (on the parent parser, not sub-subparsers)
    parser.add_argument(
        "--pipe",
        "-s",
        type=str,
        metavar="PIPE",
        help="Comma-separated pipes to run (e.g. local_files,browser)",
    )
    parser.add_argument(
        "--full",
        "-f",
        action="store_true",
        help="Full mode: re-process everything (default: incremental)",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress Rich output (for scripts and cron)",
    )
    # A bare ``--rebuild-vectors`` (no MODE) resolves to "incremental" via const.
    parser.add_argument(
        "--rebuild-vectors",
        nargs="?",
        const="incremental",
        default=None,
        choices=["incremental", "sync", "full"],
        metavar="MODE",
        help=(
            "Rebuild the vector store. Modes: incremental (default, "
            "process new/modified/removed only), sync (incremental + "
            "verify counts), full (delete and rebuild everything)"
        ),
    )
    parser.add_argument(
        "--vector-source",
        choices=["files", "chats", "all"],
        default="all",
        help="Which vectors to rebuild (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--phase",
        choices=["files", "messages", "chat_info"],
        default=None,
        help="Run a single rebuild phase (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--repair-fts",
        action="store_true",
        help="Drop and rebuild FTS search indexes",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Verbose logging to file",
    )

    # Sub-subparsers for ingest actions
    subs = parser.add_subparsers(dest="ingest_action", metavar="COMMAND", title="commands (one required)")

    # status
    status_p = subs.add_parser(
        "status",
        help="Show pipeline diagnostics",
        description="Show data counts and pipeline health diagnostics.",
        formatter_class=FORMATTER,
    )
    add_json_flag(status_p)

    # import
    import_p = subs.add_parser(
        "import",
        help="Import a chat export",
        description=(
            "Import a Claude or ChatGPT chat export.\n\n"
            "Accepts .zip files or extracted directories. Duplicate\n"
            "imports are detected and skipped."
        ),
        epilog=("examples:\n fp ingest import ~/Downloads/claude-export.zip\n fp ingest import ./extracted-chats/"),
        formatter_class=FORMATTER,
    )
    import_p.add_argument("path", help="Path to .zip file or extracted directory")

    # refresh
    refresh_p = subs.add_parser(
        "refresh",
        help="Re-scan a data source (default: incremental)",
        description=(
            "Re-scan a data source, processing new and updated entries.\n\n"
            "Valid sources: local, browser, chat, and connector sources.\n"
            "Some sources require connectors. See fp connect list.\n"
            "Use --full to re-process everything."
        ),
        epilog=(
            "examples:\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh local --full Re-scan local files (full)\n"
            " fp ingest refresh browser Re-scan browser history\n"
            " fp ingest refresh all Re-scan everything"
        ),
        formatter_class=FORMATTER,
    )
    refresh_p.add_argument("source", help="Source to refresh (e.g. local, browser, chat, all)")
    # argparse copies every attribute of a sub-parser's namespace, defaults
    # included, onto the parent namespace. With the implicit ``False`` default
    # this flag would overwrite a ``--full`` given before ``refresh``
    # (``fp ingest --full refresh local``). SUPPRESS keeps the attribute unset
    # unless the flag is actually passed here, so the parent's value survives.
    refresh_p.add_argument(
        "--full",
        "-f",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Full mode: re-process everything (default: incremental)",
    )

    return parser
151
+
152
+
153
def register(subparsers) -> None:
    """Attach the ``ingest`` command to *subparsers* and dispatch it to ``_handle_ingest``."""
    _build_parser(subparsers, "ingest").set_defaults(func=_handle_ingest)
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Dispatchers
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
def _handle_ingest(args) -> None:
    """Dispatch an ``fp ingest`` invocation to the matching handler.

    Precedence: ``--repair-fts`` first, then ``--rebuild-vectors``, then the
    sub-command stored in ``ingest_action``. With no sub-command the pipeline
    itself is run. An unrecognised action is ignored.
    """
    # Maintenance flags win over any sub-command.
    if getattr(args, "repair_fts", False):
        from footprinter.ingest.cli import _repair_fts

        _repair_fts(quiet=getattr(args, "quiet", False))
        return

    vector_mode = getattr(args, "rebuild_vectors", None)
    if vector_mode:
        from footprinter.ingest.cli import _rebuild_vectors

        _rebuild_vectors(
            quiet=getattr(args, "quiet", False),
            source=getattr(args, "vector_source", "all"),
            phase=getattr(args, "phase", None),
            mode=vector_mode,
        )
        return

    requested = getattr(args, "ingest_action", None)
    if requested is None:
        # Bare `fp ingest`, optionally with --pipe/--full.
        _ingest_pipeline(args)
    elif requested == "status":
        _ingest_status(args)
    elif requested == "import":
        _ingest_import(args)
    elif requested == "refresh":
        _ingest_refresh(args)
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # Handlers
204
+ # ---------------------------------------------------------------------------
205
+
206
+
207
+ def _print_source_banner(config: dict, *, quiet: bool = False, console=None):
208
+ """Print a startup banner listing active and inactive data sources."""
209
+ if quiet:
210
+ return
211
+
212
+ if console is None:
213
+ from footprinter.cli._common import console as _console
214
+
215
+ console = _console
216
+
217
+ from footprinter.connectors import discover_connectors, is_configured, is_installed
218
+
219
+ console.print("[bold]Sources:[/bold]")
220
+ if config.get("directories"):
221
+ console.print(" [green]\u2713[/green] Local files")
222
+ else:
223
+ console.print(" [dim]\u2022 Local files (no directories configured)[/dim]")
224
+ if config.get("browsers"):
225
+ console.print(" [green]\u2713[/green] Browser history")
226
+ else:
227
+ console.print(" [dim]\u2022 Browser history (no browsers configured)[/dim]")
228
+
229
+ for name, spec in discover_connectors().items():
230
+ if is_installed(spec) and is_configured(spec, config):
231
+ console.print(f" [green]\u2713[/green] {spec.description}")
232
+ else:
233
+ console.print(f" [dim]\u2022 {spec.description} (fp connect install {name})[/dim]")
234
+
235
+ console.print()
236
+
237
+
238
def _run_with_logging(
    orchestrator,
    *,
    pipes=None,
    mode,
    quiet=False,
    verbose=False,
    header="Footprinter Data Pipeline",
    show_banner=False,
    show_next_steps=True,
):
    """Shared run helper: Rich Progress, file logging, run record, cleanup.

    If ``pipes`` is not provided (``None`` or empty), the ``"all"`` pipeline
    is run. Shows a stage counter ("Stage 2/5: local_files") and intra-stage
    progress counts for adapters that report them via on_progress.

    Args:
        orchestrator: Pipeline orchestrator; this function uses its
            ``run_pipes``/``run_pipeline``, ``runner.pipelines``, ``config``
            and ``close`` members. It is always closed before returning.
        pipes: Optional list of pipe names to run instead of ``"all"``.
        mode: Label for the run ("full"/"incremental" at the call sites),
            written to the log, the header, and the run record.
        quiet: Suppress console output and the progress display.
        verbose: Log to the run log file at DEBUG instead of INFO.
        header: Title printed above the run.
        show_banner: Print the data-source banner before starting.
        show_next_steps: Forwarded to ``print_results``.

    Raises:
        SystemExit: Status 1 when another run already holds the run lock.
        KeyboardInterrupt: Re-raised after an "interrupted" run record is saved.
        Exception: Anything raised by the pipeline propagates after cleanup.
    """
    import fcntl
    import logging
    from datetime import datetime, timezone

    from footprinter.ingest.run_record import save_run_record
    from footprinter.ingest.status import print_results
    from footprinter.paths import get_run_lock_path, get_run_logs_dir, prune_run_logs
    from footprinter.utils.logging_config import add_file_handler

    log = logging.getLogger("footprinter")

    # Acquire run lock (prevents concurrent fp ingest)
    lock_path = get_run_lock_path()
    lock_fd = open(lock_path, "w")
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        lock_fd.close()
        console.print("[red]Error:[/red] Another fp ingest is already in progress.")
        sys.exit(1)

    started_at = datetime.now(timezone.utc)
    results = []
    progress = None
    file_handler = None

    def stop_progress():
        """Stop the progress display at most once."""
        nonlocal progress
        if progress is not None:
            active, progress = progress, None
            active.stop()

    try:
        # Prune old run logs before creating a new one
        prune_run_logs()

        # Set up file logging
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_path = get_run_logs_dir() / f"run_{timestamp}.log"
        file_handler = add_file_handler(
            log_path,
            level=logging.DEBUG if verbose else logging.INFO,
        )
        log.info(
            "Pipeline started: mode=%s, header=%s",
            mode,
            header,
        )

        # Resolve stage list for counter display. Uses the same truthiness
        # test as the run branch below, so an empty ``pipes`` list (which
        # runs "all") also counts the "all" stages instead of showing "k/0".
        stage_list = pipes if pipes else orchestrator.runner.pipelines.get("all", [])
        total_stages = len(stage_list)
        stage_number = 0
        current_task = None  # active progress task id, if any

        # Rich Progress (unless quiet)
        if not quiet:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            console.print()
            console.print(f"[bold]{header}[/bold] [dim]({mode})[/dim]")
            console.print()

            if show_banner:
                _print_source_banner(orchestrator.config, console=console)

            progress = Progress(
                SpinnerColumn(),
                TextColumn("{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                console=console,
                transient=True,
            )
            progress.start()

        def on_start(stage):
            """Log and show progress task when a pipeline stage begins."""
            nonlocal stage_number, current_task
            stage_number += 1
            log.info("Stage started: %s", stage)
            if progress is not None:
                label = f"Stage {stage_number}/{total_stages}: [cyan]{stage}[/cyan]"
                current_task = progress.add_task(label, total=None)

        def on_progress(count):
            """Update intra-stage progress count."""
            if progress is not None and current_task is not None:
                progress.update(current_task, completed=count)

        def on_end(stage, result):
            """Log result, remove progress task, and print result line."""
            nonlocal current_task
            from footprinter.ingest.status import _stage_detail_string

            results.append(result)
            status = result.get("status", "unknown")
            elapsed = result.get("elapsed_seconds", 0)
            log.info(
                "Stage ended: %s status=%s elapsed=%.1fs",
                stage,
                status,
                elapsed,
            )

            # Remove progress task before printing result line
            if progress is not None and current_task is not None:
                progress.remove_task(current_task)
                current_task = None

            if quiet:
                return

            if status in ("completed", "info"):
                icon = "[green]\u2713[/green]"
            elif status == "completed_with_errors":
                icon = "[yellow]\u26a0[/yellow]"
            elif status == "error":
                icon = "[red]\u2717[/red]"
            elif status == "skipped":
                icon = "[dim]\u25cb[/dim]"
            else:
                icon = "[dim]?[/dim]"

            details = _stage_detail_string(result)
            detail_part = f" {details}" if details else ""
            console.print(f" {icon} {stage}{detail_part} [dim]({elapsed:.1f}s)[/dim]")

        if pipes:
            orchestrator.run_pipes(
                pipes,
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )
        else:
            orchestrator.run_pipeline(
                "all",
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )

        # End the live display before the summary is printed.
        stop_progress()

        # Save run record
        record_path = save_run_record(results, mode=mode, started_at=started_at)
        log.info("Run record saved to %s", record_path)

        print_results(results, quiet=quiet, show_next_steps=show_next_steps)

        if not quiet:
            console.print(f"[dim]Log: {log_path}[/dim]")

    except KeyboardInterrupt:
        stop_progress()
        record_path = save_run_record(results, mode=mode, started_at=started_at, interrupted=True)
        log.info("Run record saved to %s", record_path)
        raise
    finally:
        # Stop the progress display for every exit path, not only
        # ValueError/KeyboardInterrupt, so an unexpected exception does not
        # leave the live display running.
        try:
            stop_progress()
        finally:
            lock_fd.close()
            if file_handler:
                logging.root.removeHandler(file_handler)
                file_handler.close()
            orchestrator.close()
422
+
423
+
424
def _ingest_pipeline(args) -> None:
    """Run the pipeline for a bare ``fp ingest`` (optionally ``--pipe`` / ``--full``).

    ``--pipe`` is split on commas and each name is stripped. Without it the
    run helper receives ``pipes=None``. A ``ValueError`` from the run exits
    with status 1, and Ctrl-C exits with status 130.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    raw_pipes = getattr(args, "pipe", None)
    if raw_pipes:
        selected = [name.strip() for name in raw_pipes.split(",")]
    else:
        selected = None

    runner = DataPipelineOrchestrator()
    runner.full_mode = getattr(args, "full", False)

    options = {
        "pipes": selected,
        "mode": "full" if runner.full_mode else "incremental",
        "quiet": getattr(args, "quiet", False),
        "verbose": getattr(args, "verbose", False),
        "show_banner": True,
    }

    try:
        _run_with_logging(runner, **options)
    except ValueError as exc:
        console.print(f"[red]Error:[/red] {exc}")
        sys.exit(1)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)
452
+
453
+
454
def _ingest_status(args) -> None:
    """Print pipeline diagnostics, as JSON when ``--json`` was given.

    When the database file does not exist yet, emits an empty JSON object
    (JSON mode) or a hint to run ``fp ingest`` and returns.
    """
    from footprinter.paths import get_db_path

    as_json = getattr(args, "json", False)
    database_file = get_db_path()

    if not database_file.exists():
        if as_json:
            output_json({})
        else:
            console.print("[dim]No database found. Run [bold]fp ingest[/bold] to start indexing.[/dim]")
        return

    # Imported only once a database is known to exist.
    from footprinter.ingest.status import get_status, print_status

    snapshot = get_status(str(database_file))
    if as_json:
        output_json(snapshot)
        return

    print_status(snapshot, quiet=getattr(args, "quiet", False))
474
+
475
+
476
def _ingest_import(args) -> None:
    """Import the chat export at ``args.path`` and report the outcome.

    Prints a notice for a duplicate upload, otherwise the chat and message
    counts plus a warning when some chats failed. Any exception is reported
    as "Import failed" and exits with status 1.
    """
    from pathlib import Path

    from footprinter.ingest.chat_indexer import ChatIndexer
    from footprinter.ingest.database import Database
    from footprinter.paths import get_db_path

    try:
        database = Database(str(get_db_path()))
        indexer = ChatIndexer(database)
        outcome = indexer.upload(Path(args.path))

        if outcome.get("status", "unknown") == "duplicate":
            previous = outcome.get("previous_upload", {})
            console.print(f"[yellow]Already imported[/yellow] (uploaded {previous.get('uploaded_at', 'unknown')})")
            return

        new_chats = outcome.get("chats_added", 0)
        changed_chats = outcome.get("chats_updated", 0)
        message_count = outcome.get("messages_imported", 0)
        failed = outcome.get("errors", 0)
        total_chats = new_chats + changed_chats
        console.print(
            f"[green]Imported[/green] {total_chats} chats ({new_chats} new, {changed_chats} updated), {message_count} messages"
        )
        if failed:
            console.print(f"[yellow]Warning:[/yellow] {failed} chats failed to import")
    except Exception as exc:
        console.print(f"[red]Import failed:[/red] {exc}")
        sys.exit(1)
506
+
507
+
508
def _ingest_refresh(args) -> None:
    """Re-scan the single data source named by ``args.source``.

    The source must be a key of the orchestrator's ``refresh_pipes`` mapping;
    its value is the list of stages handed to the run helper. ``--full``
    switches the orchestrator to full mode.

    Exits with status 1 for an unknown source or a ``ValueError`` raised by
    the run, and with status 130 on Ctrl-C.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    orchestrator = DataPipelineOrchestrator()
    refresh_pipes = orchestrator.refresh_pipes

    source = args.source

    if source not in refresh_pipes:
        console.print(f"[red]Error:[/red] Unknown refresh source: {source}")
        console.print(f"Valid sources: {', '.join(refresh_pipes)}")
        # _run_with_logging is what normally closes the orchestrator; this
        # early exit never reaches it, so close it here.
        orchestrator.close()
        sys.exit(1)

    stages = refresh_pipes[source]
    orchestrator.full_mode = getattr(args, "full", False)
    quiet = getattr(args, "quiet", False)
    verbose = getattr(args, "verbose", False)
    mode_str = "full" if orchestrator.full_mode else "incremental"

    try:
        _run_with_logging(
            orchestrator,
            pipes=stages,
            mode=mode_str,
            quiet=quiet,
            verbose=verbose,
            header=f"Footprinter Refresh source={source}, {len(stages)} stages",
        )
    except ValueError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)