footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""fp delete — soft-delete entity records via the service layer.
|
|
2
|
+
|
|
3
|
+
Routes ``fp delete client 42`` through the service layer's ``delete()``
|
|
4
|
+
function, which sets ``status='removed'``. Requires confirmation unless
|
|
5
|
+
``--yes`` is passed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
from footprinter.cli._common import (
|
|
11
|
+
FORMATTER,
|
|
12
|
+
add_json_flag,
|
|
13
|
+
console,
|
|
14
|
+
open_db,
|
|
15
|
+
output_json,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Entity dispatch table
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
#: Dispatch table for ``fp delete``. Each deletable noun maps to a pair:
#: the attribute name of its service module on ``footprinter.services`` and
#: the record key that holds the entity's display name.
DELETABLE_ENTITIES: dict[str, tuple[str, str]] = dict(
    client=("client_service", "name"),
    project=("project_service", "project_name"),
)
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Service resolution
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _get_service(service_name: str):
    """Return the attribute named ``service_name`` from ``footprinter.services``.

    The package is imported inside the function so that importing this CLI
    module does not pull in the service layer. Raises ``AttributeError`` when
    the package exposes no attribute with that name.
    """
    from footprinter import services

    return getattr(services, service_name)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Handler
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _handle_delete(args) -> None:
    """Handle ``fp delete <noun> <id>``.

    Reads ``args.noun``, ``args.id``, ``args.yes`` and (optionally)
    ``args.json``. The record is fetched first so its name can be shown in
    the confirmation prompt and the result line; the service's ``delete()``
    is only called once the user has confirmed or ``--yes`` was passed.

    Exits with status 1 when the ID is not an integer or the record is not
    found, and with status 0 when the user declines the confirmation.
    """
    # Deferred imports keep module import cheap, like the other handlers.
    from rich.markup import escape

    from footprinter.services.roles import Role

    noun = args.noun
    svc_name, name_key = DELETABLE_ENTITIES[noun]
    service = _get_service(svc_name)

    try:
        entity_id = int(args.id)
    except ValueError:
        # The raw argument is user-controlled; escape it so stray brackets
        # are printed literally instead of being parsed as console markup.
        console.print(f"[red]Invalid ID: {escape(repr(args.id))} — expected an integer.[/red]")
        sys.exit(1)

    with open_db() as conn:
        record = service.get(conn, entity_id, role=Role.ADMIN)

        if record is None:
            # args.id parsed as an int above, so it cannot contain markup.
            console.print(f"[red]{noun.title()} {args.id} not found.[/red]")
            sys.exit(1)

        entity_name = record.get(name_key, "")

        if not args.yes:
            from footprinter.cli._prompt import SafeConfirm

            # NOTE(review): the prompt text also embeds entity_name. How
            # SafeConfirm renders it is not visible here, so it is passed
            # through unchanged — confirm whether it needs escaping too.
            if not SafeConfirm.ask(
                f"Delete {noun} #{entity_id} ({entity_name})?",
                default=False,
            ):
                console.print("[dim]Cancelled.[/dim]")
                sys.exit(0)

        result = service.delete(conn, entity_id, role=Role.ADMIN)

        if getattr(args, "json", False):
            output_json(result)
        else:
            # Entity names are stored data and may contain "[...]" sequences;
            # escape them so the confirmation line cannot be mangled (or
            # raise a markup error) after the delete has already happened.
            console.print(f"Deleted {noun} #{entity_id} ({escape(str(entity_name))}).")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Registration
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def register(subparsers) -> None:
    """Attach the ``delete`` command and one nested sub-command per noun.

    Every key of ``DELETABLE_ENTITIES`` becomes ``fp delete <noun> <id>``
    with ``--yes``/``-y`` and the shared JSON flag, dispatched to
    ``_handle_delete``. ``fp delete`` without a noun prints the help text.
    """
    usage_examples = (
        "examples:\n"
        " fp delete client 42 Delete client #42\n"
        " fp delete project 7 --yes Skip confirmation\n"
        " fp delete client 1 --json JSON output\n"
    )
    parser = subparsers.add_parser(
        "delete",
        help="Soft-delete a record",
        description="Soft-delete a record by setting status to 'removed'.",
        epilog=usage_examples,
        formatter_class=FORMATTER,
    )
    noun_subs = parser.add_subparsers(
        dest="noun",
        metavar="NOUN",
        title="entity nouns (one required)",
    )

    def _show_help(args):
        """Fallback handler used when no noun was given."""
        return parser.print_help()

    parser.set_defaults(func=_show_help)

    for noun in DELETABLE_ENTITIES:
        noun_parser = noun_subs.add_parser(
            noun,
            help=f"Delete a {noun}",
            description=f"Soft-delete a {noun} record by ID.",
            formatter_class=FORMATTER,
        )
        noun_parser.add_argument("id", help=f"{noun.title()} ID")
        noun_parser.add_argument(
            "--yes",
            "-y",
            action="store_true",
            default=False,
            help="Skip confirmation prompt",
        )
        add_json_flag(noun_parser)
        noun_parser.set_defaults(func=_handle_delete)
|
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
"""fp ingest — pipeline execution, import, and refresh.
|
|
2
|
+
|
|
3
|
+
Thin routing layer that delegates to existing orchestrator/analysis classes.
|
|
4
|
+
All heavy imports are deferred inside handler functions to keep ``fp --help`` fast.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from footprinter.cli._common import FORMATTER, add_json_flag, console, output_json
|
|
10
|
+
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
# argparse registration
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _build_parser(subparsers, name):
    """Build and return the ingest parser.

    Adds a sub-command called ``name`` to ``subparsers`` with the pipeline
    flags (``--pipe``, ``--full``, ``--quiet``, ``--rebuild-vectors``,
    ``--vector-source``, ``--phase``, ``--repair-fts``, ``--verbose``) on the
    parent parser and the nested actions ``status``, ``import`` and
    ``refresh`` stored under ``ingest_action``.

    Returns the parent parser so the caller can attach its handler.
    """
    # Deferred like the other imports in this module; only needed for SUPPRESS.
    import argparse

    parser = subparsers.add_parser(
        name,
        help="Run the data ingest pipeline",
        description=(
            "Execute the data pipeline or manage pipeline operations.\n\n"
            "By default, runs all sources incrementally (new/updated only).\n"
            "Use --full to re-process everything. Use 'refresh <source>'\n"
            "to run a single source. The --pipe flag is available for\n"
            "power users who need to target specific internal pipes."
        ),
        epilog=(
            "examples:\n"
            " fp ingest All sources (incremental)\n"
            " fp ingest --full All sources (full re-process)\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh all --full Re-scan all sources (full)\n"
            " fp ingest --pipe local_files,browser Specific internal pipes\n"
            " fp ingest --rebuild-vectors Rebuild vectors (incremental)\n"
            " fp ingest --rebuild-vectors full Rebuild vectors (full reset)\n"
            " fp ingest status Show pipeline diagnostics\n"
            " fp ingest import export.zip Import a chat export"
        ),
        formatter_class=FORMATTER,
    )

    # Pipeline flags (on the parent parser, not sub-subparsers)
    parser.add_argument(
        "--pipe",
        "-s",
        type=str,
        metavar="PIPE",
        help="Comma-separated pipes to run (e.g. local_files,browser)",
    )
    parser.add_argument(
        "--full",
        "-f",
        action="store_true",
        help="Full mode: re-process everything (default: incremental)",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress Rich output (for scripts and cron)",
    )
    parser.add_argument(
        "--rebuild-vectors",
        nargs="?",
        const="incremental",  # bare ``--rebuild-vectors`` means incremental
        default=None,
        choices=["incremental", "sync", "full"],
        metavar="MODE",
        help=(
            "Rebuild the vector store. Modes: incremental (default, "
            "process new/modified/removed only), sync (incremental + "
            "verify counts), full (delete and rebuild everything)"
        ),
    )
    parser.add_argument(
        "--vector-source",
        choices=["files", "chats", "all"],
        default="all",
        help="Which vectors to rebuild (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--phase",
        choices=["files", "messages", "chat_info"],
        default=None,
        help="Run a single rebuild phase (default: all). Only used with --rebuild-vectors",
    )
    parser.add_argument(
        "--repair-fts",
        action="store_true",
        help="Drop and rebuild FTS search indexes",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Verbose logging to file",
    )

    # Sub-subparsers for ingest actions
    subs = parser.add_subparsers(dest="ingest_action", metavar="COMMAND", title="commands (one required)")

    # status
    status_p = subs.add_parser(
        "status",
        help="Show pipeline diagnostics",
        description="Show data counts and pipeline health diagnostics.",
        formatter_class=FORMATTER,
    )
    add_json_flag(status_p)

    # import
    import_p = subs.add_parser(
        "import",
        help="Import a chat export",
        description=(
            "Import a Claude or ChatGPT chat export.\n\n"
            "Accepts .zip files or extracted directories. Duplicate\n"
            "imports are detected and skipped."
        ),
        epilog=("examples:\n fp ingest import ~/Downloads/claude-export.zip\n fp ingest import ./extracted-chats/"),
        formatter_class=FORMATTER,
    )
    import_p.add_argument("path", help="Path to .zip file or extracted directory")

    # refresh
    refresh_p = subs.add_parser(
        "refresh",
        help="Re-scan a data source (default: incremental)",
        description=(
            "Re-scan a data source, processing new and updated entries.\n\n"
            "Valid sources: local, browser, chat, and connector sources.\n"
            "Some sources require connectors. See fp connect list.\n"
            "Use --full to re-process everything."
        ),
        epilog=(
            "examples:\n"
            " fp ingest refresh local Re-scan local files (incremental)\n"
            " fp ingest refresh local --full Re-scan local files (full)\n"
            " fp ingest refresh browser Re-scan browser history\n"
            " fp ingest refresh all Re-scan everything"
        ),
        formatter_class=FORMATTER,
    )
    refresh_p.add_argument("source", help="Source to refresh (e.g. local, browser, chat, all)")
    refresh_p.add_argument(
        "--full",
        "-f",
        action="store_true",
        # argparse copies every attribute of the sub-parser's namespace onto
        # the parent namespace, defaults included. With the implicit
        # default=False, ``fp ingest --full refresh local`` had its parent
        # ``--full`` overwritten and ran incrementally. SUPPRESS leaves the
        # attribute untouched unless the flag is given after ``refresh``.
        default=argparse.SUPPRESS,
        help="Full mode: re-process everything (default: incremental)",
    )

    return parser
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def register(subparsers) -> None:
    """Add the ``ingest`` command to ``subparsers`` and route it to ``_handle_ingest``."""
    _build_parser(subparsers, "ingest").set_defaults(func=_handle_ingest)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Dispatchers
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _handle_ingest(args) -> None:
    """Dispatch an ``fp ingest`` invocation to the matching handler.

    Precedence: ``--repair-fts`` first, then ``--rebuild-vectors``, then the
    nested action (``status`` / ``import`` / ``refresh``). With no nested
    action the pipeline itself is run. An unrecognised action does nothing.
    """
    # Maintenance flags win over any sub-command.
    if getattr(args, "repair_fts", False):
        from footprinter.ingest.cli import _repair_fts

        _repair_fts(quiet=getattr(args, "quiet", False))
        return

    rebuild_mode = getattr(args, "rebuild_vectors", None)
    if rebuild_mode:
        from footprinter.ingest.cli import _rebuild_vectors

        _rebuild_vectors(
            quiet=getattr(args, "quiet", False),
            source=getattr(args, "vector_source", "all"),
            phase=getattr(args, "phase", None),
            mode=rebuild_mode,
        )
        return

    action = getattr(args, "ingest_action", None)

    # Bare `fp ingest` or `fp ingest --pipe/--full`
    if action is None:
        _ingest_pipeline(args)
    elif action == "status":
        _ingest_status(args)
    elif action == "import":
        _ingest_import(args)
    elif action == "refresh":
        _ingest_refresh(args)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
# Handlers
|
|
204
|
+
# ---------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _print_source_banner(config: dict, *, quiet: bool = False, console=None):
    """Print which data sources are active for this run.

    Local files and browser history are reported as active when ``config``
    has a truthy ``directories`` / ``browsers`` entry. Each discovered
    connector is reported as active when it is both installed and
    configured; otherwise an install hint is shown for it.

    Nothing is printed when ``quiet`` is true. ``console`` defaults to the
    shared CLI console.
    """
    if quiet:
        return

    if console is None:
        from footprinter.cli._common import console as _console

        console = _console

    from footprinter.connectors import discover_connectors, is_configured, is_installed

    # (config key, line when active, line when inactive)
    builtin_sources = (
        (
            "directories",
            " [green]\u2713[/green] Local files",
            " [dim]\u2022 Local files (no directories configured)[/dim]",
        ),
        (
            "browsers",
            " [green]\u2713[/green] Browser history",
            " [dim]\u2022 Browser history (no browsers configured)[/dim]",
        ),
    )

    console.print("[bold]Sources:[/bold]")
    for config_key, active_line, inactive_line in builtin_sources:
        console.print(active_line if config.get(config_key) else inactive_line)

    for name, spec in discover_connectors().items():
        ready = is_installed(spec) and is_configured(spec, config)
        if ready:
            line = f" [green]\u2713[/green] {spec.description}"
        else:
            line = f" [dim]\u2022 {spec.description} (fp connect install {name})[/dim]"
        console.print(line)

    console.print()
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _run_with_logging(
    orchestrator,
    *,
    pipes=None,
    mode,
    quiet=False,
    verbose=False,
    header="Footprinter Data Pipeline",
    show_banner=False,
    show_next_steps=True,
):
    """Shared run helper: Rich Progress, file logging, run record, cleanup.

    If ``pipes`` is not provided, defaults to the ``"all"`` pipeline.
    Shows a stage counter ("Stage 2/5: local_files") and intra-stage
    progress counts for adapters that report them via on_progress.

    Args:
        orchestrator: Pipeline orchestrator; its ``run_pipes`` /
            ``run_pipeline`` is invoked and ``close()`` is always called
            once the run lock has been acquired.
        pipes: Explicit list of pipe names, or a falsy value for ``"all"``.
        mode: Mode label shown in the header and stored in the run record.
        quiet: Suppress console output and the progress display.
        verbose: Log at DEBUG instead of INFO to the run log file.
        header: Title printed above the run.
        show_banner: Print the source banner before starting.
        show_next_steps: Forwarded to ``print_results``.

    Exits with status 1 when another run holds the run lock. A
    ``KeyboardInterrupt`` is re-raised after an interrupted run record has
    been saved; other exceptions propagate unchanged. In every case the
    progress display is stopped before the exception leaves this function.
    """
    import fcntl
    import logging
    from datetime import datetime, timezone

    from footprinter.ingest.run_record import save_run_record
    from footprinter.ingest.status import print_results
    from footprinter.paths import get_run_lock_path, get_run_logs_dir, prune_run_logs
    from footprinter.utils.logging_config import add_file_handler

    logger = logging.getLogger("footprinter")

    # Acquire run lock (prevents concurrent fp ingest)
    lock_path = get_run_lock_path()
    lock_fd = open(lock_path, "w")
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        lock_fd.close()
        console.print("[red]Error:[/red] Another fp ingest is already in progress.")
        sys.exit(1)

    started_at = datetime.now(timezone.utc)
    results = []
    progress = None
    file_handler = None

    def stop_progress():
        """Stop the live progress display at most once."""
        nonlocal progress
        if progress is not None:
            progress.stop()
            # Clearing the reference makes repeated calls no-ops and turns
            # the stage callbacks below into log-only callbacks.
            progress = None

    try:
        # Prune old run logs before creating a new one
        prune_run_logs()

        # Set up file logging
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_path = get_run_logs_dir() / f"run_{timestamp}.log"
        file_handler = add_file_handler(
            log_path,
            level=logging.DEBUG if verbose else logging.INFO,
        )
        logger.info(
            "Pipeline started: mode=%s, header=%s",
            mode,
            header,
        )

        # Resolve stage list for counter display
        stage_list = pipes
        if stage_list is None:
            stage_list = orchestrator.runner.pipelines.get("all", [])
        total_stages = len(stage_list)
        stage_index = [0]  # mutable counter for closures
        current_task = [None]  # track active progress task

        # Rich Progress (unless quiet)
        if not quiet:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            console.print()
            console.print(f"[bold]{header}[/bold] [dim]({mode})[/dim]")
            console.print()

            if show_banner:
                _print_source_banner(orchestrator.config, console=console)

            progress = Progress(
                SpinnerColumn(),
                TextColumn("{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                console=console,
                transient=True,
            )
            progress.start()

        def on_start(stage):
            """Log and show progress task when a pipeline stage begins."""
            stage_index[0] += 1
            logger.info("Stage started: %s", stage)
            if progress is not None:
                label = f"Stage {stage_index[0]}/{total_stages}: [cyan]{stage}[/cyan]"
                current_task[0] = progress.add_task(label, total=None)

        def on_progress(count):
            """Update intra-stage progress count."""
            if progress is not None and current_task[0] is not None:
                progress.update(current_task[0], completed=count)

        def on_end(stage, result):
            """Log result, remove progress task, and print result line."""
            from footprinter.ingest.status import _stage_detail_string

            results.append(result)
            status = result.get("status", "unknown")
            elapsed = result.get("elapsed_seconds", 0)
            logger.info(
                "Stage ended: %s status=%s elapsed=%.1fs",
                stage,
                status,
                elapsed,
            )

            # Remove progress task before printing result line
            if progress is not None and current_task[0] is not None:
                progress.remove_task(current_task[0])
                current_task[0] = None

            if status in ("completed", "info"):
                icon = "[green]\u2713[/green]"
            elif status == "completed_with_errors":
                icon = "[yellow]\u26a0[/yellow]"
            elif status == "error":
                icon = "[red]\u2717[/red]"
            elif status == "skipped":
                icon = "[dim]\u25cb[/dim]"
            else:
                icon = "[dim]?[/dim]"

            if not quiet:
                details = _stage_detail_string(result)
                detail_part = f" {details}" if details else ""
                console.print(f" {icon} {stage}{detail_part} [dim]({elapsed:.1f}s)[/dim]")

        if pipes:
            orchestrator.run_pipes(
                pipes,
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )
        else:
            orchestrator.run_pipeline(
                "all",
                on_pipe_start=on_start,
                on_pipe_end=on_end,
                on_progress=on_progress,
            )

        # Stop the live display before the summary is printed.
        stop_progress()

        # Save run record
        record_path = save_run_record(results, mode=mode, started_at=started_at)
        logger.info("Run record saved to %s", record_path)

        print_results(results, quiet=quiet, show_next_steps=show_next_steps)

        if not quiet:
            console.print(f"[dim]Log: {log_path}[/dim]")

    except KeyboardInterrupt:
        stop_progress()
        # Persist whatever stages finished so the interrupted run is recorded.
        record_path = save_run_record(results, mode=mode, started_at=started_at, interrupted=True)
        logger.info("Run record saved to %s", record_path)
        raise
    finally:
        # Covers every other exception type as well: previously only
        # ValueError and KeyboardInterrupt stopped the display, so any other
        # failure left the spinner live while the traceback was printed.
        stop_progress()
        lock_fd.close()
        if file_handler:
            logging.root.removeHandler(file_handler)
            file_handler.close()
        orchestrator.close()
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _ingest_pipeline(args) -> None:
    """Run the pipeline for bare ``fp ingest`` and its ``--pipe`` / ``--full`` forms.

    ``--pipe`` is split on commas into pipe names; without it the full
    pipeline runs. A ``ValueError`` from the run is reported and exits with
    status 1; Ctrl-C exits with status 130.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    pipe_str = getattr(args, "pipe", None)
    if pipe_str:
        pipes = [name.strip() for name in pipe_str.split(",")]
    else:
        pipes = None

    orchestrator = DataPipelineOrchestrator()
    orchestrator.full_mode = getattr(args, "full", False)
    mode_str = "full" if orchestrator.full_mode else "incremental"
    run_options = {
        "pipes": pipes,
        "mode": mode_str,
        "quiet": getattr(args, "quiet", False),
        "verbose": getattr(args, "verbose", False),
        "show_banner": True,
    }

    try:
        _run_with_logging(orchestrator, **run_options)
    except ValueError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _ingest_status(args) -> None:
    """Report pipeline data counts, as JSON or as formatted console output.

    When the database file does not exist yet, emits an empty JSON object
    (with ``--json``) or a hint to run ``fp ingest``, and returns.
    """
    from footprinter.paths import get_db_path

    want_json = getattr(args, "json", False)
    db_path = get_db_path()

    if db_path.exists():
        from footprinter.ingest.status import get_status, print_status

        status = get_status(str(db_path))
        if want_json:
            output_json(status)
        else:
            print_status(status, quiet=getattr(args, "quiet", False))
        return

    # Nothing indexed yet.
    if want_json:
        output_json({})
    else:
        console.print("[dim]No database found. Run [bold]fp ingest[/bold] to start indexing.[/dim]")
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _ingest_import(args) -> None:
    """Import the chat export at ``args.path`` and print a one-line summary.

    A result whose status is ``"duplicate"`` is reported as already
    imported. Otherwise the added/updated chat and message counts are
    printed, plus a warning when the result reports errors. Any exception
    raised during the import is reported and exits with status 1.
    """
    from pathlib import Path

    from footprinter.ingest.chat_indexer import ChatIndexer
    from footprinter.ingest.database import Database
    from footprinter.paths import get_db_path

    try:
        db = Database(str(get_db_path()))
        result = ChatIndexer(db).upload(Path(args.path))

        if result.get("status", "unknown") == "duplicate":
            prev = result.get("previous_upload", {})
            console.print(f"[yellow]Already imported[/yellow] (uploaded {prev.get('uploaded_at', 'unknown')})")
            return

        added = result.get("chats_added", 0)
        updated = result.get("chats_updated", 0)
        messages = result.get("messages_imported", 0)
        errors = result.get("errors", 0)
        summary = f"[green]Imported[/green] {added + updated} chats ({added} new, {updated} updated), {messages} messages"
        console.print(summary)
        if errors:
            console.print(f"[yellow]Warning:[/yellow] {errors} chats failed to import")
    except Exception as e:
        # CLI boundary: turn any failure into a message and a non-zero exit.
        console.print(f"[red]Import failed:[/red] {e}")
        sys.exit(1)
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def _ingest_refresh(args) -> None:
    """Re-scan a data source.

    Looks up ``args.source`` in the orchestrator's ``refresh_pipes`` mapping
    and runs the pipes listed for it. An unknown source prints the valid
    names and exits with status 1. A ``ValueError`` from the run exits with
    status 1; Ctrl-C exits with status 130.
    """
    from footprinter.ingest.orchestrator import DataPipelineOrchestrator

    orchestrator = DataPipelineOrchestrator()
    refresh_pipes = orchestrator.refresh_pipes

    source = args.source

    if source not in refresh_pipes:
        console.print(f"[red]Error:[/red] Unknown refresh source: {source}")
        console.print(f"Valid sources: {', '.join(refresh_pipes)}")
        # The orchestrator is only closed by _run_with_logging, which this
        # path never reaches, so release it here before exiting.
        orchestrator.close()
        sys.exit(1)

    stages = refresh_pipes[source]
    orchestrator.full_mode = getattr(args, "full", False)
    quiet = getattr(args, "quiet", False)
    verbose = getattr(args, "verbose", False)
    mode_str = "full" if orchestrator.full_mode else "incremental"

    try:
        _run_with_logging(
            orchestrator,
            pipes=stages,
            mode=mode_str,
            quiet=quiet,
            verbose=verbose,
            header=f"Footprinter Refresh source={source}, {len(stages)} stages",
        )
    except ValueError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)
|