footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""Shared CLI utilities used across all CLI subcommands.
|
|
2
|
+
|
|
3
|
+
Provides database connection, argument helpers, identifier resolution,
|
|
4
|
+
JSON output, and shared constants.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import json
|
|
9
|
+
import sqlite3
|
|
10
|
+
import sys
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, Union
|
|
14
|
+
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
|
|
17
|
+
from footprinter.services import access_service as _access
|
|
18
|
+
from footprinter.services.access_service import (
|
|
19
|
+
resolve_inherit_permission,
|
|
20
|
+
resolve_inherit_visibility,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Shared instances and constants
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# Module-level Rich console instance; in this module it prints the
# "Database not found" message emitted by open_db() / open_database().
console = Console()
|
|
28
|
+
|
|
29
|
+
# Formatter for parsers that use description= or epilog= with pre-formatted text.
|
|
30
|
+
# Custom subclass replaces the dense argparse usage line with a clean header.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FootprinterHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """Help formatter that keeps pre-formatted descriptions and shortens usage.

    Inherits raw description/epilog handling from
    ``argparse.RawDescriptionHelpFormatter`` and overrides the usage section
    so it shows only the program name.
    """

    def _format_usage(self, usage, actions, groups, prefix):
        """Return a fixed usage header built from the program name only.

        The ``usage``, ``actions``, ``groups`` and ``prefix`` arguments are
        accepted for compatibility with the base-class hook and are ignored.
        """
        return "\nUsage: {}\n\n".format(self._prog)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Alias for the custom help formatter class defined above.
FORMATTER = FootprinterHelpFormatter

# Color vocabulary — consistent markup across CLI subcommands
C_SUCCESS = "green"
C_WARNING = "yellow"
C_ERROR = "red"
C_INFO = "cyan"
C_DIM = "dim"

# Closed set of recognized status strings.
# NOTE(review): presumably the lifecycle statuses accepted for projects —
# confirm against the callers that validate user input with this set.
VALID_STATUSES = frozenset(
    {
        "active",
        "paused",
        "completed",
        "abandoned",
        "archived",
        "merged",
    }
)

# Whitelists checked by resolve_identifier() before a table or column name is
# interpolated into SQL text (identifiers cannot be bound as parameters).
ALLOWED_TABLES = frozenset({"clients", "projects"})
ALLOWED_COLUMNS = frozenset({"name", "project_name"})
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
# Database connection
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def connect_db(db_path: Union[str, Path]) -> Optional[sqlite3.Connection]:
    """Return a configured SQLite connection, or ``None`` if the file is absent.

    The connection is opened only when *db_path* already exists, so this
    function never creates a new database file.  The returned connection
    uses ``sqlite3.Row`` as its row factory and has the ``busy_timeout``
    and ``foreign_keys`` pragmas applied.

    NOTE(review): a connect ``timeout`` of 10 s and a ``busy_timeout``
    pragma of 5000 ms are both set; confirm which value is intended to win.
    """
    location = Path(db_path)
    if location.exists():
        connection = sqlite3.connect(str(location), timeout=10)
        connection.row_factory = sqlite3.Row
        # Pragmas are applied in this fixed order on every new connection.
        for pragma in ("PRAGMA busy_timeout=5000", "PRAGMA foreign_keys=ON"):
            connection.execute(pragma)
        return connection
    return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@contextmanager
def open_db(db_path=None):
    """Context manager yielding a raw connection to the Footprinter DB.

    When *db_path* is ``None`` the path comes from
    ``footprinter.paths.get_db_path()``.  If ``connect_db`` reports that the
    database file is missing, an error is printed and the process exits
    with status 1.  The connection is closed when the ``with`` block ends,
    whether or not it raised.
    """
    if db_path is None:
        # Imported lazily, only when the default path is actually needed.
        from footprinter.paths import get_db_path

        db_path = get_db_path()

    connection = connect_db(db_path)
    if connection is None:
        not_found = (
            "[red]Database not found.[/red] "
            "Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        console.print(not_found)
        sys.exit(1)

    try:
        yield connection
    finally:
        connection.close()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@contextmanager
def open_database(db_path=None):
    """Context manager yielding a ``Database`` wrapper for the Footprinter DB.

    Counterpart of ``open_db`` for callers that need the ``Database``
    wrapper from ``footprinter.ingest.database`` rather than a bare
    ``sqlite3.Connection``.

    When *db_path* is ``None`` the path comes from
    ``footprinter.paths.get_db_path()``.  If the file does not exist, an
    error is printed and the process exits with status 1.  The wrapper's
    ``close()`` is called when the ``with`` block ends.
    """
    if db_path is None:
        from footprinter.paths import get_db_path

        db_path = get_db_path()

    target = Path(db_path)
    if not target.exists():
        not_found = (
            "[red]Database not found.[/red] "
            "Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        console.print(not_found)
        sys.exit(1)

    # Imported only after the existence check has passed.
    from footprinter.ingest.database import Database

    wrapper = Database(str(target))
    try:
        yield wrapper
    finally:
        wrapper.close()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# Argument helpers
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def add_json_flag(parser) -> None:
    """Register a boolean ``--json`` switch (off by default) on *parser*."""
    parser.add_argument("--json", help="Output as JSON", default=False, action="store_true")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def add_csv_flag(parser) -> None:
    """Register a boolean ``--csv`` switch (off by default) on *parser*."""
    parser.add_argument("--csv", help="Output as CSV", default=False, action="store_true")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Output helpers
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def output_json(data) -> None:
    """Print *data* to stdout as JSON indented by two spaces.

    Values the JSON encoder cannot handle natively (for example datetime
    objects) are converted with ``str``.
    """
    rendered = json.dumps(data, default=str, indent=2)
    print(rendered)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def output_csv(rows: list[dict], columns: list[str] | None = None) -> None:
|
|
173
|
+
"""Write *rows* as CSV to stdout.
|
|
174
|
+
|
|
175
|
+
If *columns* is given, output only those columns in that order.
|
|
176
|
+
Otherwise, use all keys from the first row.
|
|
177
|
+
"""
|
|
178
|
+
import csv
|
|
179
|
+
|
|
180
|
+
if not rows:
|
|
181
|
+
return
|
|
182
|
+
if columns is None:
|
|
183
|
+
columns = list(rows[0].keys())
|
|
184
|
+
writer = csv.DictWriter(sys.stdout, fieldnames=columns, extrasaction="ignore")
|
|
185
|
+
writer.writeheader()
|
|
186
|
+
for row in rows:
|
|
187
|
+
writer.writerow({k: str(v) if v is not None else "" for k, v in row.items()})
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
# Identifier resolution
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def resolve_identifier(
    conn: sqlite3.Connection,
    table: str,
    name_col: str,
    identifier: str,
) -> int:
    """Resolve a user-supplied identifier to a row ID.

    Tries a numeric ID lookup first, then falls back to a case-insensitive
    match on *name_col*.

    Args:
        conn: Open connection; rows must be indexable by column name
            (e.g. ``row_factory = sqlite3.Row``).
        table: Table to search; must be in ``ALLOWED_TABLES``.
        name_col: Name column to match; must be in ``ALLOWED_COLUMNS``.
        identifier: Row ID or name as typed by the user.

    Returns:
        The integer row ID.

    Raises:
        ValueError: If *table* / *name_col* are not whitelisted, if no row
            matches by ID or name, or if several rows match the name (the
            message then lists every match).
    """
    # Table and column names are interpolated into the SQL text below, so
    # they are restricted to the module's whitelists first.
    if table not in ALLOWED_TABLES or name_col not in ALLOWED_COLUMNS:
        raise ValueError(f"Invalid table/column: {table}.{name_col}")

    # Try numeric ID first. Only the conversion is guarded, so a ValueError
    # raised anywhere else is not silently swallowed.
    try:
        row_id = int(identifier)
    except ValueError:
        row_id = None

    # SQLite INTEGER is a signed 64-bit value; binding a larger Python int
    # raises OverflowError. Such an identifier cannot be a row ID, so skip
    # the ID lookup and treat it as a name instead of crashing.
    if row_id is not None and -(2**63) <= row_id <= 2**63 - 1:
        cursor = conn.execute(
            f"SELECT id FROM {table} WHERE id = ?",
            (row_id,),
        )
        if cursor.fetchone():
            return row_id

    # Fall back to case-insensitive name match
    cursor = conn.execute(
        f"SELECT id, {name_col} FROM {table} WHERE {name_col} COLLATE NOCASE = ?",
        (identifier,),
    )
    rows = cursor.fetchall()

    if not rows:
        raise ValueError(f"No {table} found matching '{identifier}'")

    if len(rows) == 1:
        return rows[0]["id"]

    # Ambiguous — list all matches
    match_list = ", ".join(f"id={r['id']} name={r[name_col]!r}" for r in rows)
    raise ValueError(f"Ambiguous: {len(rows)} {table} match '{identifier}': {match_list}")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# ---------------------------------------------------------------------------
|
|
245
|
+
# Pure utilities
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def add_verbose_flag(parser) -> None:
    """Register a boolean ``--verbose`` switch (off by default) on *parser*."""
    parser.add_argument(
        "--verbose",
        help="Show access and visibility columns",
        default=False,
        action="store_true",
    )
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def enrich_verbose_access(
    rows: list[dict],
    entity_type: str,
    *,
    id_key: str = "id",
) -> None:
    """Add ``access``, ``access_source`` and ``visibility`` keys to each row.

    Rows are modified in place and nothing is returned.  *entity_type* and
    *id_key* are accepted but not consulted by this implementation.

    ``visibility`` is always ``resolve_inherit_visibility(row.get("mcp_view"))``.
    ``access`` / ``access_source`` depend on the row's ``mcp_read`` entry;
    there are four cases:

    * **Key absent**: both are set to "—".
    * **Value is None**: ``access`` comes from
      ``resolve_inherit_permission(None)``; source is "default".
    * **Value is "inherit"**: ``access`` comes from
      ``resolve_inherit_permission("inherit")``; source is "global" when
      ``is_global_policy_loaded()`` is true, otherwise "baseline".
    * **Any other value**: ``access`` is "allow" only for the exact value
      "allow" and "deny" for everything else; source is "cached".

    An empty (or otherwise falsy) *rows* is a no-op.
    """
    for row in rows or ():
        if "mcp_read" not in row:
            row["access"] = "—"
            row["access_source"] = "—"
        else:
            raw = row["mcp_read"]
            if raw in (None, "inherit"):
                # No explicit per-row value: delegate to the resolver.
                row["access"] = resolve_inherit_permission(raw)
                if raw == "inherit":
                    source = "global" if _access.is_global_policy_loaded() else "baseline"
                else:
                    source = "default"
                row["access_source"] = source
            else:
                # Explicit value stored on the row; anything but "allow" denies.
                row["access"] = "allow" if raw == "allow" else "deny"
                row["access_source"] = "cached"
        row["visibility"] = resolve_inherit_visibility(row.get("mcp_view"))
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def verbose_access_cells(row: dict) -> list[str]:
    """Build the Rich-markup cells for a row's access and visibility.

    Returns a two-item list ``[access_cell, visibility_cell]``.  A missing
    ``access`` is rendered as "deny" and a missing ``visibility`` as
    "opaque".  Any access value other than "—" or "allow" is shown as a red
    "deny"; an unrecognized visibility value is shown verbatim in white.
    """
    access = row.get("access", "deny")
    # Start from the deny rendering and override for the two known values.
    access_cell = "[red]deny[/red]"
    if access == "—":
        access_cell = "[dim]—[/dim]"
    elif access == "allow":
        access_cell = "[green]allow[/green]"

    visibility = row.get("visibility", "opaque")
    color = {"visible": "green", "opaque": "yellow", "hidden": "red"}.get(visibility, "white")

    return [access_cell, f"[{color}]{visibility}[/{color}]"]
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def format_size(size_bytes: int) -> str:
    """Render a byte count with a B, KB, MB or GB suffix.

    Values below 1024 are printed as-is with " B".  Larger values are
    divided by the matching power of 1024 and shown with one decimal
    place.  GB is the largest unit, so anything from 1024 MB upward is
    expressed in GB.
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    # Each entry is (divisor, suffix); a unit applies while the value is
    # below 1024 of that unit.
    for divisor, suffix in ((1024, "KB"), (1024 * 1024, "MB")):
        if size_bytes < divisor * 1024:
            return f"{size_bytes / divisor:.1f} {suffix}"

    return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|