footprinter-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +444 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/cli/__init__.py +128 -0
  19. footprinter/cli/__main__.py +6 -0
  20. footprinter/cli/_common.py +332 -0
  21. footprinter/cli/_policy_helpers.py +646 -0
  22. footprinter/cli/_prompt.py +220 -0
  23. footprinter/cli/api_cmd.py +32 -0
  24. footprinter/cli/connect.py +591 -0
  25. footprinter/cli/data.py +879 -0
  26. footprinter/cli/delete.py +128 -0
  27. footprinter/cli/ingest.py +579 -0
  28. footprinter/cli/mcp_cmd.py +750 -0
  29. footprinter/cli/mcp_setup.py +306 -0
  30. footprinter/cli/search.py +393 -0
  31. footprinter/cli/search_cmd.py +69 -0
  32. footprinter/cli/setup.py +1836 -0
  33. footprinter/cli/status.py +729 -0
  34. footprinter/cli/status_cmd.py +104 -0
  35. footprinter/cli/upsert.py +794 -0
  36. footprinter/cli/vectorize_cmd.py +215 -0
  37. footprinter/cli/view.py +322 -0
  38. footprinter/connectors/__init__.py +171 -0
  39. footprinter/connectors/config_utils.py +141 -0
  40. footprinter/db/__init__.py +37 -0
  41. footprinter/db/browser.py +198 -0
  42. footprinter/db/chats.py +610 -0
  43. footprinter/db/clients.py +307 -0
  44. footprinter/db/emails.py +279 -0
  45. footprinter/db/files.py +741 -0
  46. footprinter/db/folders.py +659 -0
  47. footprinter/db/messages.py +192 -0
  48. footprinter/db/policies.py +151 -0
  49. footprinter/db/projects.py +673 -0
  50. footprinter/db/search.py +573 -0
  51. footprinter/db/sql_utils.py +168 -0
  52. footprinter/db/status.py +320 -0
  53. footprinter/db/uploads.py +70 -0
  54. footprinter/ingest/__init__.py +0 -0
  55. footprinter/ingest/adapters/__init__.py +33 -0
  56. footprinter/ingest/adapters/browser.py +54 -0
  57. footprinter/ingest/adapters/chat.py +57 -0
  58. footprinter/ingest/adapters/ingest.py +146 -0
  59. footprinter/ingest/adapters/local_files.py +68 -0
  60. footprinter/ingest/adapters/local_folders.py +52 -0
  61. footprinter/ingest/adapters/protocol.py +174 -0
  62. footprinter/ingest/browser_indexer.py +216 -0
  63. footprinter/ingest/chat_dedup.py +156 -0
  64. footprinter/ingest/chat_indexer.py +515 -0
  65. footprinter/ingest/chat_parsers/__init__.py +8 -0
  66. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  67. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  68. footprinter/ingest/cli.py +827 -0
  69. footprinter/ingest/content_extractors.py +117 -0
  70. footprinter/ingest/database.py +36 -0
  71. footprinter/ingest/db/__init__.py +1 -0
  72. footprinter/ingest/db/connector_schema.py +47 -0
  73. footprinter/ingest/db/migration.py +328 -0
  74. footprinter/ingest/db/schema.py +1043 -0
  75. footprinter/ingest/db/security.py +6 -0
  76. footprinter/ingest/file_indexer.py +261 -0
  77. footprinter/ingest/file_scanner.py +277 -0
  78. footprinter/ingest/folder_indexer.py +226 -0
  79. footprinter/ingest/full_content_extractor.py +321 -0
  80. footprinter/ingest/orchestrator.py +125 -0
  81. footprinter/ingest/pipe_runner.py +217 -0
  82. footprinter/ingest/processing.py +165 -0
  83. footprinter/ingest/registry.py +201 -0
  84. footprinter/ingest/run_record.py +91 -0
  85. footprinter/ingest/status.py +346 -0
  86. footprinter/mcp/__init__.py +0 -0
  87. footprinter/mcp/__main__.py +5 -0
  88. footprinter/mcp/db.py +57 -0
  89. footprinter/mcp/errors.py +102 -0
  90. footprinter/mcp/extraction.py +226 -0
  91. footprinter/mcp/server.py +39 -0
  92. footprinter/mcp/tools/__init__.py +0 -0
  93. footprinter/mcp/tools/navigation.py +70 -0
  94. footprinter/mcp/tools/read.py +75 -0
  95. footprinter/mcp/tools/search.py +158 -0
  96. footprinter/mcp/tools/semantic.py +79 -0
  97. footprinter/mcp/tools/status.py +15 -0
  98. footprinter/paths.py +91 -0
  99. footprinter/permissions.py +1160 -0
  100. footprinter/semantic/__init__.py +13 -0
  101. footprinter/semantic/chunking.py +52 -0
  102. footprinter/semantic/embeddings.py +23 -0
  103. footprinter/semantic/hybrid_search.py +273 -0
  104. footprinter/semantic/vector_store.py +471 -0
  105. footprinter/services/__init__.py +49 -0
  106. footprinter/services/access_service.py +342 -0
  107. footprinter/services/chat_service.py +85 -0
  108. footprinter/services/client_service.py +267 -0
  109. footprinter/services/content_service.py +181 -0
  110. footprinter/services/email_service.py +89 -0
  111. footprinter/services/file_service.py +83 -0
  112. footprinter/services/folder_service.py +122 -0
  113. footprinter/services/includes.py +19 -0
  114. footprinter/services/ingest_service.py +231 -0
  115. footprinter/services/project_service.py +262 -0
  116. footprinter/services/roles.py +25 -0
  117. footprinter/services/search_service.py +177 -0
  118. footprinter/services/semantic_service.py +360 -0
  119. footprinter/services/status_service.py +18 -0
  120. footprinter/services/visit_service.py +65 -0
  121. footprinter/source_registry.py +194 -0
  122. footprinter/utils/__init__.py +7 -0
  123. footprinter/utils/hash_utils.py +59 -0
  124. footprinter/utils/logging_config.py +68 -0
  125. footprinter/utils/mime.py +30 -0
  126. footprinter/utils/text.py +6 -0
  127. footprinter/utils/time.py +11 -0
  128. footprinter/visibility.py +1272 -0
  129. footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
  130. footprinter_cli-1.0.0.dist-info/METADATA +229 -0
  131. footprinter_cli-1.0.0.dist-info/RECORD +134 -0
  132. footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
  133. footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
  134. footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,332 @@
1
+ """Shared CLI utilities used across all CLI subcommands.
2
+
3
+ Provides database connection, argument helpers, identifier resolution,
4
+ JSON output, and shared constants.
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import sqlite3
10
+ import sys
11
+ from contextlib import contextmanager
12
+ from pathlib import Path
13
+ from typing import Optional, Union
14
+
15
+ from rich.console import Console
16
+
17
+ from footprinter.services import access_service as _access
18
+ from footprinter.services.access_service import (
19
+ resolve_inherit_permission,
20
+ resolve_inherit_visibility,
21
+ )
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Shared instances and constants
25
+ # ---------------------------------------------------------------------------
26
+
27
# Shared Rich console instance; the helpers in this module print their
# user-facing messages (e.g. the "Database not found" error) through it.
console = Console()
28
+
29
+ # Formatter for parsers that use description= or epilog= with pre-formatted text.
30
+ # Custom subclass replaces the dense argparse usage line with a clean header.
31
+
32
+
33
class FootprinterHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """Help formatter that prints a short ``Usage: <prog>`` header.

    Inherits from ``argparse.RawDescriptionHelpFormatter`` and overrides only
    the usage rendering.
    """

    def _format_usage(self, usage, actions, groups, prefix):
        """Return the usage text for this parser.

        The *usage*, *actions* and *groups* arguments are accepted for
        signature compatibility with argparse but are not consulted; the
        output is built from ``self._prog`` alone.
        """
        # argparse passes prefix="" when it derives the prog prefix handed to
        # subparsers. Wrapping the name in that case would stack a new
        # "Usage:" label at every nesting level, so hand back the bare name.
        wants_bare_prog = prefix == ""
        if not wants_bare_prog:
            return f"\nUsage: {self._prog}\n\n"
        return self._prog


# Name under which the formatter class is exposed to the rest of the CLI.
FORMATTER = FootprinterHelpFormatter
44
+
45
# Rich style names shared by the CLI subcommands so that the same kind of
# message is always rendered in the same color.
C_SUCCESS = "green"
C_WARNING = "yellow"
C_ERROR = "red"
C_INFO = "cyan"
C_DIM = "dim"

# Closed set of status strings.
VALID_STATUSES = frozenset(
    (
        "active",
        "paused",
        "completed",
        "abandoned",
        "archived",
        "merged",
    )
)

# Allowlists for the table and column names that resolve_identifier may
# interpolate into SQL text.
ALLOWED_TABLES = frozenset(("clients", "projects"))
ALLOWED_COLUMNS = frozenset(("name", "project_name"))
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Database connection
69
+ # ---------------------------------------------------------------------------
70
+
71
+
72
def connect_db(db_path: Union[str, Path]) -> Optional[sqlite3.Connection]:
    """Open a read/write SQLite connection to the Footprinter database.

    Args:
        db_path: Location of the database file, as a string or ``Path``.

    Returns:
        A connection with ``row_factory`` set to ``sqlite3.Row``, a
        5000 ms ``busy_timeout`` and foreign-key enforcement switched on,
        or ``None`` when nothing exists at *db_path* (the file is never
        created implicitly).
    """
    target = Path(db_path)
    if target.exists():
        connection = sqlite3.connect(str(target), timeout=10)
        connection.row_factory = sqlite3.Row
        # Per-connection settings applied once here so callers don't have to.
        for pragma in ("PRAGMA busy_timeout=5000", "PRAGMA foreign_keys=ON"):
            connection.execute(pragma)
        return connection
    return None
86
+
87
+
88
@contextmanager
def open_db(db_path=None):
    """Context manager yielding an open connection to the Footprinter DB.

    Args:
        db_path: Database location. When ``None`` the path is obtained from
            ``footprinter.paths.get_db_path()``.

    Yields:
        The connection returned by ``connect_db``; it is closed when the
        ``with`` block ends, whether or not an exception was raised.

    If ``connect_db`` reports that the database file is missing, an error is
    printed and the process exits with status 1.
    """
    if db_path is None:
        # Resolve the default location only when the caller did not pass one.
        from footprinter.paths import get_db_path

        db_path = get_db_path()

    connection = connect_db(db_path)
    if connection is None:
        not_found = (
            "[red]Database not found.[/red] Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        console.print(not_found)
        sys.exit(1)

    try:
        yield connection
    finally:
        connection.close()
108
+
109
+
110
@contextmanager
def open_database(db_path=None):
    """Context manager yielding a ``Database`` wrapper for the Footprinter DB.

    Counterpart of ``open_db`` for callers that need the ``Database`` object
    from ``footprinter.ingest.database`` rather than a bare
    ``sqlite3.Connection``.

    Args:
        db_path: Database location. When ``None`` the path is obtained from
            ``footprinter.paths.get_db_path()``.

    Yields:
        A ``Database`` built from the path; its ``close()`` is called when the
        ``with`` block ends, whether or not an exception was raised.

    If no file exists at the path, an error is printed and the process exits
    with status 1.
    """
    if db_path is None:
        # Resolve the default location only when the caller did not pass one.
        from footprinter.paths import get_db_path

        db_path = get_db_path()

    location = Path(db_path)
    if not location.exists():
        not_found = (
            "[red]Database not found.[/red] Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        console.print(not_found)
        sys.exit(1)

    # Imported only after the existence check has passed.
    from footprinter.ingest.database import Database

    wrapper = Database(str(location))
    try:
        yield wrapper
    finally:
        wrapper.close()
137
+
138
+
139
+ # ---------------------------------------------------------------------------
140
+ # Argument helpers
141
+ # ---------------------------------------------------------------------------
142
+
143
+
144
def add_json_flag(parser) -> None:
    """Register a boolean ``--json`` switch (off by default) on *parser*."""
    flag_options = {
        "action": "store_true",
        "default": False,
        "help": "Output as JSON",
    }
    parser.add_argument("--json", **flag_options)
152
+
153
+
154
def add_csv_flag(parser) -> None:
    """Register a boolean ``--csv`` switch (off by default) on *parser*."""
    flag_options = {
        "action": "store_true",
        "default": False,
        "help": "Output as CSV",
    }
    parser.add_argument("--csv", **flag_options)
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Output helpers
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
def output_json(data) -> None:
    """Write *data* to stdout as JSON indented by two spaces.

    Values the ``json`` module cannot encode natively are passed through
    ``str`` (``default=str``), so objects such as datetimes are emitted as
    their string form instead of raising.
    """
    rendered = json.dumps(data, default=str, indent=2)
    print(rendered)
175
+
176
+
177
+ def output_csv(rows: list[dict], columns: list[str] | None = None) -> None:
178
+ """Write *rows* as CSV to stdout.
179
+
180
+ If *columns* is given, output only those columns in that order.
181
+ Otherwise, use all keys from the first row.
182
+ """
183
+ import csv
184
+
185
+ if not rows:
186
+ return
187
+ if columns is None:
188
+ columns = list(rows[0].keys())
189
+ writer = csv.DictWriter(sys.stdout, fieldnames=columns, extrasaction="ignore")
190
+ writer.writeheader()
191
+ for row in rows:
192
+ writer.writerow({k: str(v) if v is not None else "" for k, v in row.items()})
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Identifier resolution
197
+ # ---------------------------------------------------------------------------
198
+
199
+
200
def resolve_identifier(
    conn: sqlite3.Connection,
    table: str,
    name_col: str,
    identifier: str,
) -> int:
    """Resolve a user-supplied identifier to a row ID.

    The identifier is first tried as a numeric ``id``; if it is not numeric,
    or no row has that id, it is matched case-insensitively against
    *name_col*.

    Args:
        conn: Open connection whose rows support access by column name
            (e.g. ``row_factory = sqlite3.Row``).
        table: Table to search; must be a member of ``ALLOWED_TABLES``.
        name_col: Name column to match; must be a member of
            ``ALLOWED_COLUMNS``.
        identifier: Numeric id or name as typed by the user.

    Returns:
        The integer row ID of the single matching row.

    Raises:
        ValueError: If *table* / *name_col* is not allowlisted, if no row
            matches by id or name, or if several rows match the name (the
            message then lists every match).
    """
    # Table and column names are interpolated into the SQL text below (they
    # cannot be bound as parameters), so only allowlisted names may pass.
    if table not in ALLOWED_TABLES or name_col not in ALLOWED_COLUMNS:
        raise ValueError(f"Invalid table/column: {table}.{name_col}")

    # Try numeric ID first. Only the int() conversion is guarded by the
    # ValueError handler, so an unrelated ValueError is never swallowed.
    try:
        row_id = int(identifier)
    except ValueError:
        row_id = None

    if row_id is not None:
        try:
            id_match = conn.execute(
                f"SELECT id FROM {table} WHERE id = ?",
                (row_id,),
            ).fetchone()
        except OverflowError:
            # An all-digit identifier outside SQLite's signed 64-bit INTEGER
            # range cannot be bound as a parameter and therefore cannot be a
            # row id; treat it as a name instead of crashing.
            id_match = None
        if id_match:
            return row_id

    # Fall back to case-insensitive name match
    rows = conn.execute(
        f"SELECT id, {name_col} FROM {table} WHERE {name_col} COLLATE NOCASE = ?",
        (identifier,),
    ).fetchall()

    if not rows:
        raise ValueError(f"No {table} found matching '{identifier}'")

    if len(rows) == 1:
        return rows[0]["id"]

    # Ambiguous — list all matches
    match_list = ", ".join(f"id={r['id']} name={r[name_col]!r}" for r in rows)
    raise ValueError(f"Ambiguous: {len(rows)} {table} match '{identifier}': {match_list}")
247
+
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # Pure utilities
251
+ # ---------------------------------------------------------------------------
252
+
253
+
254
def add_verbose_flag(parser) -> None:
    """Register a boolean ``--verbose`` switch (off by default) on *parser*."""
    flag_options = {
        "action": "store_true",
        "default": False,
        "help": "Show access and visibility columns",
    }
    parser.add_argument("--verbose", **flag_options)
262
+
263
+
264
def enrich_verbose_access(
    rows: list[dict],
    entity_type: str,
    *,
    id_key: str = "id",
) -> None:
    """Add ``access``, ``access_source`` and ``visibility`` to each row in place.

    Args:
        rows: Row dicts to annotate; an empty (or falsy) value is a no-op.
        entity_type: Accepted for interface compatibility; not read here.
        id_key: Accepted for interface compatibility; not read here.

    ``access`` / ``access_source`` depend on the row's ``mcp_read`` entry:

    * **Key absent**: both are set to ``"—"``.
    * **Value is None**: ``access`` is whatever
      ``resolve_inherit_permission(None)`` returns; source is ``"default"``.
    * **Value is "inherit"**: ``access`` is whatever
      ``resolve_inherit_permission("inherit")`` returns; source is
      ``"global"`` when ``is_global_policy_loaded()`` is true, otherwise
      ``"baseline"``.
    * **Any other value**: ``access`` is ``"allow"`` only for the exact
      value ``"allow"`` and ``"deny"`` otherwise; source is ``"cached"``.

    ``visibility`` is always ``resolve_inherit_visibility`` applied to the
    row's ``mcp_view`` entry (``None`` when the key is missing).
    """
    if not rows:
        return

    for row in rows:
        if "mcp_read" in row:
            stored = row["mcp_read"]
            if stored in (None, "inherit"):
                # No explicit per-row decision: defer to the resolver.
                row["access"] = resolve_inherit_permission(stored)
                if stored == "inherit":
                    policy_loaded = _access.is_global_policy_loaded()
                    row["access_source"] = "global" if policy_loaded else "baseline"
                else:
                    row["access_source"] = "default"
            else:
                # Explicit value: anything other than "allow" is shown as deny.
                row["access"] = "deny" if stored != "allow" else "allow"
                row["access_source"] = "cached"
        else:
            row["access"] = "—"
            row["access_source"] = "—"
        row["visibility"] = resolve_inherit_visibility(row.get("mcp_view"))
304
+
305
+
306
def verbose_access_cells(row: dict) -> list[str]:
    """Build the Rich-markup table cells for a row's access and visibility.

    Args:
        row: Row dict; ``access`` defaults to ``"deny"`` and ``visibility``
            to ``"opaque"`` when the key is missing.

    Returns:
        ``[access_cell, visibility_cell]``. Access is a dim dash for ``"—"``,
        green for ``"allow"`` and a red ``deny`` for anything else.
        Visibility is colored green / yellow / red for
        visible / opaque / hidden and white for any other value.
    """
    access = row.get("access", "deny")
    if access == "allow":
        access_cell = "[green]allow[/green]"
    elif access == "—":
        access_cell = "[dim]—[/dim]"
    else:
        access_cell = "[red]deny[/red]"

    color_by_visibility = {"visible": "green", "opaque": "yellow", "hidden": "red"}
    visibility = row.get("visibility", "opaque")
    color = color_by_visibility.get(visibility, "white")

    return [access_cell, f"[{color}]{visibility}[/{color}]"]
322
+
323
+
324
def format_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string (B / KB / MB / GB).

    Args:
        size_bytes: Size in bytes.

    Returns:
        The plain count followed by ``B`` for values below 1024, otherwise
        the value scaled by powers of 1024 with one decimal place. ``GB`` is
        the largest unit, so bigger values are still reported in GB.

    The unit is chosen from the value as it will be displayed, so a size
    just under a boundary rolls over to the next unit (``"1.0 MB"``) rather
    than rendering as ``"1024.0 KB"``.
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    value = float(size_bytes)
    for unit in ("KB", "MB"):
        value /= 1024
        # Compare the rounded value: this is what ".1f" will print.
        if round(value, 1) < 1024:
            return f"{value:.1f} {unit}"
    return f"{value / 1024:.1f} GB"