footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,327 @@
1
+ """Shared CLI utilities used across all CLI subcommands.
2
+
3
+ Provides database connection, argument helpers, identifier resolution,
4
+ JSON output, and shared constants.
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import sqlite3
10
+ import sys
11
+ from contextlib import contextmanager
12
+ from pathlib import Path
13
+ from typing import Optional, Union
14
+
15
+ from rich.console import Console
16
+
17
+ from footprinter.services import access_service as _access
18
+ from footprinter.services.access_service import (
19
+ resolve_inherit_permission,
20
+ resolve_inherit_visibility,
21
+ )
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Shared instances and constants
25
+ # ---------------------------------------------------------------------------
26
+
27
# Shared Rich console instance; the database helpers in this module print
# their user-facing error messages through it.
console = Console()
28
+
29
+ # Formatter for parsers that use description= or epilog= with pre-formatted text.
30
+ # Custom subclass replaces the dense argparse usage line with a clean header.
31
+
32
+
33
class FootprinterHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """Raw-description help formatter with a minimal usage header.

    Inherits the handling of pre-formatted ``description=`` / ``epilog=``
    text from ``argparse.RawDescriptionHelpFormatter`` and overrides only
    the usage line.
    """

    def _format_usage(self, usage, actions, groups, prefix):
        """Return a short usage header that names only the program.

        ``usage``, ``actions``, ``groups`` and ``prefix`` are accepted because
        argparse passes them, but none of them influences the result.
        """
        program = self._prog
        return "\nUsage: {}\n\n".format(program)


# Alias that parsers pass as ``formatter_class``.
FORMATTER = FootprinterHelpFormatter
39
+
40
# Color vocabulary: style names reused for markup across CLI subcommands.
C_SUCCESS = "green"
C_WARNING = "yellow"
C_ERROR = "red"
C_INFO = "cyan"
C_DIM = "dim"

# Accepted status values.
# NOTE(review): presumably the lifecycle states of projects/clients; confirm against callers.
VALID_STATUSES = frozenset(
    ("active", "paused", "completed", "abandoned", "archived", "merged")
)

# Table and column names that ``resolve_identifier`` is willing to interpolate
# into its SQL text; anything else is rejected before a query is built.
ALLOWED_TABLES = frozenset(("clients", "projects"))
ALLOWED_COLUMNS = frozenset(("name", "project_name"))
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Database connection
64
+ # ---------------------------------------------------------------------------
65
+
66
+
67
def connect_db(db_path: Union[str, Path]) -> Optional[sqlite3.Connection]:
    """Open a read/write connection to the Footprinter database.

    Args:
        db_path: Location of the SQLite database file.

    Returns:
        A connection with ``row_factory`` set to ``sqlite3.Row``, a 5000 ms
        busy timeout and foreign-key enforcement switched on, or ``None``
        when no regular file exists at *db_path*.

    Raises:
        sqlite3.Error: If the file exists but the connection cannot be
            opened or configured.
    """
    db_file = Path(db_path)
    # is_file() instead of exists(): a directory at this path is not a
    # database, and handing it to sqlite3.connect() would raise instead of
    # producing the documented "not found" result.
    if not db_file.is_file():
        return None

    conn = sqlite3.connect(str(db_file), timeout=10)
    try:
        conn.row_factory = sqlite3.Row
        # The PRAGMA replaces the busy handler installed by ``timeout=10``,
        # so the effective wait on a locked database is 5000 ms.
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
    except Exception:
        # Do not leak the handle when the connection cannot be configured.
        conn.close()
        raise
    return conn
81
+
82
+
83
@contextmanager
def open_db(db_path=None):
    """Context manager yielding an open connection to the Footprinter DB.

    When *db_path* is ``None`` the location is taken from
    ``footprinter.paths.get_db_path()``. The connection comes from
    ``connect_db`` and is closed when the ``with`` block ends, whether or
    not the block raised.

    If ``connect_db`` reports that the database is missing, a hint is
    printed and the process exits with status 1.
    """
    target = db_path
    if target is None:
        # Imported lazily so the default is only resolved when it is needed.
        from footprinter.paths import get_db_path

        target = get_db_path()

    connection = connect_db(target)
    if connection is None:
        console.print(
            "[red]Database not found.[/red] Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        sys.exit(1)

    try:
        yield connection
    finally:
        connection.close()
103
+
104
+
105
@contextmanager
def open_database(db_path=None):
    """Context manager yielding a ``Database`` wrapper for the Footprinter DB.

    Counterpart of ``open_db`` for callers that need the
    ``footprinter.ingest.database.Database`` object rather than a raw
    ``sqlite3.Connection``. When *db_path* is ``None`` the location is taken
    from ``footprinter.paths.get_db_path()``. The wrapper is closed when the
    ``with`` block ends, whether or not the block raised.

    If nothing exists at the resolved path, a hint is printed and the
    process exits with status 1.
    """
    target = db_path
    if target is None:
        # Imported lazily so the default is only resolved when it is needed.
        from footprinter.paths import get_db_path

        target = get_db_path()

    target = Path(target)
    if not target.exists():
        console.print(
            "[red]Database not found.[/red] Run [bold]fp setup[/bold] then [bold]fp ingest[/bold] to initialize."
        )
        sys.exit(1)

    # Deferred import: only reached once the database is known to exist.
    from footprinter.ingest.database import Database

    database = Database(str(target))
    try:
        yield database
    finally:
        database.close()
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Argument helpers
136
+ # ---------------------------------------------------------------------------
137
+
138
+
139
def add_json_flag(parser) -> None:
    """Register a boolean ``--json`` switch on *parser*.

    The parsed namespace carries ``json=True`` when the switch is given and
    ``json=False`` otherwise.
    """
    options = {
        "action": "store_true",
        "default": False,
        "help": "Output as JSON",
    }
    parser.add_argument("--json", **options)
147
+
148
+
149
def add_csv_flag(parser) -> None:
    """Register a boolean ``--csv`` switch on *parser*.

    The parsed namespace carries ``csv=True`` when the switch is given and
    ``csv=False`` otherwise.
    """
    options = {
        "action": "store_true",
        "default": False,
        "help": "Output as CSV",
    }
    parser.add_argument("--csv", **options)
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Output helpers
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
def output_json(data) -> None:
    """Print *data* to stdout as JSON indented by two spaces.

    Values the JSON encoder cannot handle natively (for example datetime
    objects) are converted with ``str`` instead of raising.
    """
    rendered = json.dumps(data, default=str, indent=2)
    print(rendered)
170
+
171
+
172
+ def output_csv(rows: list[dict], columns: list[str] | None = None) -> None:
173
+ """Write *rows* as CSV to stdout.
174
+
175
+ If *columns* is given, output only those columns in that order.
176
+ Otherwise, use all keys from the first row.
177
+ """
178
+ import csv
179
+
180
+ if not rows:
181
+ return
182
+ if columns is None:
183
+ columns = list(rows[0].keys())
184
+ writer = csv.DictWriter(sys.stdout, fieldnames=columns, extrasaction="ignore")
185
+ writer.writeheader()
186
+ for row in rows:
187
+ writer.writerow({k: str(v) if v is not None else "" for k, v in row.items()})
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # Identifier resolution
192
+ # ---------------------------------------------------------------------------
193
+
194
+
195
def resolve_identifier(
    conn: sqlite3.Connection,
    table: str,
    name_col: str,
    identifier: str,
) -> int:
    """Resolve a user-supplied identifier to a row ID.

    The identifier is first tried as a numeric ``id``; if that does not
    match an existing row, it is compared case-insensitively against
    *name_col*.

    Args:
        conn: Open connection whose rows support access by column name
            (``rows[0]["id"]`` is used on the name-match path).
        table: Table to search; must be listed in ``ALLOWED_TABLES``.
        name_col: Name column to match; must be listed in ``ALLOWED_COLUMNS``.
        identifier: Numeric ID or name typed by the user.

    Returns:
        The integer row ID of the single matching row.

    Raises:
        ValueError: If *table* / *name_col* is not allowed, if no row matches
            by ID or name, or if several rows match the name (the message
            lists every match).
    """
    # Both names are interpolated into the SQL text below, so they are
    # checked against fixed allow-lists before any query is built.
    if table not in ALLOWED_TABLES or name_col not in ALLOWED_COLUMNS:
        raise ValueError(f"Invalid table/column: {table}.{name_col}")

    # Try numeric ID first
    try:
        row_id = int(identifier)
    except ValueError:
        row_id = None

    if row_id is not None:
        try:
            found = conn.execute(
                f"SELECT id FROM {table} WHERE id = ?",
                (row_id,),
            ).fetchone()
        except OverflowError:
            # The number does not fit SQLite's 64-bit INTEGER, so it cannot
            # be a row ID; treat the identifier as a name instead of crashing.
            found = None
        if found:
            return row_id

    # Fall back to case-insensitive name match
    rows = conn.execute(
        f"SELECT id, {name_col} FROM {table} WHERE {name_col} COLLATE NOCASE = ?",
        (identifier,),
    ).fetchall()

    if not rows:
        raise ValueError(f"No {table} found matching '{identifier}'")

    if len(rows) == 1:
        return rows[0]["id"]

    # Ambiguous — list all matches
    match_list = ", ".join(f"id={r['id']} name={r[name_col]!r}" for r in rows)
    raise ValueError(f"Ambiguous: {len(rows)} {table} match '{identifier}': {match_list}")
242
+
243
+
244
+ # ---------------------------------------------------------------------------
245
+ # Pure utilities
246
+ # ---------------------------------------------------------------------------
247
+
248
+
249
def add_verbose_flag(parser) -> None:
    """Register a boolean ``--verbose`` switch on *parser*.

    The parsed namespace carries ``verbose=True`` when the switch is given
    and ``verbose=False`` otherwise.
    """
    options = {
        "action": "store_true",
        "default": False,
        "help": "Show access and visibility columns",
    }
    parser.add_argument("--verbose", **options)
257
+
258
+
259
def enrich_verbose_access(
    rows: list[dict],
    entity_type: str,
    *,
    id_key: str = "id",
) -> None:
    """Add ``access``, ``access_source`` and ``visibility`` keys to each row.

    Rows are modified in place and nothing is returned. *entity_type* and
    *id_key* are part of the signature but are not read by this function.

    ``visibility`` is always ``resolve_inherit_visibility(row.get("mcp_view"))``.
    ``access`` / ``access_source`` depend on the row's ``mcp_read`` entry,
    with four cases:

    * **Key absent**: both are set to "—".
    * **Value is None**: access comes from ``resolve_inherit_permission``,
      source is "default".
    * **Value is "inherit"**: access comes from ``resolve_inherit_permission``;
      source is "global" when ``is_global_policy_loaded()`` reports a loaded
      policy, otherwise "baseline".
    * **Any other value**: access is "allow" only for the exact value
      "allow", otherwise "deny"; source is "cached".

    No-op if *rows* is empty.
    """
    if not rows:
        return

    for row in rows:
        if "mcp_read" not in row:
            row["access"] = row["access_source"] = "—"
        else:
            stored = row["mcp_read"]
            if stored in (None, "inherit"):
                # Nothing concrete is stored on the row: defer to the resolver.
                row["access"] = resolve_inherit_permission(stored)
                if stored == "inherit":
                    source = "global" if _access.is_global_policy_loaded() else "baseline"
                else:
                    source = "default"
                row["access_source"] = source
            else:
                row["access"] = "allow" if stored == "allow" else "deny"
                row["access_source"] = "cached"

        # Visibility is resolved for every row, whatever its mcp_read state.
        row["visibility"] = resolve_inherit_visibility(row.get("mcp_view"))
299
+
300
+
301
def verbose_access_cells(row: dict) -> list[str]:
    """Build the ``[access, visibility]`` table cells for *row* with color markup.

    A missing ``access`` key is treated as "deny" and a missing
    ``visibility`` key as "opaque". Any access value other than "—" or
    "allow" is rendered as a red "deny"; a visibility value without a known
    color is rendered in white.
    """
    access = row.get("access", "deny")
    if access == "—":
        access_cell = "[dim]—[/dim]"
    else:
        access_cell = "[green]allow[/green]" if access == "allow" else "[red]deny[/red]"

    visibility = row.get("visibility", "opaque")
    palette = {"visible": "green", "opaque": "yellow", "hidden": "red"}
    color = palette.get(visibility, "white")
    visibility_cell = "[{0}]{1}[/{0}]".format(color, visibility)

    return [access_cell, visibility_cell]
317
+
318
+
319
def format_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string (B / KB / MB / GB).

    Values below 1024 are shown as whole bytes. Larger values are shown with
    one decimal place in the smallest unit whose rounded figure stays below
    1024, so a count just under a unit boundary reads "1.0 MB" rather than
    "1024.0 KB". GB is the largest unit; bigger values keep growing in GB.

    Args:
        size_bytes: Number of bytes.

    Returns:
        The formatted size, e.g. ``"512 B"``, ``"1.5 KB"`` or ``"2.0 GB"``.
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"

    value = size_bytes / 1024
    for unit in ("KB", "MB"):
        # Compare the rounded figure, not the raw one: 1023.96 would print
        # as "1024.0", which belongs to the next unit up.
        if round(value, 1) < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} GB"