footprinter-cli 1.0.2__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {footprinter_cli-1.0.2/footprinter_cli.egg-info → footprinter_cli-1.0.3}/PKG-INFO +18 -18
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/README.md +17 -17
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/config.example.yaml +9 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/__init__.py +1 -2
- footprinter_cli-1.0.3/footprinter/cli/_vectorize_stage.py +117 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/doctor.py +6 -3
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/ingest.py +183 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/setup.py +6 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/status.py +60 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/upsert.py +2 -3
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/browser.py +2 -3
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/chats.py +28 -17
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/clients.py +2 -2
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/files.py +11 -5
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/folders.py +10 -14
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/projects.py +30 -17
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/search.py +1 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/status.py +49 -2
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/cli.py +1 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/file_indexer.py +10 -71
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/file_scanner.py +27 -12
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/folder_indexer.py +2 -3
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/full_content_extractor.py +25 -12
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/orchestrator.py +5 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/processing.py +139 -1
- footprinter_cli-1.0.3/footprinter/ingest/scan_summary.py +78 -0
- footprinter_cli-1.0.3/footprinter/mcp/resources/discoverability.py +59 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/server.py +4 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/search.py +22 -4
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/vector_store.py +4 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/project_service.py +1 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3/footprinter_cli.egg-info}/PKG-INFO +18 -18
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/SOURCES.txt +3 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/pyproject.toml +1 -1
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/LICENSE +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/access.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/db.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/entities.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/search.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/semantic.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/server.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/status.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/context_patterns.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/extensions.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/filename_patterns.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/mime_mappings.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/salesforce_rules.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/security_patterns.yaml +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/__main__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_common.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_policy_helpers.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_prompt.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/api_cmd.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/connect.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/data.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/delete.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/mcp_cmd.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/mcp_setup.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/search.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/search_cmd.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/status_cmd.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/uninstall.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/vectorize_cmd.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/view.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/connectors/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/connectors/config_utils.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/emails.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/messages.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/policies.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/sql_utils.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/uploads.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/browser.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/chat.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/ingest.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/local_files.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/local_folders.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/protocol.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/browser_indexer.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_indexer.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/chatgpt_parser.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/claude_parser.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/content_extractors.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/database.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/connector_schema.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/migration.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/schema.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/security.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/pipe_runner.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/registry.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/run_record.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/status.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/__main__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/db.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/errors.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/extraction.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/resources/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/resources/context.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/navigation.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/read.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/semantic.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/status.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/paths.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/permissions.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/chunking.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/embeddings.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/hybrid_search.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/access_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/chat_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/client_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/content_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/email_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/file_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/folder_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/includes.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/ingest_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/roles.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/search_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/semantic_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/status_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/visit_service.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/source_registry.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/__init__.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/hash_utils.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/logging_config.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/mime.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/text.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/time.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/visibility.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/dependency_links.txt +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/entry_points.txt +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/requires.txt +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/top_level.txt +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/setup.cfg +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_control_bypasses.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_control_docs.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_recalculate.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_build_status_filter.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_bundled.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_e2e_install.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_e2e_pipeline.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_edit_recalculate.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_examples.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_files_rename.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_files_surface.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_inherit_resolution.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_logging.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_no_project_root.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_package_init.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_paths_no_test_marker.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_pip_install_e2e.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_prompt_safety.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_resolver.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_security_layer.py +0 -0
- {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_security_permissions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: footprinter-cli
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: A local context layer for your files, browser history, chats, and email — searchable, user-owned, MCP-served.
|
|
5
5
|
Author: SwellCity Group
|
|
6
6
|
License: MIT
|
|
@@ -52,12 +52,12 @@ Requires-Dist: httpx<1.0,>=0.27.0; extra == "dev"
|
|
|
52
52
|
|
|
53
53
|
# Footprinter
|
|
54
54
|
|
|
55
|
-
[](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml)
|
|
56
56
|
[](https://pypi.org/project/footprinter-cli/)
|
|
57
57
|
|
|
58
58
|
**A local context layer for your files, browser history, chats, and email — searchable, user-owned, and served to AI agents through [MCP](https://modelcontextprotocol.io/).**
|
|
59
59
|
|
|
60
|
-
Your work lives across
|
|
60
|
+
Your work lives across filesystems, browsers, inboxes, chat histories, and other tools. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings you define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
|
|
61
61
|
|
|
62
62
|
## Prerequisites
|
|
63
63
|
|
|
@@ -71,10 +71,10 @@ The fastest path on a clean machine is the install script — it ensures Python
|
|
|
71
71
|
|
|
72
72
|
```bash
|
|
73
73
|
# Base install (CLI + MCP + HTTP API)
|
|
74
|
-
curl -fsSL https://raw.githubusercontent.com/
|
|
74
|
+
curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install.sh | bash
|
|
75
75
|
|
|
76
76
|
# Full install (adds semantic search + document parsing)
|
|
77
|
-
curl -fsSL https://raw.githubusercontent.com/
|
|
77
|
+
curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install-full.sh | bash
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
If you prefer to manage the install yourself, use **pipx** (recommended) — it isolates Footprinter and sidesteps the macOS install caveats noted below:
|
|
@@ -160,7 +160,7 @@ Once configured, Claude can search your files, browse projects, and find related
|
|
|
160
160
|
| **Documents** | PDF, Word, Excel, PowerPoint content (with `[parse]` extra) |
|
|
161
161
|
| **Semantic embeddings** | Conceptual similarity across all sources (with `[semantic]` extra) |
|
|
162
162
|
|
|
163
|
-
What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/
|
|
163
|
+
What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) for the full breakdown.
|
|
164
164
|
|
|
165
165
|
Additional sources are available through [connector plugins](#connectors).
|
|
166
166
|
|
|
@@ -213,22 +213,22 @@ Sources are scanned into SQLite with bidirectional links connecting local files
|
|
|
213
213
|
|
|
214
214
|
## Documentation
|
|
215
215
|
|
|
216
|
-
- [Interfaces](https://github.com/
|
|
217
|
-
- [Data Model](https://github.com/
|
|
218
|
-
- [Pipeline](https://github.com/
|
|
219
|
-
- [Content Storage](https://github.com/
|
|
220
|
-
- [Access Control](https://github.com/
|
|
216
|
+
- [Interfaces](https://github.com/harringjohn/footprinter-cli/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
|
|
217
|
+
- [Data Model](https://github.com/harringjohn/footprinter-cli/blob/main/reference/data-model.md) — database schema
|
|
218
|
+
- [Pipeline](https://github.com/harringjohn/footprinter-cli/blob/main/reference/pipeline.md) — indexing stages and configuration
|
|
219
|
+
- [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
|
|
220
|
+
- [Access Control](https://github.com/harringjohn/footprinter-cli/blob/main/reference/mcp-access-control.md) — MCP security model
|
|
221
221
|
|
|
222
222
|
## Contributing
|
|
223
223
|
|
|
224
|
-
Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/
|
|
224
|
+
Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/harringjohn/footprinter-cli/issues) first to discuss the approach.
|
|
225
225
|
|
|
226
226
|
Connector plugins use an internal API that isn't stable yet — we're not accepting connector contributions at this time.
|
|
227
227
|
|
|
228
228
|
### Development setup
|
|
229
229
|
|
|
230
230
|
```bash
|
|
231
|
-
git clone https://github.com/
|
|
231
|
+
git clone https://github.com/harringjohn/footprinter-cli.git
|
|
232
232
|
cd footprinter
|
|
233
233
|
python3 -m venv venv
|
|
234
234
|
./venv/bin/pip install -e ".[dev]"
|
|
@@ -254,7 +254,7 @@ python3 -m venv venv
|
|
|
254
254
|
4. Run the test suite
|
|
255
255
|
5. Submit a PR targeting `main`
|
|
256
256
|
|
|
257
|
-
Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/
|
|
257
|
+
Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md).
|
|
258
258
|
|
|
259
259
|
### Pull request expectations
|
|
260
260
|
|
|
@@ -265,13 +265,13 @@ Never commit API keys, tokens, or credentials. Report security vulnerabilities p
|
|
|
265
265
|
|
|
266
266
|
All PRs are reviewed by the maintainer. Expect reviews within one week. CI must pass before review begins.
|
|
267
267
|
|
|
268
|
-
No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/
|
|
268
|
+
No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
|
|
269
269
|
|
|
270
270
|
## Community
|
|
271
271
|
|
|
272
|
-
- [Code of Conduct](https://github.com/
|
|
273
|
-
- [Security Policy](https://github.com/
|
|
272
|
+
- [Code of Conduct](https://github.com/harringjohn/footprinter-cli/blob/main/CODE_OF_CONDUCT.md)
|
|
273
|
+
- [Security Policy](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md)
|
|
274
274
|
|
|
275
275
|
## License
|
|
276
276
|
|
|
277
|
-
MIT — see [LICENSE](https://github.com/
|
|
277
|
+
MIT — see [LICENSE](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# Footprinter
|
|
2
2
|
|
|
3
|
-
[](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml)
|
|
4
4
|
[](https://pypi.org/project/footprinter-cli/)
|
|
5
5
|
|
|
6
6
|
**A local context layer for your files, browser history, chats, and email — searchable, user-owned, and served to AI agents through [MCP](https://modelcontextprotocol.io/).**
|
|
7
7
|
|
|
8
|
-
Your work lives across
|
|
8
|
+
Your work lives across filesystems, browsers, inboxes, chat histories, and other tools. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings you define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
|
|
9
9
|
|
|
10
10
|
## Prerequisites
|
|
11
11
|
|
|
@@ -19,10 +19,10 @@ The fastest path on a clean machine is the install script — it ensures Python
|
|
|
19
19
|
|
|
20
20
|
```bash
|
|
21
21
|
# Base install (CLI + MCP + HTTP API)
|
|
22
|
-
curl -fsSL https://raw.githubusercontent.com/
|
|
22
|
+
curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install.sh | bash
|
|
23
23
|
|
|
24
24
|
# Full install (adds semantic search + document parsing)
|
|
25
|
-
curl -fsSL https://raw.githubusercontent.com/
|
|
25
|
+
curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install-full.sh | bash
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
If you prefer to manage the install yourself, use **pipx** (recommended) — it isolates Footprinter and sidesteps the macOS install caveats noted below:
|
|
@@ -108,7 +108,7 @@ Once configured, Claude can search your files, browse projects, and find related
|
|
|
108
108
|
| **Documents** | PDF, Word, Excel, PowerPoint content (with `[parse]` extra) |
|
|
109
109
|
| **Semantic embeddings** | Conceptual similarity across all sources (with `[semantic]` extra) |
|
|
110
110
|
|
|
111
|
-
What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/
|
|
111
|
+
What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) for the full breakdown.
|
|
112
112
|
|
|
113
113
|
Additional sources are available through [connector plugins](#connectors).
|
|
114
114
|
|
|
@@ -161,22 +161,22 @@ Sources are scanned into SQLite with bidirectional links connecting local files
|
|
|
161
161
|
|
|
162
162
|
## Documentation
|
|
163
163
|
|
|
164
|
-
- [Interfaces](https://github.com/
|
|
165
|
-
- [Data Model](https://github.com/
|
|
166
|
-
- [Pipeline](https://github.com/
|
|
167
|
-
- [Content Storage](https://github.com/
|
|
168
|
-
- [Access Control](https://github.com/
|
|
164
|
+
- [Interfaces](https://github.com/harringjohn/footprinter-cli/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
|
|
165
|
+
- [Data Model](https://github.com/harringjohn/footprinter-cli/blob/main/reference/data-model.md) — database schema
|
|
166
|
+
- [Pipeline](https://github.com/harringjohn/footprinter-cli/blob/main/reference/pipeline.md) — indexing stages and configuration
|
|
167
|
+
- [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
|
|
168
|
+
- [Access Control](https://github.com/harringjohn/footprinter-cli/blob/main/reference/mcp-access-control.md) — MCP security model
|
|
169
169
|
|
|
170
170
|
## Contributing
|
|
171
171
|
|
|
172
|
-
Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/
|
|
172
|
+
Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/harringjohn/footprinter-cli/issues) first to discuss the approach.
|
|
173
173
|
|
|
174
174
|
Connector plugins use an internal API that isn't stable yet — we're not accepting connector contributions at this time.
|
|
175
175
|
|
|
176
176
|
### Development setup
|
|
177
177
|
|
|
178
178
|
```bash
|
|
179
|
-
git clone https://github.com/
|
|
179
|
+
git clone https://github.com/harringjohn/footprinter-cli.git
|
|
180
180
|
cd footprinter
|
|
181
181
|
python3 -m venv venv
|
|
182
182
|
./venv/bin/pip install -e ".[dev]"
|
|
@@ -202,7 +202,7 @@ python3 -m venv venv
|
|
|
202
202
|
4. Run the test suite
|
|
203
203
|
5. Submit a PR targeting `main`
|
|
204
204
|
|
|
205
|
-
Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/
|
|
205
|
+
Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md).
|
|
206
206
|
|
|
207
207
|
### Pull request expectations
|
|
208
208
|
|
|
@@ -213,13 +213,13 @@ Never commit API keys, tokens, or credentials. Report security vulnerabilities p
|
|
|
213
213
|
|
|
214
214
|
All PRs are reviewed by the maintainer. Expect reviews within one week. CI must pass before review begins.
|
|
215
215
|
|
|
216
|
-
No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/
|
|
216
|
+
No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
|
|
217
217
|
|
|
218
218
|
## Community
|
|
219
219
|
|
|
220
|
-
- [Code of Conduct](https://github.com/
|
|
221
|
-
- [Security Policy](https://github.com/
|
|
220
|
+
- [Code of Conduct](https://github.com/harringjohn/footprinter-cli/blob/main/CODE_OF_CONDUCT.md)
|
|
221
|
+
- [Security Policy](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md)
|
|
222
222
|
|
|
223
223
|
## License
|
|
224
224
|
|
|
225
|
-
MIT — see [LICENSE](https://github.com/
|
|
225
|
+
MIT — see [LICENSE](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
|
|
@@ -78,9 +78,12 @@ exclusions:
|
|
|
78
78
|
# Indexing configuration - INDEX ALL FILE TYPES
|
|
79
79
|
indexing:
|
|
80
80
|
supported_extensions: [] # Empty = index ALL file types
|
|
81
|
-
max_file_size_mb:
|
|
81
|
+
max_file_size_mb: 50 # MB; 0 = no size limit. 50 is generous for prose/docs.
|
|
82
82
|
lookback_days: 14 # Browser history window (days back to index)
|
|
83
83
|
content_snippets: false # Extract file/email content previews for keyword search
|
|
84
|
+
# `fp ingest --preview` (FPR-1723) — pre-scan summary tuning
|
|
85
|
+
preview_top_n: 10 # rows shown for top files / top directories
|
|
86
|
+
preview_size_threshold_mb: 50 # files at or above this size are flagged as outliers
|
|
84
87
|
|
|
85
88
|
# Semantic search — stores content as embeddings in a local ChromaDB database
|
|
86
89
|
# Enables finding files and chats by meaning, not just keywords
|
|
@@ -104,6 +107,11 @@ vectorization:
|
|
|
104
107
|
- .txt
|
|
105
108
|
- .pdf
|
|
106
109
|
- .docx
|
|
110
|
+
# Maximum file size (MB) eligible for vectorization. Applies even when
|
|
111
|
+
# indexing.max_file_size_mb is 0 (no limit). Chunking a multi-GB file
|
|
112
|
+
# produces millions of chunks and can OOM the host — this guardrail is
|
|
113
|
+
# always on. Skipped files appear in the post-ingest summary.
|
|
114
|
+
max_vectorize_size_mb: 100
|
|
107
115
|
# Chunk size in characters — tuned for MiniLM-L6-v2 (256-token input window).
|
|
108
116
|
# ~1000 chars ≈ 250 tokens. Larger chunks get silently truncated by the model,
|
|
109
117
|
# meaning content past the window is invisible to semantic search.
|
|
@@ -29,10 +29,9 @@ def main(argv=None) -> None:
|
|
|
29
29
|
|
|
30
30
|
if argv is None:
|
|
31
31
|
argv = _sys.argv[1:]
|
|
32
|
-
from footprinter.source_registry import ConfigError as _ConfigError
|
|
33
|
-
|
|
34
32
|
from footprinter import __version__
|
|
35
33
|
from footprinter.cli._common import FORMATTER
|
|
34
|
+
from footprinter.source_registry import ConfigError as _ConfigError
|
|
36
35
|
|
|
37
36
|
parser = argparse.ArgumentParser(
|
|
38
37
|
prog="fp",
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Shared post-ingest vectorization stage helper (FPR-1721).
|
|
2
|
+
|
|
3
|
+
Phased ingest: ``fp setup`` and ``fp ingest`` print "index is ready"
|
|
4
|
+
once the main pipeline returns, then run this follow-up stage with its
|
|
5
|
+
own progress UI. Centralized here so both call sites stay consistent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from footprinter.cli._common import console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _file_vectorization_in_config() -> bool:
|
|
14
|
+
"""Check config without touching the vector store."""
|
|
15
|
+
try:
|
|
16
|
+
from footprinter.source_registry import get_config
|
|
17
|
+
|
|
18
|
+
return bool(get_config().get("semantic", {}).get("file_vectorization", False))
|
|
19
|
+
except Exception: # ConfigError, ImportError, etc. — treat as disabled
|
|
20
|
+
return False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def run_vectorization_stage(*, quiet: bool = False) -> None:
    """Run vectorization as a follow-up stage after the main pipeline.

    No-op when ``semantic.file_vectorization`` is disabled. On failure
    the wizard/ingest run continues — vectorization is best-effort, so any
    ordinary exception (including one raised while importing or constructing
    the orchestrator) is reported as a warning instead of propagating.

    Args:
        quiet: When true, print no banner, progress bar, summary or warning.
    """
    if not _file_vectorization_in_config():
        return

    if not quiet:
        console.print()
        console.print("[green]✓ Your Footprinter index is ready to use.[/green]")
        console.print()
        console.print("[bold]Deep Read (semantic search)[/bold] is running in the background.")
        console.print(
            "A message will appear here when complete. [yellow]Do not close this window.[/yellow]"
        )

    orchestrator = None
    progress = None
    task_id = None

    def stop_progress() -> None:
        """Stop the live progress display once; later calls do nothing."""
        nonlocal progress
        if progress is not None:
            active, progress = progress, None
            active.stop()

    try:
        # Import and construction live inside the try block: a failure here
        # must degrade to a warning just like a failure during the run itself.
        from footprinter.ingest.orchestrator import DataPipelineOrchestrator

        orchestrator = DataPipelineOrchestrator()

        if not quiet:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            progress = Progress(
                SpinnerColumn(),
                TextColumn("{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                console=console,
                transient=True,
            )
            progress.start()
            task_id = progress.add_task("[cyan]Vectorizing files[/cyan]", total=None)

        def on_progress(count: int) -> None:
            if progress is not None and task_id is not None:
                progress.update(task_id, completed=count)

        result = orchestrator.run_vectorization(on_progress=on_progress)

        # The progress display has to be gone before the summary is printed.
        stop_progress()

        if quiet:
            return

        status = result.status.value
        data = result.data or {}
        if status == "skipped":
            return
        new = data.get("vectorized_new", 0)
        failed = data.get("vectorized_failed", 0)
        skipped_missing = data.get("vectorized_skipped_missing", 0)
        skipped_large = data.get("vectorized_skipped_large", 0)
        skipped_large_files = data.get("skipped_large_files") or []
        if status == "completed_with_errors" or failed:
            console.print(
                f" [yellow]⚠[/yellow] Deep Read: {new} embedded, {failed} failed, "
                f"{skipped_missing} skipped"
                + (f", {skipped_large} too large" if skipped_large else "")
            )
        else:
            trail = ""
            if skipped_missing:
                trail += f", {skipped_missing} skipped"
            if skipped_large:
                trail += f", {skipped_large} too large"
            console.print(f" [green]✓[/green] Deep Read complete: {new} embedded" + trail)
        if skipped_large_files:
            from footprinter.cli._policy_helpers import abbreviate_home

            console.print(
                f" [yellow]⚠[/yellow] Skipped {len(skipped_large_files)}"
                f" file(s) over vectorize cap:"
            )
            for entry in skipped_large_files:
                # "or 0" also covers an explicit None size, not just a missing key.
                size_mb = (entry.get("size_bytes") or 0) / (1024 * 1024)
                console.print(f" {size_mb:>7.1f} MB {abbreviate_home(entry.get('path', ''))}")
    except Exception as e:  # Intentional broad catch: follow-up stage must not crash setup/ingest
        stop_progress()
        if not quiet:
            console.print(f" [yellow]Vectorization warning:[/yellow] {e}")
    finally:
        # Also reached on KeyboardInterrupt/SystemExit, which the handler above
        # does not catch: make sure the live display is stopped and the
        # orchestrator (if it was created) is closed.
        try:
            stop_progress()
        finally:
            if orchestrator is not None:
                orchestrator.close()
|
|
@@ -113,7 +113,8 @@ def _check_fda() -> Check:
|
|
|
113
113
|
return Check(
|
|
114
114
|
"fda",
|
|
115
115
|
"WARN",
|
|
116
|
-
"Cannot read Safari History.db — grant Full Disk Access to your terminal in
|
|
116
|
+
"Cannot read Safari History.db — grant Full Disk Access to your terminal in"
|
|
117
|
+
" System Settings > Privacy & Security",
|
|
117
118
|
)
|
|
118
119
|
|
|
119
120
|
|
|
@@ -127,7 +128,8 @@ def _check_semantic_deps() -> Check:
|
|
|
127
128
|
return Check(
|
|
128
129
|
"semantic_deps",
|
|
129
130
|
"WARN",
|
|
130
|
-
f"Optional semantic search dependencies not installed: {', '.join(missing)}
|
|
131
|
+
f"Optional semantic search dependencies not installed: {', '.join(missing)}"
|
|
132
|
+
" — install with: pipx install --force 'footprinter-cli[full]'",
|
|
131
133
|
)
|
|
132
134
|
|
|
133
135
|
|
|
@@ -141,7 +143,8 @@ def _check_parse_deps() -> Check:
|
|
|
141
143
|
return Check(
|
|
142
144
|
"parse_deps",
|
|
143
145
|
"WARN",
|
|
144
|
-
f"Optional parsing dependencies not installed: {', '.join(missing)}
|
|
146
|
+
f"Optional parsing dependencies not installed: {', '.join(missing)}"
|
|
147
|
+
" — install with: pipx install --force 'footprinter-cli[full]'",
|
|
145
148
|
)
|
|
146
149
|
|
|
147
150
|
|
|
@@ -34,6 +34,7 @@ def _build_parser(subparsers, name):
|
|
|
34
34
|
" fp ingest --pipe local_files,browser Specific internal pipes\n"
|
|
35
35
|
" fp ingest --rebuild-vectors Rebuild vectors (incremental)\n"
|
|
36
36
|
" fp ingest --rebuild-vectors full Rebuild vectors (full reset)\n"
|
|
37
|
+
" fp ingest --preview Pre-scan summary (no ingest)\n"
|
|
37
38
|
" fp ingest status Show pipeline diagnostics\n"
|
|
38
39
|
" fp ingest import export.zip Import a chat export"
|
|
39
40
|
),
|
|
@@ -85,6 +86,16 @@ def _build_parser(subparsers, name):
|
|
|
85
86
|
default=None,
|
|
86
87
|
help="Run a single rebuild phase (default: all). Only used with --rebuild-vectors",
|
|
87
88
|
)
|
|
89
|
+
parser.add_argument(
|
|
90
|
+
"--preview",
|
|
91
|
+
action="store_true",
|
|
92
|
+
help=(
|
|
93
|
+
"Pre-scan configured directories and print a summary "
|
|
94
|
+
"(file counts by extension, top-N largest files/directories, "
|
|
95
|
+
"outliers above size threshold) without ingesting or vectorizing. "
|
|
96
|
+
"In a TTY, prompts to proceed with the real ingest."
|
|
97
|
+
),
|
|
98
|
+
)
|
|
88
99
|
parser.add_argument(
|
|
89
100
|
"--repair-fts",
|
|
90
101
|
action="store_true",
|
|
@@ -187,6 +198,10 @@ def _handle_ingest(args) -> None:
|
|
|
187
198
|
)
|
|
188
199
|
return
|
|
189
200
|
|
|
201
|
+
if getattr(args, "preview", False):
|
|
202
|
+
_ingest_preview(args)
|
|
203
|
+
return
|
|
204
|
+
|
|
190
205
|
action = getattr(args, "ingest_action", None)
|
|
191
206
|
|
|
192
207
|
if action is None:
|
|
@@ -489,6 +504,166 @@ def _ingest_pipeline(args) -> None:
|
|
|
489
504
|
console.print("[dim]Interrupted.[/dim]")
|
|
490
505
|
sys.exit(130)
|
|
491
506
|
|
|
507
|
+
# FPR-1721: vectorization runs as a follow-up stage when local_files was touched.
|
|
508
|
+
if pipes is None or "local_files" in pipes:
|
|
509
|
+
from footprinter.cli._vectorize_stage import run_vectorization_stage
|
|
510
|
+
|
|
511
|
+
run_vectorization_stage(quiet=quiet)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# Defaults for the preview render. Configurable via the indexing.preview_*
|
|
515
|
+
# config keys; tests pass plain configs without those keys.
|
|
516
|
+
_PREVIEW_TOP_N_DEFAULT = 10
|
|
517
|
+
_PREVIEW_OUTLIER_THRESHOLD_MB_DEFAULT = 50
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _stdout_is_tty() -> bool:
    """Tell whether standard output is attached to a terminal.

    This is the patch point for the preview prompt. It is a thin wrapper
    around ``sys.stdout.isatty()`` so tests can override the answer directly
    rather than patching a stream that ``run_fp`` swaps out during a test
    (which would otherwise re-route the patch to the wrong StringIO).
    """
    stream = sys.stdout
    return stream.isatty()
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _format_bytes(n: int) -> str:
    """Human-readable byte size (binary units).

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest unit (TiB) is reached, then rendered with one decimal place.
    Negative values are scaled by magnitude and keep their sign.

    Args:
        n: Size in bytes.

    Returns:
        A string such as ``"0.0 B"``, ``"1.5 KiB"`` or ``"50.0 MiB"``.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB")
    size = float(n)
    for unit in units[:-1]:
        if abs(size) < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    # Anything that survived every division is expressed in the largest unit.
    return f"{size:.1f} {units[-1]}"
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _render_preview_plain(summary, *, threshold_bytes: int) -> str:
    """Build the single-line, machine-friendly preview used in --quiet mode.

    The line starts with ``preview:`` followed by space-separated
    ``files=``, ``bytes=``, ``outliers=`` and ``threshold=`` fields, then a
    comma-separated ``ext=count`` list ordered by descending count.

    Args:
        summary: Object exposing ``total_files``, ``total_bytes``,
            ``by_extension()`` and ``outliers()``.
        threshold_bytes: Outlier threshold echoed in the ``threshold=`` field.
    """
    counts = summary.by_extension()
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    extension_fields = ", ".join(f"{suffix}={count}" for suffix, count in ranked)
    fields = [
        f"files={summary.total_files}",
        f"bytes={summary.total_bytes}",
        f"outliers={len(summary.outliers())}",
        f"threshold={threshold_bytes}",
        extension_fields,
    ]
    return "preview: " + " ".join(fields)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _render_preview(summary, *, threshold_bytes: int, console_):
    """Print a scan summary to a Rich console as a header plus up to four tables.

    Sections, in order: files by extension, largest files, largest
    directories, and outliers. A section is skipped entirely when the
    summary returns nothing for it; each printed table is followed by a
    blank line.

    Args:
        summary: Object exposing ``total_files``, ``total_bytes``,
            ``by_extension()``, ``top_files()``, ``top_directories()`` and
            ``outliers()``. File entries are read via ``"file_size"`` and
            ``"file_path"``; directory entries are ``(path, total)`` pairs.
        threshold_bytes: Outlier threshold shown in the outliers table title.
        console_: Console-like object whose ``print`` method receives the output.
    """
    from rich.table import Table

    def emit(table) -> None:
        # Every section is a table followed by one empty line.
        console_.print(table)
        console_.print()

    def size_path_table(title, entries):
        # Shared layout of the "largest files" and "outliers" sections.
        table = Table(title=title, show_edge=False)
        table.add_column("Size", justify="right")
        table.add_column("Path")
        for item in entries:
            table.add_row(_format_bytes(int(item.get("file_size") or 0)), item["file_path"])
        return table

    console_.print()
    console_.print(
        f"[bold]Preview[/bold] [dim]({summary.total_files} files, "
        f"{_format_bytes(summary.total_bytes)} total)[/dim]"
    )
    console_.print()

    extension_counts = summary.by_extension()
    if extension_counts:
        ext_table = Table(title="Files by extension", show_edge=False)
        ext_table.add_column("Extension")
        ext_table.add_column("Count", justify="right")
        ranked = sorted(extension_counts.items(), key=lambda pair: pair[1], reverse=True)
        for suffix, how_many in ranked:
            ext_table.add_row(suffix, str(how_many))
        emit(ext_table)

    largest_files = summary.top_files()
    if largest_files:
        emit(size_path_table(f"Top {len(largest_files)} largest files", largest_files))

    largest_dirs = summary.top_directories()
    if largest_dirs:
        dirs_table = Table(title=f"Top {len(largest_dirs)} largest directories", show_edge=False)
        dirs_table.add_column("Size", justify="right")
        dirs_table.add_column("Directory")
        for directory, total_size in largest_dirs:
            dirs_table.add_row(_format_bytes(total_size), directory)
        emit(dirs_table)

    oversized = summary.outliers()
    if oversized:
        emit(size_path_table(f"Outliers ≥ {_format_bytes(threshold_bytes)}", oversized))
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def _ingest_preview(args) -> None:
    """Pre-scan configured directories and print a summary (FPR-1723).

    No DB writes, no vectorization. Always prints a summary so ``--preview``
    is meaningful even in scripts: ``--quiet`` switches to a single-line
    plain-text summary, and the interactive prompt is shown only when
    ``stdout`` is a TTY and ``--quiet`` is not set.

    Acquires the same exclusive run lock as ``fp ingest`` so a preview cannot
    race a real ingest scan over the same directories. The lock is released
    before the prompt is shown.

    Args:
        args: Parsed CLI arguments; ``quiet`` is read from it, and the whole
            object is handed to ``_ingest_pipeline`` if the user confirms.

    Exits with status 1 on a configuration error or when the run lock is
    already held, and with status 130 when the prompt is interrupted.
    """
    import fcntl

    from footprinter.ingest.file_scanner import FileScanner
    from footprinter.ingest.scan_summary import ScanSummary
    from footprinter.paths import get_run_lock_path
    from footprinter.source_registry import ConfigError, get_config

    quiet = getattr(args, "quiet", False)

    try:
        config = get_config()
    except ConfigError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)

    indexing = config.get("indexing", {}) or {}
    top_n = int(indexing.get("preview_top_n") or _PREVIEW_TOP_N_DEFAULT)
    threshold_mb = indexing.get("preview_size_threshold_mb")
    if threshold_mb is None:
        threshold_mb = _PREVIEW_OUTLIER_THRESHOLD_MB_DEFAULT
    # Scale first, truncate last: a fractional threshold such as 0.5 MB must
    # not collapse to 0 bytes (which would flag every file as an outlier).
    threshold_bytes = int(float(threshold_mb) * 1024 * 1024)

    lock_path = get_run_lock_path()
    # The context manager closes the lock file on every exit path,
    # including the sys.exit() below.
    with open(lock_path, "w") as lock_fd:
        try:
            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            console.print("[red]Error:[/red] Another fp ingest is already in progress.")
            sys.exit(1)

        scanner = FileScanner(config)
        summary = ScanSummary(top_n=top_n, outlier_threshold_bytes=threshold_bytes)
        for entry in scanner.scan_all_directories(skip_hashing=True):
            summary.add(entry)

    if quiet:
        print(_render_preview_plain(summary, threshold_bytes=threshold_bytes))
    else:
        _render_preview(summary, threshold_bytes=threshold_bytes, console_=console)

    if quiet or not _stdout_is_tty():
        return

    try:
        answer = input("Proceed with ingest? [y/N] ").strip().lower()
    except EOFError:
        # stdin is closed or exhausted even though stdout is a terminal:
        # fall back to the default answer ("N") instead of a traceback.
        console.print()
        return
    except KeyboardInterrupt:
        # Same convention as the other ingest handlers in this module.
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)

    if answer in ("y", "yes"):
        _ingest_pipeline(args)
|
|
666
|
+
|
|
492
667
|
|
|
493
668
|
def _ingest_status(args) -> None:
|
|
494
669
|
"""Show pipeline diagnostics (data counts)."""
|
|
@@ -540,7 +715,8 @@ def _ingest_import(args) -> None:
|
|
|
540
715
|
messages = result.get("messages_imported", 0)
|
|
541
716
|
errors = result.get("errors", 0)
|
|
542
717
|
console.print(
|
|
543
|
-
f"[green]Imported[/green] {added + updated} chats
|
|
718
|
+
f"[green]Imported[/green] {added + updated} chats"
|
|
719
|
+
f" ({added} new, {updated} updated), {messages} messages"
|
|
544
720
|
)
|
|
545
721
|
if errors:
|
|
546
722
|
console.print(f"[yellow]Warning:[/yellow] {errors} chats failed to import")
|
|
@@ -589,3 +765,9 @@ def _ingest_refresh(args) -> None:
|
|
|
589
765
|
except KeyboardInterrupt:
|
|
590
766
|
console.print("[dim]Interrupted.[/dim]")
|
|
591
767
|
sys.exit(130)
|
|
768
|
+
|
|
769
|
+
# FPR-1721: vectorization follow-up when this refresh touched local_files.
|
|
770
|
+
if "local_files" in stages:
|
|
771
|
+
from footprinter.cli._vectorize_stage import run_vectorization_stage
|
|
772
|
+
|
|
773
|
+
run_vectorization_stage(quiet=quiet)
|
|
@@ -845,6 +845,12 @@ def run_interactive_wizard():
|
|
|
845
845
|
# _offer_csv_import_wizard can open it and insert rows. Asking earlier
|
|
846
846
|
# (in Data Sources) would silently skip on fresh installs.
|
|
847
847
|
_offer_csv_import_wizard()
|
|
848
|
+
# FPR-1721: phased ingest — main pipeline returned, so the index is
|
|
849
|
+
# usable now. Print the "ready" line then run vectorization with its
|
|
850
|
+
# own progress UI as a follow-up.
|
|
851
|
+
from footprinter.cli._vectorize_stage import run_vectorization_stage
|
|
852
|
+
|
|
853
|
+
run_vectorization_stage()
|
|
848
854
|
else:
|
|
849
855
|
console.print(" [dim]Skipped. Run later: fp ingest[/dim]")
|
|
850
856
|
|