footprinter-cli 1.0.2__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. {footprinter_cli-1.0.2/footprinter_cli.egg-info → footprinter_cli-1.0.3}/PKG-INFO +18 -18
  2. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/README.md +17 -17
  3. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/config.example.yaml +9 -1
  4. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/__init__.py +1 -2
  5. footprinter_cli-1.0.3/footprinter/cli/_vectorize_stage.py +117 -0
  6. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/doctor.py +6 -3
  7. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/ingest.py +183 -1
  8. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/setup.py +6 -0
  9. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/status.py +60 -1
  10. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/upsert.py +2 -3
  11. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/browser.py +2 -3
  12. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/chats.py +28 -17
  13. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/clients.py +2 -2
  14. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/files.py +11 -5
  15. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/folders.py +10 -14
  16. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/projects.py +30 -17
  17. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/search.py +1 -0
  18. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/status.py +49 -2
  19. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/cli.py +1 -1
  20. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/file_indexer.py +10 -71
  21. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/file_scanner.py +27 -12
  22. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/folder_indexer.py +2 -3
  23. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/full_content_extractor.py +25 -12
  24. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/orchestrator.py +5 -0
  25. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/processing.py +139 -1
  26. footprinter_cli-1.0.3/footprinter/ingest/scan_summary.py +78 -0
  27. footprinter_cli-1.0.3/footprinter/mcp/resources/discoverability.py +59 -0
  28. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/server.py +4 -0
  29. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/search.py +22 -4
  30. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/vector_store.py +4 -1
  31. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/project_service.py +1 -1
  32. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3/footprinter_cli.egg-info}/PKG-INFO +18 -18
  33. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/SOURCES.txt +3 -0
  34. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/pyproject.toml +1 -1
  35. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/LICENSE +0 -0
  36. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/__init__.py +0 -0
  37. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/access.py +0 -0
  38. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/__init__.py +0 -0
  39. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/db.py +0 -0
  40. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/entities.py +0 -0
  41. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/search.py +0 -0
  42. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/semantic.py +0 -0
  43. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/server.py +0 -0
  44. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/api/status.py +0 -0
  45. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/__init__.py +0 -0
  46. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/context_patterns.yaml +0 -0
  47. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/extensions.yaml +0 -0
  48. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/filename_patterns.yaml +0 -0
  49. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/mime_mappings.yaml +0 -0
  50. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/salesforce_rules.yaml +0 -0
  51. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/bundled/patterns/security_patterns.yaml +0 -0
  52. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/__main__.py +0 -0
  53. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_common.py +0 -0
  54. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_policy_helpers.py +0 -0
  55. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/_prompt.py +0 -0
  56. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/api_cmd.py +0 -0
  57. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/connect.py +0 -0
  58. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/data.py +0 -0
  59. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/delete.py +0 -0
  60. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/mcp_cmd.py +0 -0
  61. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/mcp_setup.py +0 -0
  62. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/search.py +0 -0
  63. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/search_cmd.py +0 -0
  64. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/status_cmd.py +0 -0
  65. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/uninstall.py +0 -0
  66. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/vectorize_cmd.py +0 -0
  67. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/cli/view.py +0 -0
  68. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/connectors/__init__.py +0 -0
  69. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/connectors/config_utils.py +0 -0
  70. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/__init__.py +0 -0
  71. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/emails.py +0 -0
  72. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/messages.py +0 -0
  73. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/policies.py +0 -0
  74. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/sql_utils.py +0 -0
  75. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/db/uploads.py +0 -0
  76. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/__init__.py +0 -0
  77. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/__init__.py +0 -0
  78. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/browser.py +0 -0
  79. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/chat.py +0 -0
  80. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/ingest.py +0 -0
  81. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/local_files.py +0 -0
  82. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/local_folders.py +0 -0
  83. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/adapters/protocol.py +0 -0
  84. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/browser_indexer.py +0 -0
  85. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_indexer.py +0 -0
  86. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/__init__.py +0 -0
  87. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/chatgpt_parser.py +0 -0
  88. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/chat_parsers/claude_parser.py +0 -0
  89. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/content_extractors.py +0 -0
  90. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/database.py +0 -0
  91. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/__init__.py +0 -0
  92. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/connector_schema.py +0 -0
  93. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/migration.py +0 -0
  94. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/schema.py +0 -0
  95. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/db/security.py +0 -0
  96. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/pipe_runner.py +0 -0
  97. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/registry.py +0 -0
  98. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/run_record.py +0 -0
  99. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/ingest/status.py +0 -0
  100. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/__init__.py +0 -0
  101. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/__main__.py +0 -0
  102. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/db.py +0 -0
  103. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/errors.py +0 -0
  104. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/extraction.py +0 -0
  105. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/resources/__init__.py +0 -0
  106. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/resources/context.py +0 -0
  107. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/__init__.py +0 -0
  108. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/navigation.py +0 -0
  109. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/read.py +0 -0
  110. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/semantic.py +0 -0
  111. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/mcp/tools/status.py +0 -0
  112. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/paths.py +0 -0
  113. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/permissions.py +0 -0
  114. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/__init__.py +0 -0
  115. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/chunking.py +0 -0
  116. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/embeddings.py +0 -0
  117. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/semantic/hybrid_search.py +0 -0
  118. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/__init__.py +0 -0
  119. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/access_service.py +0 -0
  120. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/chat_service.py +0 -0
  121. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/client_service.py +0 -0
  122. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/content_service.py +0 -0
  123. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/email_service.py +0 -0
  124. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/file_service.py +0 -0
  125. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/folder_service.py +0 -0
  126. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/includes.py +0 -0
  127. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/ingest_service.py +0 -0
  128. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/roles.py +0 -0
  129. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/search_service.py +0 -0
  130. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/semantic_service.py +0 -0
  131. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/status_service.py +0 -0
  132. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/services/visit_service.py +0 -0
  133. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/source_registry.py +0 -0
  134. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/__init__.py +0 -0
  135. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/hash_utils.py +0 -0
  136. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/logging_config.py +0 -0
  137. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/mime.py +0 -0
  138. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/text.py +0 -0
  139. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/utils/time.py +0 -0
  140. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter/visibility.py +0 -0
  141. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/dependency_links.txt +0 -0
  142. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/entry_points.txt +0 -0
  143. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/requires.txt +0 -0
  144. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/footprinter_cli.egg-info/top_level.txt +0 -0
  145. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/setup.cfg +0 -0
  146. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_control_bypasses.py +0 -0
  147. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_control_docs.py +0 -0
  148. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_access_recalculate.py +0 -0
  149. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_build_status_filter.py +0 -0
  150. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_bundled.py +0 -0
  151. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_e2e_install.py +0 -0
  152. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_e2e_pipeline.py +0 -0
  153. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_edit_recalculate.py +0 -0
  154. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_examples.py +0 -0
  155. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_files_rename.py +0 -0
  156. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_files_surface.py +0 -0
  157. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_inherit_resolution.py +0 -0
  158. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_logging.py +0 -0
  159. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_no_project_root.py +0 -0
  160. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_package_init.py +0 -0
  161. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_paths_no_test_marker.py +0 -0
  162. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_pip_install_e2e.py +0 -0
  163. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_prompt_safety.py +0 -0
  164. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_resolver.py +0 -0
  165. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_security_layer.py +0 -0
  166. {footprinter_cli-1.0.2 → footprinter_cli-1.0.3}/tests/test_security_permissions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: footprinter-cli
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: A local context layer for your files, browser history, chats, and email — searchable, user-owned, MCP-served.
5
5
  Author: SwellCity Group
6
6
  License: MIT
@@ -52,12 +52,12 @@ Requires-Dist: httpx<1.0,>=0.27.0; extra == "dev"
52
52
 
53
53
  # Footprinter
54
54
 
55
- [![Tests](https://github.com/swellcitygroup/footprinter/actions/workflows/test.yml/badge.svg)](https://github.com/swellcitygroup/footprinter/actions/workflows/test.yml)
55
+ [![Tests](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml/badge.svg)](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml)
56
56
  [![PyPI](https://img.shields.io/pypi/v/footprinter-cli)](https://pypi.org/project/footprinter-cli/)
57
57
 
58
58
  **A local context layer for your files, browser history, chats, and email — searchable, user-owned, and served to AI agents through [MCP](https://modelcontextprotocol.io/).**
59
59
 
60
- Your work lives across a filesystem, a browser, an inbox, a chat history, and whatever other tools you reach for. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings *you* define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
60
+ Your work lives across filesystems, browsers, inboxes, chat histories, and other tools. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings you define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
61
61
 
62
62
  ## Prerequisites
63
63
 
@@ -71,10 +71,10 @@ The fastest path on a clean machine is the install script — it ensures Python
71
71
 
72
72
  ```bash
73
73
  # Base install (CLI + MCP + HTTP API)
74
- curl -fsSL https://raw.githubusercontent.com/swellcitygroup/footprinter/main/scripts/release/install.sh | bash
74
+ curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install.sh | bash
75
75
 
76
76
  # Full install (adds semantic search + document parsing)
77
- curl -fsSL https://raw.githubusercontent.com/swellcitygroup/footprinter/main/scripts/release/install-full.sh | bash
77
+ curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install-full.sh | bash
78
78
  ```
79
79
 
80
80
  If you prefer to manage the install yourself, use **pipx** (recommended) — it isolates Footprinter and sidesteps the macOS install caveats noted below:
@@ -160,7 +160,7 @@ Once configured, Claude can search your files, browse projects, and find related
160
160
  | **Documents** | PDF, Word, Excel, PowerPoint content (with `[parse]` extra) |
161
161
  | **Semantic embeddings** | Conceptual similarity across all sources (with `[semantic]` extra) |
162
162
 
163
- What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/swellcitygroup/footprinter/blob/main/reference/content-storage.md) for the full breakdown.
163
+ What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) for the full breakdown.
164
164
 
165
165
  Additional sources are available through [connector plugins](#connectors).
166
166
 
@@ -213,22 +213,22 @@ Sources are scanned into SQLite with bidirectional links connecting local files
213
213
 
214
214
  ## Documentation
215
215
 
216
- - [Interfaces](https://github.com/swellcitygroup/footprinter/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
217
- - [Data Model](https://github.com/swellcitygroup/footprinter/blob/main/reference/data-model.md) — database schema
218
- - [Pipeline](https://github.com/swellcitygroup/footprinter/blob/main/reference/pipeline.md) — indexing stages and configuration
219
- - [Content Storage](https://github.com/swellcitygroup/footprinter/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
220
- - [Access Control](https://github.com/swellcitygroup/footprinter/blob/main/reference/mcp-access-control.md) — MCP security model
216
+ - [Interfaces](https://github.com/harringjohn/footprinter-cli/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
217
+ - [Data Model](https://github.com/harringjohn/footprinter-cli/blob/main/reference/data-model.md) — database schema
218
+ - [Pipeline](https://github.com/harringjohn/footprinter-cli/blob/main/reference/pipeline.md) — indexing stages and configuration
219
+ - [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
220
+ - [Access Control](https://github.com/harringjohn/footprinter-cli/blob/main/reference/mcp-access-control.md) — MCP security model
221
221
 
222
222
  ## Contributing
223
223
 
224
- Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/swellcitygroup/footprinter/issues) first to discuss the approach.
224
+ Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/harringjohn/footprinter-cli/issues) first to discuss the approach.
225
225
 
226
226
  Connector plugins use an internal API that isn't stable yet — we're not accepting connector contributions at this time.
227
227
 
228
228
  ### Development setup
229
229
 
230
230
  ```bash
231
- git clone https://github.com/swellcitygroup/footprinter.git
231
+ git clone https://github.com/harringjohn/footprinter-cli.git
232
232
  cd footprinter-cli
233
233
  python3 -m venv venv
234
234
  ./venv/bin/pip install -e ".[dev]"
@@ -254,7 +254,7 @@ python3 -m venv venv
254
254
  4. Run the test suite
255
255
  5. Submit a PR targeting `main`
256
256
 
257
- Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/swellcitygroup/footprinter/blob/main/SECURITY.md).
257
+ Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md).
258
258
 
259
259
  ### Pull request expectations
260
260
 
@@ -265,13 +265,13 @@ Never commit API keys, tokens, or credentials. Report security vulnerabilities p
265
265
 
266
266
  All PRs are reviewed by the maintainer. Expect reviews within one week. CI must pass before review begins.
267
267
 
268
- No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/swellcitygroup/footprinter/blob/main/LICENSE).
268
+ No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
269
269
 
270
270
  ## Community
271
271
 
272
- - [Code of Conduct](https://github.com/swellcitygroup/footprinter/blob/main/CODE_OF_CONDUCT.md)
273
- - [Security Policy](https://github.com/swellcitygroup/footprinter/blob/main/SECURITY.md)
272
+ - [Code of Conduct](https://github.com/harringjohn/footprinter-cli/blob/main/CODE_OF_CONDUCT.md)
273
+ - [Security Policy](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md)
274
274
 
275
275
  ## License
276
276
 
277
- MIT — see [LICENSE](https://github.com/swellcitygroup/footprinter/blob/main/LICENSE).
277
+ MIT — see [LICENSE](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
@@ -1,11 +1,11 @@
1
1
  # Footprinter
2
2
 
3
- [![Tests](https://github.com/swellcitygroup/footprinter/actions/workflows/test.yml/badge.svg)](https://github.com/swellcitygroup/footprinter/actions/workflows/test.yml)
3
+ [![Tests](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml/badge.svg)](https://github.com/harringjohn/footprinter-cli/actions/workflows/test.yml)
4
4
  [![PyPI](https://img.shields.io/pypi/v/footprinter-cli)](https://pypi.org/project/footprinter-cli/)
5
5
 
6
6
  **A local context layer for your files, browser history, chats, and email — searchable, user-owned, and served to AI agents through [MCP](https://modelcontextprotocol.io/).**
7
7
 
8
- Your work lives across a filesystem, a browser, an inbox, a chat history, and whatever other tools you reach for. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings *you* define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
8
+ Your work lives across filesystems, browsers, inboxes, chat histories, and other tools. Footprinter indexes those sources into a single local store, organizes them into the projects and groupings you define, and serves the result to AI agents through a governed access layer. You control what the agent can see. Everything stays on your machine.
9
9
 
10
10
  ## Prerequisites
11
11
 
@@ -19,10 +19,10 @@ The fastest path on a clean machine is the install script — it ensures Python
19
19
 
20
20
  ```bash
21
21
  # Base install (CLI + MCP + HTTP API)
22
- curl -fsSL https://raw.githubusercontent.com/swellcitygroup/footprinter/main/scripts/release/install.sh | bash
22
+ curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install.sh | bash
23
23
 
24
24
  # Full install (adds semantic search + document parsing)
25
- curl -fsSL https://raw.githubusercontent.com/swellcitygroup/footprinter/main/scripts/release/install-full.sh | bash
25
+ curl -fsSL https://raw.githubusercontent.com/harringjohn/footprinter-cli/main/scripts/release/install-full.sh | bash
26
26
  ```
27
27
 
28
28
  If you prefer to manage the install yourself, use **pipx** (recommended) — it isolates Footprinter and sidesteps the macOS install caveats noted below:
@@ -108,7 +108,7 @@ Once configured, Claude can search your files, browse projects, and find related
108
108
  | **Documents** | PDF, Word, Excel, PowerPoint content (with `[parse]` extra) |
109
109
  | **Semantic embeddings** | Conceptual similarity across all sources (with `[semantic]` extra) |
110
110
 
111
- What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/swellcitygroup/footprinter/blob/main/reference/content-storage.md) for the full breakdown.
111
+ What lands in the database — and when — is controlled by the **content storage tier** you opt into. By default, Footprinter only indexes metadata; it does not read your file content until you explicitly enable it. See [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) for the full breakdown.
112
112
 
113
113
  Additional sources are available through [connector plugins](#connectors).
114
114
 
@@ -161,22 +161,22 @@ Sources are scanned into SQLite with bidirectional links connecting local files
161
161
 
162
162
  ## Documentation
163
163
 
164
- - [Interfaces](https://github.com/swellcitygroup/footprinter/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
165
- - [Data Model](https://github.com/swellcitygroup/footprinter/blob/main/reference/data-model.md) — database schema
166
- - [Pipeline](https://github.com/swellcitygroup/footprinter/blob/main/reference/pipeline.md) — indexing stages and configuration
167
- - [Content Storage](https://github.com/swellcitygroup/footprinter/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
168
- - [Access Control](https://github.com/swellcitygroup/footprinter/blob/main/reference/mcp-access-control.md) — MCP security model
164
+ - [Interfaces](https://github.com/harringjohn/footprinter-cli/blob/main/reference/interfaces.md) — CLI commands, MCP tools, Python API
165
+ - [Data Model](https://github.com/harringjohn/footprinter-cli/blob/main/reference/data-model.md) — database schema
166
+ - [Pipeline](https://github.com/harringjohn/footprinter-cli/blob/main/reference/pipeline.md) — indexing stages and configuration
167
+ - [Content Storage](https://github.com/harringjohn/footprinter-cli/blob/main/reference/content-storage.md) — metadata vs. snippet vs. full-content tiers
168
+ - [Access Control](https://github.com/harringjohn/footprinter-cli/blob/main/reference/mcp-access-control.md) — MCP security model
169
169
 
170
170
  ## Contributing
171
171
 
172
- Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/swellcitygroup/footprinter/issues) first to discuss the approach.
172
+ Bug fixes, documentation, and tests welcome. For new features or architectural changes, [open an issue](https://github.com/harringjohn/footprinter-cli/issues) first to discuss the approach.
173
173
 
174
174
  Connector plugins use an internal API that isn't stable yet — we're not accepting connector contributions at this time.
175
175
 
176
176
  ### Development setup
177
177
 
178
178
  ```bash
179
- git clone https://github.com/swellcitygroup/footprinter.git
179
+ git clone https://github.com/harringjohn/footprinter-cli.git
180
180
  cd footprinter-cli
181
181
  python3 -m venv venv
182
182
  ./venv/bin/pip install -e ".[dev]"
@@ -202,7 +202,7 @@ python3 -m venv venv
202
202
  4. Run the test suite
203
203
  5. Submit a PR targeting `main`
204
204
 
205
- Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/swellcitygroup/footprinter/blob/main/SECURITY.md).
205
+ Never commit API keys, tokens, or credentials. Report security vulnerabilities privately — see [SECURITY.md](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md).
206
206
 
207
207
  ### Pull request expectations
208
208
 
@@ -213,13 +213,13 @@ Never commit API keys, tokens, or credentials. Report security vulnerabilities p
213
213
 
214
214
  All PRs are reviewed by the maintainer. Expect reviews within one week. CI must pass before review begins.
215
215
 
216
- No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/swellcitygroup/footprinter/blob/main/LICENSE).
216
+ No Contributor License Agreement required. By submitting a PR, you agree your contribution is licensed under the project's [MIT License](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
217
217
 
218
218
  ## Community
219
219
 
220
- - [Code of Conduct](https://github.com/swellcitygroup/footprinter/blob/main/CODE_OF_CONDUCT.md)
221
- - [Security Policy](https://github.com/swellcitygroup/footprinter/blob/main/SECURITY.md)
220
+ - [Code of Conduct](https://github.com/harringjohn/footprinter-cli/blob/main/CODE_OF_CONDUCT.md)
221
+ - [Security Policy](https://github.com/harringjohn/footprinter-cli/blob/main/SECURITY.md)
222
222
 
223
223
  ## License
224
224
 
225
- MIT — see [LICENSE](https://github.com/swellcitygroup/footprinter/blob/main/LICENSE).
225
+ MIT — see [LICENSE](https://github.com/harringjohn/footprinter-cli/blob/main/LICENSE).
@@ -78,9 +78,12 @@ exclusions:
78
78
  # Indexing configuration - INDEX ALL FILE TYPES
79
79
  indexing:
80
80
  supported_extensions: [] # Empty = index ALL file types
81
- max_file_size_mb: 0 # 0 = no size limit (index everything)
81
+ max_file_size_mb: 50 # MB; 0 = no size limit. 50 is generous for prose/docs.
82
82
  lookback_days: 14 # Browser history window (days back to index)
83
83
  content_snippets: false # Extract file/email content previews for keyword search
84
+ # `fp ingest --preview` (FPR-1723) — pre-scan summary tuning
85
+ preview_top_n: 10 # rows shown for top files / top directories
86
+ preview_size_threshold_mb: 50 # files at or above this size are flagged as outliers
84
87
 
85
88
  # Semantic search — stores content as embeddings in a local ChromaDB database
86
89
  # Enables finding files and chats by meaning, not just keywords
@@ -104,6 +107,11 @@ vectorization:
104
107
  - .txt
105
108
  - .pdf
106
109
  - .docx
110
+ # Maximum file size (MB) eligible for vectorization. Applies even when
111
+ # indexing.max_file_size_mb is 0 (no limit). Chunking a multi-GB file
112
+ # produces millions of chunks and can OOM the host — this guardrail is
113
+ # always on. Skipped files appear in the post-ingest summary.
114
+ max_vectorize_size_mb: 100
107
115
  # Chunk size in characters — tuned for MiniLM-L6-v2 (256-token input window).
108
116
  # ~1000 chars ≈ 250 tokens. Larger chunks get silently truncated by the model,
109
117
  # meaning content past the window is invisible to semantic search.
@@ -29,10 +29,9 @@ def main(argv=None) -> None:
29
29
 
30
30
  if argv is None:
31
31
  argv = _sys.argv[1:]
32
- from footprinter.source_registry import ConfigError as _ConfigError
33
-
34
32
  from footprinter import __version__
35
33
  from footprinter.cli._common import FORMATTER
34
+ from footprinter.source_registry import ConfigError as _ConfigError
36
35
 
37
36
  parser = argparse.ArgumentParser(
38
37
  prog="fp",
@@ -0,0 +1,117 @@
1
+ """Shared post-ingest vectorization stage helper (FPR-1721).
2
+
3
+ Phased ingest: ``fp setup`` and ``fp ingest`` print "index is ready"
4
+ once the main pipeline returns, then run this follow-up stage with its
5
+ own progress UI. Centralized here so both call sites stay consistent.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from footprinter.cli._common import console
11
+
12
+
13
+ def _file_vectorization_in_config() -> bool:
14
+ """Check config without touching the vector store."""
15
+ try:
16
+ from footprinter.source_registry import get_config
17
+
18
+ return bool(get_config().get("semantic", {}).get("file_vectorization", False))
19
+ except Exception: # ConfigError, ImportError, etc. — treat as disabled
20
+ return False
21
+
22
+
23
def _print_vectorization_summary(result) -> None:
    """Print the Deep Read outcome line and list any files skipped for size.

    ``result`` is the object returned by ``run_vectorization``; only its
    ``status.value`` and ``data`` attributes are read. Nothing is printed
    when the status is ``"skipped"``.
    """
    from rich.markup import escape

    status = result.status.value
    data = result.data or {}
    if status == "skipped":
        return

    new = data.get("vectorized_new", 0)
    failed = data.get("vectorized_failed", 0)
    skipped_missing = data.get("vectorized_skipped_missing", 0)
    skipped_large = data.get("vectorized_skipped_large", 0)
    skipped_large_files = data.get("skipped_large_files") or []

    if status == "completed_with_errors" or failed:
        console.print(
            f" [yellow]⚠[/yellow] Deep Read: {new} embedded, {failed} failed, "
            f"{skipped_missing} skipped"
            + (f", {skipped_large} too large" if skipped_large else "")
        )
    else:
        trail = ""
        if skipped_missing:
            trail += f", {skipped_missing} skipped"
        if skipped_large:
            trail += f", {skipped_large} too large"
        console.print(f" [green]✓[/green] Deep Read complete: {new} embedded" + trail)

    if skipped_large_files:
        from footprinter.cli._policy_helpers import abbreviate_home

        console.print(
            f" [yellow]⚠[/yellow] Skipped {len(skipped_large_files)}"
            f" file(s) over vectorize cap:"
        )
        for entry in skipped_large_files:
            size_mb = entry.get("size_bytes", 0) / (1024 * 1024)
            # Paths are user data: escape them so bracketed directory names
            # are shown literally instead of being parsed as console markup.
            shown_path = escape(abbreviate_home(entry.get("path", "")))
            console.print(f" {size_mb:>7.1f} MB {shown_path}")


def run_vectorization_stage(*, quiet: bool = False) -> None:
    """Run vectorization as a follow-up stage after the main pipeline.

    No-op when ``semantic.file_vectorization`` is disabled. Vectorization is
    best-effort: any ``Exception`` raised while importing, constructing or
    running the orchestrator, or while printing the summary, is reported as
    a warning (unless ``quiet``) and the calling wizard/ingest run continues.

    Args:
        quiet: When true, print nothing and show no progress UI.
    """
    if not _file_vectorization_in_config():
        return

    if not quiet:
        console.print()
        console.print("[green]✓ Your Footprinter index is ready to use.[/green]")
        console.print()
        console.print("[bold]Deep Read (semantic search)[/bold] is running in the background.")
        console.print(
            "A message will appear here when complete. [yellow]Do not close this window.[/yellow]"
        )

    orchestrator = None
    progress = None
    task_id = None

    def stop_progress() -> None:
        # Stop the live display at most once, whichever exit path gets here first.
        nonlocal progress
        if progress is not None:
            active, progress = progress, None
            active.stop()

    def on_progress(count: int) -> None:
        if progress is not None and task_id is not None:
            progress.update(task_id, completed=count)

    try:
        # Imported and constructed inside the try so that a failure here is
        # downgraded to a warning like any other vectorization error.
        from footprinter.ingest.orchestrator import DataPipelineOrchestrator

        orchestrator = DataPipelineOrchestrator()

        if not quiet:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            progress = Progress(
                SpinnerColumn(),
                TextColumn("{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                console=console,
                transient=True,
            )
            progress.start()
            task_id = progress.add_task("[cyan]Vectorizing files[/cyan]", total=None)

        result = orchestrator.run_vectorization(on_progress=on_progress)

        # Take the progress bar down before printing the summary lines.
        stop_progress()
        if not quiet:
            _print_vectorization_summary(result)
    except Exception as e:  # Intentional broad catch: follow-up stage must not crash setup/ingest
        stop_progress()
        if not quiet:
            from rich.markup import escape

            console.print(f" [yellow]Vectorization warning:[/yellow] {escape(str(e))}")
    finally:
        # Also reached on KeyboardInterrupt/SystemExit, which the handler
        # above does not catch: never leave the live display running.
        stop_progress()
        if orchestrator is not None:
            orchestrator.close()
@@ -113,7 +113,8 @@ def _check_fda() -> Check:
113
113
  return Check(
114
114
  "fda",
115
115
  "WARN",
116
- "Cannot read Safari History.db — grant Full Disk Access to your terminal in System Settings > Privacy & Security",
116
+ "Cannot read Safari History.db — grant Full Disk Access to your terminal in"
117
+ " System Settings > Privacy & Security",
117
118
  )
118
119
 
119
120
 
@@ -127,7 +128,8 @@ def _check_semantic_deps() -> Check:
127
128
  return Check(
128
129
  "semantic_deps",
129
130
  "WARN",
130
- f"Optional semantic search dependencies not installed: {', '.join(missing)} — install with: pipx install --force 'footprinter-cli[full]'",
131
+ f"Optional semantic search dependencies not installed: {', '.join(missing)}"
132
+ " — install with: pipx install --force 'footprinter-cli[full]'",
131
133
  )
132
134
 
133
135
 
@@ -141,7 +143,8 @@ def _check_parse_deps() -> Check:
141
143
  return Check(
142
144
  "parse_deps",
143
145
  "WARN",
144
- f"Optional parsing dependencies not installed: {', '.join(missing)} — install with: pipx install --force 'footprinter-cli[full]'",
146
+ f"Optional parsing dependencies not installed: {', '.join(missing)}"
147
+ " — install with: pipx install --force 'footprinter-cli[full]'",
145
148
  )
146
149
 
147
150
 
@@ -34,6 +34,7 @@ def _build_parser(subparsers, name):
34
34
  " fp ingest --pipe local_files,browser Specific internal pipes\n"
35
35
  " fp ingest --rebuild-vectors Rebuild vectors (incremental)\n"
36
36
  " fp ingest --rebuild-vectors full Rebuild vectors (full reset)\n"
37
+ " fp ingest --preview Pre-scan summary (no ingest)\n"
37
38
  " fp ingest status Show pipeline diagnostics\n"
38
39
  " fp ingest import export.zip Import a chat export"
39
40
  ),
@@ -85,6 +86,16 @@ def _build_parser(subparsers, name):
85
86
  default=None,
86
87
  help="Run a single rebuild phase (default: all). Only used with --rebuild-vectors",
87
88
  )
89
+ parser.add_argument(
90
+ "--preview",
91
+ action="store_true",
92
+ help=(
93
+ "Pre-scan configured directories and print a summary "
94
+ "(file counts by extension, top-N largest files/directories, "
95
+ "outliers above size threshold) without ingesting or vectorizing. "
96
+ "In a TTY, prompts to proceed with the real ingest."
97
+ ),
98
+ )
88
99
  parser.add_argument(
89
100
  "--repair-fts",
90
101
  action="store_true",
@@ -187,6 +198,10 @@ def _handle_ingest(args) -> None:
187
198
  )
188
199
  return
189
200
 
201
+ if getattr(args, "preview", False):
202
+ _ingest_preview(args)
203
+ return
204
+
190
205
  action = getattr(args, "ingest_action", None)
191
206
 
192
207
  if action is None:
@@ -489,6 +504,166 @@ def _ingest_pipeline(args) -> None:
489
504
  console.print("[dim]Interrupted.[/dim]")
490
505
  sys.exit(130)
491
506
 
507
+ # FPR-1721: vectorization runs as a follow-up stage when local_files was touched.
508
+ if pipes is None or "local_files" in pipes:
509
+ from footprinter.cli._vectorize_stage import run_vectorization_stage
510
+
511
+ run_vectorization_stage(quiet=quiet)
512
+
513
+
514
+ # Defaults for the preview render. Configurable via the indexing.preview_*
515
+ # config keys; tests pass plain configs without those keys.
516
+ _PREVIEW_TOP_N_DEFAULT = 10
517
+ _PREVIEW_OUTLIER_THRESHOLD_MB_DEFAULT = 50
518
+
519
+
520
+ def _stdout_is_tty() -> bool:
521
+ """Patch point for the preview prompt: True iff the user is at a terminal.
522
+
523
+ Wraps ``sys.stdout.isatty()`` so tests can override the result without
524
+ having to dodge ``run_fp``'s in-test stdout swap (which would otherwise
525
+ re-route the patch to the wrong StringIO).
526
+ """
527
+ return sys.stdout.isatty()
528
+
529
+
530
+ def _format_bytes(n: int) -> str:
531
+ """Human-readable byte size (binary units)."""
532
+ units = ("B", "KiB", "MiB", "GiB", "TiB")
533
+ size = float(n)
534
+ for unit in units:
535
+ if size < 1024 or unit == units[-1]:
536
+ return f"{size:.1f} {unit}"
537
+ size /= 1024
538
+ return f"{n} B"
539
+
540
+
541
+ def _render_preview_plain(summary, *, threshold_bytes: int) -> str:
542
+ """Render a one-line, machine-friendly preview (used in --quiet mode)."""
543
+ by_ext = summary.by_extension()
544
+ ext_part = ", ".join(f"{ext}={n}" for ext, n in sorted(by_ext.items(), key=lambda kv: kv[1], reverse=True))
545
+ return (
546
+ f"preview: files={summary.total_files} bytes={summary.total_bytes} "
547
+ f"outliers={len(summary.outliers())} threshold={threshold_bytes} {ext_part}"
548
+ )
549
+
550
+
551
def _render_preview(summary, *, threshold_bytes: int, console_):
    """Render a ScanSummary to the Rich console.

    Prints a heading with the total file count and size, then up to four
    tables, each followed by a blank line and each omitted when empty:
    files by extension, largest files, largest directories, and outliers at
    or above the size threshold.

    Extensions and paths come from the scanned filesystem, so they are
    escaped before being placed in table cells; otherwise bracketed names
    would be interpreted as console markup rather than shown literally.

    Args:
        summary: Object exposing ``total_files``, ``total_bytes``,
            ``by_extension()``, ``top_files()``, ``top_directories()`` and
            ``outliers()``.
        threshold_bytes: Outlier threshold shown in the outliers title.
        console_: Console object the output is printed to.
    """
    from rich.markup import escape
    from rich.table import Table

    def size_path_table(title, entries):
        # Shared layout for the "largest files" and "outliers" tables.
        table = Table(title=title, show_edge=False)
        table.add_column("Size", justify="right")
        table.add_column("Path")
        for entry in entries:
            table.add_row(
                _format_bytes(int(entry.get("file_size") or 0)),
                escape(str(entry["file_path"])),
            )
        return table

    console_.print()
    console_.print(
        f"[bold]Preview[/bold] [dim]({summary.total_files} files, "
        f"{_format_bytes(summary.total_bytes)} total)[/dim]"
    )
    console_.print()

    by_ext = summary.by_extension()
    if by_ext:
        ext_table = Table(title="Files by extension", show_edge=False)
        ext_table.add_column("Extension")
        ext_table.add_column("Count", justify="right")
        for ext, count in sorted(by_ext.items(), key=lambda kv: kv[1], reverse=True):
            ext_table.add_row(escape(str(ext)), str(count))
        console_.print(ext_table)
        console_.print()

    top_files = summary.top_files()
    if top_files:
        console_.print(size_path_table(f"Top {len(top_files)} largest files", top_files))
        console_.print()

    top_dirs = summary.top_directories()
    if top_dirs:
        dirs_table = Table(title=f"Top {len(top_dirs)} largest directories", show_edge=False)
        dirs_table.add_column("Size", justify="right")
        dirs_table.add_column("Directory")
        for path, total in top_dirs:
            dirs_table.add_row(_format_bytes(total), escape(str(path)))
        console_.print(dirs_table)
        console_.print()

    outliers = summary.outliers()
    if outliers:
        console_.print(size_path_table(f"Outliers ≥ {_format_bytes(threshold_bytes)}", outliers))
        console_.print()
604
+
605
+
606
def _ingest_preview(args) -> None:
    """Pre-scan configured directories and print a summary (FPR-1723).

    No DB writes, no vectorization. Always prints a summary so ``--preview``
    is meaningful even in scripts: ``--quiet`` switches to a single-line
    plain-text summary, and the interactive prompt is shown only when
    ``stdout`` is a TTY and ``--quiet`` is not set.

    Acquires the same exclusive run lock as ``fp ingest`` so a preview cannot
    race a real ingest scan over the same directories. The lock is released
    before the summary is rendered and before the optional real ingest runs.

    Exits with status 1 on a configuration error, an invalid
    ``indexing.preview_*`` value, or when the run lock is already held, and
    with status 130 when interrupted with Ctrl-C. End-of-input at the prompt
    is treated as "no".
    """
    import fcntl

    from footprinter.ingest.file_scanner import FileScanner
    from footprinter.ingest.scan_summary import ScanSummary
    from footprinter.paths import get_run_lock_path
    from footprinter.source_registry import ConfigError, get_config

    quiet = getattr(args, "quiet", False)

    try:
        config = get_config()
    except ConfigError as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)

    indexing = config.get("indexing", {}) or {}
    try:
        top_n = int(indexing.get("preview_top_n") or _PREVIEW_TOP_N_DEFAULT)
        threshold_mb = indexing.get("preview_size_threshold_mb")
        if threshold_mb is None:
            threshold_mb = _PREVIEW_OUTLIER_THRESHOLD_MB_DEFAULT
        # Scale before truncating so a fractional threshold such as 0.5 MB
        # is honoured instead of collapsing to 0 bytes.
        threshold_bytes = int(float(threshold_mb) * 1024 * 1024)
    except (TypeError, ValueError, OverflowError) as e:
        console.print(f"[red]Error:[/red] Invalid indexing.preview_* setting: {e}")
        sys.exit(1)

    # The context manager closes the lock file (releasing the flock) on every
    # exit path, including the sys.exit() calls below.
    with open(get_run_lock_path(), "w") as lock_fd:
        try:
            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            console.print("[red]Error:[/red] Another fp ingest is already in progress.")
            sys.exit(1)

        scanner = FileScanner(config)
        summary = ScanSummary(top_n=top_n, outlier_threshold_bytes=threshold_bytes)
        try:
            for entry in scanner.scan_all_directories(skip_hashing=True):
                summary.add(entry)
        except KeyboardInterrupt:
            console.print("[dim]Interrupted.[/dim]")
            sys.exit(130)

    if quiet:
        print(_render_preview_plain(summary, threshold_bytes=threshold_bytes))
    else:
        _render_preview(summary, threshold_bytes=threshold_bytes, console_=console)

    if quiet or not _stdout_is_tty():
        return

    try:
        answer = input("Proceed with ingest? [y/N] ").strip().lower()
    except EOFError:
        # stdout is a terminal but stdin is closed or redirected: nobody can
        # answer, so fall back to the default "no".
        console.print()
        return
    except KeyboardInterrupt:
        console.print("[dim]Interrupted.[/dim]")
        sys.exit(130)

    if answer in ("y", "yes"):
        _ingest_pipeline(args)
666
+
492
667
 
493
668
  def _ingest_status(args) -> None:
494
669
  """Show pipeline diagnostics (data counts)."""
@@ -540,7 +715,8 @@ def _ingest_import(args) -> None:
540
715
  messages = result.get("messages_imported", 0)
541
716
  errors = result.get("errors", 0)
542
717
  console.print(
543
- f"[green]Imported[/green] {added + updated} chats ({added} new, {updated} updated), {messages} messages"
718
+ f"[green]Imported[/green] {added + updated} chats"
719
+ f" ({added} new, {updated} updated), {messages} messages"
544
720
  )
545
721
  if errors:
546
722
  console.print(f"[yellow]Warning:[/yellow] {errors} chats failed to import")
@@ -589,3 +765,9 @@ def _ingest_refresh(args) -> None:
589
765
  except KeyboardInterrupt:
590
766
  console.print("[dim]Interrupted.[/dim]")
591
767
  sys.exit(130)
768
+
769
+ # FPR-1721: vectorization follow-up when this refresh touched local_files.
770
+ if "local_files" in stages:
771
+ from footprinter.cli._vectorize_stage import run_vectorization_stage
772
+
773
+ run_vectorization_stage(quiet=quiet)
@@ -845,6 +845,12 @@ def run_interactive_wizard():
845
845
  # _offer_csv_import_wizard can open it and insert rows. Asking earlier
846
846
  # (in Data Sources) would silently skip on fresh installs.
847
847
  _offer_csv_import_wizard()
848
+ # FPR-1721: phased ingest — main pipeline returned, so the index is
849
+ # usable now. Print the "ready" line then run vectorization with its
850
+ # own progress UI as a follow-up.
851
+ from footprinter.cli._vectorize_stage import run_vectorization_stage
852
+
853
+ run_vectorization_stage()
848
854
  else:
849
855
  console.print(" [dim]Skipped. Run later: fp ingest[/dim]")
850
856