footprinter-cli 1.1.0__tar.gz → 1.1.1__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/PKG-INFO +1 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/bundled/config.example.yaml +31 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/mcp_setup.py +13 -13
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/setup.py +21 -18
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/clients.py +3 -2
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/folders.py +2 -2
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/projects.py +1 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/search.py +186 -28
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/db/ddl.py +27 -7
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/navigation.py +15 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/read.py +15 -2
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/search.py +7 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/semantic.py +5 -3
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/semantic/hybrid_search.py +8 -2
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/semantic/vector_store.py +64 -4
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/access_service.py +92 -6
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/chat_service.py +4 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/client_service.py +2 -0
- footprinter_cli-1.1.1/footprinter/services/content_service.py +429 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/email_service.py +4 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/file_service.py +4 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/folder_service.py +2 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/project_service.py +2 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/search_service.py +10 -2
- footprinter_cli-1.1.1/footprinter/services/semantic_service.py +716 -0
- footprinter_cli-1.1.1/footprinter/utils/context_md.py +172 -0
- footprinter_cli-1.1.1/footprinter/utils/text.py +66 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/PKG-INFO +1 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/SOURCES.txt +1 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/pyproject.toml +1 -1
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_e2e_install.py +16 -16
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_inherit_resolution.py +8 -6
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_security_layer.py +3 -2
- footprinter_cli-1.1.1/tests/test_verify_install.py +443 -0
- footprinter_cli-1.1.0/footprinter/services/content_service.py +0 -191
- footprinter_cli-1.1.0/footprinter/services/semantic_service.py +0 -380
- footprinter_cli-1.1.0/footprinter/utils/text.py +0 -6
- footprinter_cli-1.1.0/tests/test_verify_install.py +0 -136
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/LICENSE +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/README.md +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/access_stamper.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/__main__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/db.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/entities.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/search.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/semantic.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/server.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/api/status.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/bundled/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/__main__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/_common.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/_policy_helpers.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/_prompt.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/_vectorize_stage.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/add.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/connect.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/delete.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/diagnostics.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/doctor.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/ingest.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/permission_cmd.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/search.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/status.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/uninstall.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/update.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/cli/view.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/connectors/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/connectors/config_utils.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/browser.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/chats.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/emails.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/files.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/messages.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/policies.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/protocols.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/sql_utils.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/status.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db/uploads.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/db_base.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/browser.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/chat.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/ingest.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/local_files.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/local_folders.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/adapters/protocol.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/browser_indexer.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/chat_indexer.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/chat_parsers/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/chat_parsers/chatgpt_parser.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/chat_parsers/claude_code_parser.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/chat_parsers/claude_parser.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/content_extractors.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/database.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/db/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/db/connector_schema.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/db/fts.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/db/schema.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/file_indexer.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/file_scanner.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/folder_indexer.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/full_content_extractor.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/orchestrator.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/pipe_runner.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/processing.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/registry.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/run_record.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/scan_summary.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/status.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/ingest/vector_ops.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/__main__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/db.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/errors.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/resources/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/resources/context.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/resources/discoverability.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/server.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/mcp/tools/status.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/paths.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/permissions.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/policy_resolver.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/semantic/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/semantic/chunking.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/semantic/embeddings.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/includes.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/ingest_service.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/roles.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/status_service.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/services/visit_service.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/source_registry.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/__init__.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/exceptions.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/extraction.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/hash_utils.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/logging_config.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/mime.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/paths.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/sqlite_errors.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/utils/time.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter/visibility.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/dependency_links.txt +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/entry_points.txt +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/requires.txt +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/footprinter_cli.egg-info/top_level.txt +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/setup.cfg +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_access_control_bypasses.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_access_control_docs.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_access_recalculate.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_access_source_provenance.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_build_status_filter.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_bundled.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_config_limits.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_conftest_config.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_db_base.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_e2e_pipeline.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_edit_recalculate.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_examples.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_files_rename.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_files_surface.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_logging.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_no_issue_ids.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_no_project_root.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_package_init.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_paths_no_test_marker.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_pip_install_e2e.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_policy_resolver.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_prompt_safety.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_qa_dispatch.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_resolver.py +0 -0
- {footprinter_cli-1.1.0 → footprinter_cli-1.1.1}/tests/test_security_permissions.py +0 -0
|
@@ -86,8 +86,27 @@ exclusions:
|
|
|
86
86
|
indexing:
|
|
87
87
|
supported_extensions: [] # Empty = index ALL file types
|
|
88
88
|
max_file_size_mb: 50 # MB; 0 = no size limit. 50 is generous for prose/docs.
|
|
89
|
+
# Cap on how many bytes the MCP read path (footprinter_read) pulls from a local
|
|
90
|
+
# file before decode/extraction. MB; 0 = no cap (read the whole file).
|
|
91
|
+
# Default 10 MB closes the old 500 KB silent-truncation gap so the read tool can
|
|
92
|
+
# return content the index already holds.
|
|
93
|
+
# The returned content is ALWAYS bounded to a payload-safe size regardless of
|
|
94
|
+
# this cap: a single MCP tool result has a ~1 MB protocol payload ceiling, so an
|
|
95
|
+
# oversized read returns a marked, in-budget slice (output_truncated) plus a
|
|
96
|
+
# pointer to semantic/keyword search — never a hard failure. That bound is a
|
|
97
|
+
# UTF-8 BYTE budget (sliced on a code-point boundary) sized under the ~1 MB JSON
|
|
98
|
+
# wall with headroom for JSON-escape expansion of multibyte content (CJK/emoji)
|
|
99
|
+
# and the result envelope — not a character count, so non-Latin scripts stay
|
|
100
|
+
# safe. Raise this cap to read more bytes from large files; for large documents,
|
|
101
|
+
# prefer search.
|
|
102
|
+
max_read_size_mb: 10
|
|
89
103
|
lookback_days: 14 # Browser history window (days back to index)
|
|
90
|
-
|
|
104
|
+
# Opt-in (default off). When true, ingest reads each file/email and stores a
|
|
105
|
+
# short preview (~1000 chars/file) so keyword search matches file content, not
|
|
106
|
+
# just names. Cost: extra read work during ingest + the stored preview in SQLite.
|
|
107
|
+
# Enabling this after the first ingest backfills already-indexed files only on
|
|
108
|
+
# the next `fp ingest` run; new ingests populate previews automatically.
|
|
109
|
+
content_snippets: false
|
|
91
110
|
|
|
92
111
|
# Semantic search — stores content as embeddings in a local ChromaDB database
|
|
93
112
|
# Enables finding files and chats by meaning, not just keywords
|
|
@@ -99,6 +118,17 @@ semantic:
|
|
|
99
118
|
# Add 'unlisted' to also embed hidden/dot-files.
|
|
100
119
|
# vectorize_statuses:
|
|
101
120
|
# - listed
|
|
121
|
+
# Semantic file results — how much matched content to return per result.
|
|
122
|
+
# A chunk is ~1000 chars ≈ 250 tokens. Payload size ≈ max_chunks_per_file ×
|
|
123
|
+
# max_chunk_chars, so raise these for more context at higher token cost.
|
|
124
|
+
# Max characters of each matched chunk to return. Default 1000. 0 = no cap
|
|
125
|
+
# (return the whole chunk).
|
|
126
|
+
max_chunk_chars: 1000
|
|
127
|
+
# How many matched chunks to return per file, best-relevance first. Default 3.
|
|
128
|
+
# Clamped to [1, 20] — values above the cap are bounded to keep payloads sane.
|
|
129
|
+
# The per-result `chunks` list is returned only on the vector (semantic) path;
|
|
130
|
+
# the FTS5 keyword fallback (used when semantic search is unavailable) omits it.
|
|
131
|
+
max_chunks_per_file: 3
|
|
102
132
|
|
|
103
133
|
# Vectorization — controls what gets embedded for semantic search
|
|
104
134
|
# Requires semantic.file_vectorization: true to take effect for files
|
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
MCP Configuration Helper for AI clients.
|
|
3
3
|
|
|
4
4
|
Detects config paths for MCP clients, generates the correct
|
|
5
|
-
MCP server
|
|
5
|
+
MCP server config block for this Footprinter installation, and optionally writes it.
|
|
6
6
|
|
|
7
7
|
Usage:
|
|
8
|
-
fp setup mcp # Print MCP
|
|
9
|
-
fp setup mcp --claude # Write/merge
|
|
8
|
+
fp setup mcp # Print MCP config block to paste
|
|
9
|
+
fp setup mcp --claude # Write/merge config block into Claude Desktop config (with backup)
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import json
|
|
@@ -125,8 +125,8 @@ def get_mcp_command(project_root: Path = None) -> tuple[str, list[str]]:
|
|
|
125
125
|
return sys.executable, ["-m", "footprinter.mcp"]
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def generate_snippet(project_root: Path = None) -> dict:
|
|
129
|
-
"""Generate the MCP server config
|
|
128
|
+
def generate_config_block(project_root: Path = None) -> dict:
|
|
129
|
+
"""Generate the MCP server config block as a dict.
|
|
130
130
|
|
|
131
131
|
Args:
|
|
132
132
|
project_root: Override project root (default: auto-detected).
|
|
@@ -151,13 +151,13 @@ def generate_snippet(project_root: Path = None) -> dict:
|
|
|
151
151
|
return {"mcpServers": {"footprinter": server_config}}
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
def write_config(snippet: dict, config_path: Path = None) -> bool:
|
|
155
|
-
"""Write or merge the MCP
|
|
154
|
+
def write_config(config_block: dict, config_path: Path = None) -> bool:
|
|
155
|
+
"""Write or merge the MCP config block into Claude Desktop config.
|
|
156
156
|
|
|
157
157
|
Creates a backup before modifying an existing file.
|
|
158
158
|
|
|
159
159
|
Args:
|
|
160
|
-
|
|
160
|
+
config_block: The config block dict from generate_config_block().
|
|
161
161
|
config_path: Override config path (default: auto-detected).
|
|
162
162
|
|
|
163
163
|
Returns:
|
|
@@ -182,7 +182,7 @@ def write_config(snippet: dict, config_path: Path = None) -> bool:
|
|
|
182
182
|
# Merge: add/update mcpServers.footprinter
|
|
183
183
|
if "mcpServers" not in existing:
|
|
184
184
|
existing["mcpServers"] = {}
|
|
185
|
-
existing["mcpServers"]["footprinter"] = snippet["mcpServers"]["footprinter"]
|
|
185
|
+
existing["mcpServers"]["footprinter"] = config_block["mcpServers"]["footprinter"]
|
|
186
186
|
|
|
187
187
|
# Backup existing file
|
|
188
188
|
if path.exists():
|
|
@@ -298,13 +298,13 @@ def print_client_paths():
|
|
|
298
298
|
console.print(table)
|
|
299
299
|
|
|
300
300
|
|
|
301
|
-
def
|
|
302
|
-
"""Display the MCP
|
|
301
|
+
def print_config_block(config_block: dict):
|
|
302
|
+
"""Display the MCP config block for manual pasting.
|
|
303
303
|
|
|
304
304
|
Args:
|
|
305
|
-
|
|
305
|
+
config_block: The config block dict from generate_config_block().
|
|
306
306
|
"""
|
|
307
|
-
json_str = json.dumps(
|
|
307
|
+
json_str = json.dumps(config_block, indent=2)
|
|
308
308
|
console.print()
|
|
309
309
|
console.print("Add this to your MCP client config:")
|
|
310
310
|
console.print(Panel(json_str, title="MCP Config"))
|
|
@@ -233,17 +233,17 @@ def _handle_setup(args) -> None:
|
|
|
233
233
|
|
|
234
234
|
def _add_mcp_parser(subparsers, *, formatter_class=None):
|
|
235
235
|
"""Add the MCP subparser with --claude flag."""
|
|
236
|
-
kwargs = {"help": "Show or install the MCP server
|
|
236
|
+
kwargs = {"help": "Show or install the MCP server config block"}
|
|
237
237
|
if formatter_class:
|
|
238
238
|
kwargs.update(
|
|
239
239
|
description=(
|
|
240
|
-
"Show the MCP server
|
|
241
|
-
"Bare command prints the JSON
|
|
240
|
+
"Show the MCP server config block for AI clients, or write it automatically.\n\n"
|
|
241
|
+
"Bare command prints the JSON config block for manual copy/paste into any MCP client.\n"
|
|
242
242
|
"Use --claude to write it directly to Claude Desktop config (creates a backup)."
|
|
243
243
|
),
|
|
244
244
|
epilog=(
|
|
245
245
|
"examples:\n"
|
|
246
|
-
" fp setup mcp Print MCP
|
|
246
|
+
" fp setup mcp Print MCP config block for manual config\n"
|
|
247
247
|
" fp setup mcp --claude Write to Claude Desktop config (creates backup)"
|
|
248
248
|
),
|
|
249
249
|
formatter_class=formatter_class,
|
|
@@ -252,7 +252,7 @@ def _add_mcp_parser(subparsers, *, formatter_class=None):
|
|
|
252
252
|
parser.add_argument(
|
|
253
253
|
"--claude",
|
|
254
254
|
action="store_true",
|
|
255
|
-
help="Write/merge
|
|
255
|
+
help="Write/merge config block into Claude Desktop config (creates backup)",
|
|
256
256
|
)
|
|
257
257
|
return parser
|
|
258
258
|
|
|
@@ -263,14 +263,14 @@ def _dispatch_mcp(args) -> None:
|
|
|
263
263
|
console.print("[red]MCP protocol library not installed.[/red] Run: pip install mcp")
|
|
264
264
|
sys.exit(1)
|
|
265
265
|
|
|
266
|
-
|
|
266
|
+
config_block = mcp_setup.generate_config_block()
|
|
267
267
|
|
|
268
268
|
if getattr(args, "claude", False):
|
|
269
|
-
ok = mcp_setup.write_config(
|
|
269
|
+
ok = mcp_setup.write_config(config_block)
|
|
270
270
|
sys.exit(0 if ok else 1)
|
|
271
271
|
|
|
272
|
-
# Default: print
|
|
273
|
-
mcp_setup.
|
|
272
|
+
# Default: print config block
|
|
273
|
+
mcp_setup.print_config_block(config_block)
|
|
274
274
|
|
|
275
275
|
|
|
276
276
|
def _handle_setup_inner(args) -> None:
|
|
@@ -842,12 +842,13 @@ def collect_vectorization_answers(
|
|
|
842
842
|
"""
|
|
843
843
|
existing_vec = (existing or {}).get("vectorization", {})
|
|
844
844
|
existing_semantic = (existing or {}).get("semantic", {})
|
|
845
|
-
# Fresh installs default to
|
|
846
|
-
#
|
|
845
|
+
# Fresh installs default to OFF: content snippets are an informed opt-in
|
|
846
|
+
# (they read and store file content), so the wizard presents the choice
|
|
847
|
+
# without pre-selecting it. Reconfigure runs preserve the user's prior choice.
|
|
847
848
|
if existing is not None and "indexing" in existing and "content_snippets" in existing["indexing"]:
|
|
848
849
|
snippets_default = existing["indexing"]["content_snippets"]
|
|
849
850
|
else:
|
|
850
|
-
snippets_default =
|
|
851
|
+
snippets_default = False
|
|
851
852
|
file_types = existing_vec.get("file_types", list(DEFAULT_FILE_TYPES))
|
|
852
853
|
existing_excludes = existing_vec.get("exclude_patterns", [])
|
|
853
854
|
|
|
@@ -868,7 +869,9 @@ def collect_vectorization_answers(
|
|
|
868
869
|
" [bold]Local only[/bold]: previews are written to a local SQLite database\n"
|
|
869
870
|
" on your machine. Nothing is uploaded or shared, and the MCP client only sees\n"
|
|
870
871
|
" content when you grant explicit permission via fp permission.\n"
|
|
871
|
-
" [dim]Trade-off: Footprinter keeps a stored copy of file (and connector) previews on disk.[/dim]"
|
|
872
|
+
" [dim]Trade-off: Footprinter keeps a stored copy of file (and connector) previews on disk.[/dim]\n"
|
|
873
|
+
" [dim]Enabling this later only backfills already-indexed files when you\n"
|
|
874
|
+
" re-run `fp ingest`; new ingests populate previews automatically.[/dim]"
|
|
872
875
|
)
|
|
873
876
|
content_snippets = Confirm.ask(" Enable file content snippets?", default=snippets_default)
|
|
874
877
|
|
|
@@ -1333,25 +1336,25 @@ def offer_setup_claude() -> bool:
|
|
|
1333
1336
|
return False
|
|
1334
1337
|
|
|
1335
1338
|
try:
|
|
1336
|
-
|
|
1339
|
+
config_block = mcp_setup.generate_config_block()
|
|
1337
1340
|
except Exception as e: # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
|
|
1338
1341
|
console.print(f" [yellow]MCP setup failed: {e}[/yellow]")
|
|
1339
1342
|
console.print(" [dim]Run manually: fp setup mcp --claude[/dim]")
|
|
1340
1343
|
return False
|
|
1341
1344
|
|
|
1342
|
-
# Offer
|
|
1345
|
+
# Offer config block for manual copy/paste (Cursor, Windsurf, etc.)
|
|
1343
1346
|
if Confirm.ask(
|
|
1344
|
-
"\nView MCP config
|
|
1347
|
+
"\nView MCP config block (for Claude Code, Cursor, VS Code, and other clients)?",
|
|
1345
1348
|
default=True,
|
|
1346
1349
|
):
|
|
1347
|
-
mcp_setup.
|
|
1350
|
+
mcp_setup.print_config_block(config_block)
|
|
1348
1351
|
|
|
1349
1352
|
# Offer Claude Desktop auto-config
|
|
1350
1353
|
if not Confirm.ask("\nConfigure Claude Desktop automatically?", default=False):
|
|
1351
1354
|
return False
|
|
1352
1355
|
|
|
1353
1356
|
try:
|
|
1354
|
-
mcp_setup.write_config(
|
|
1357
|
+
mcp_setup.write_config(config_block)
|
|
1355
1358
|
console.print(" [green]Claude Desktop MCP configured.[/green]")
|
|
1356
1359
|
return True
|
|
1357
1360
|
except Exception as e: # Intentional broad catch: user-facing CLI; errors shown to console, not re-raised
|
|
@@ -227,7 +227,7 @@ def get_client(conn: sqlite3.Connection, client_id: int) -> Optional[dict]:
|
|
|
227
227
|
cursor.execute(
|
|
228
228
|
"""SELECT id, name, slug, client_type, status,
|
|
229
229
|
visibility, access,
|
|
230
|
-
visibility_source, access_source
|
|
230
|
+
visibility_source, access_source, context_path
|
|
231
231
|
FROM clients WHERE id = ?""",
|
|
232
232
|
(client_id,),
|
|
233
233
|
)
|
|
@@ -245,6 +245,7 @@ def get_client(conn: sqlite3.Connection, client_id: int) -> Optional[dict]:
|
|
|
245
245
|
"access": row["access"] or "inherit",
|
|
246
246
|
"visibility_source": row["visibility_source"],
|
|
247
247
|
"access_source": row["access_source"],
|
|
248
|
+
"context_path": row["context_path"],
|
|
248
249
|
}
|
|
249
250
|
|
|
250
251
|
# Attached projects
|
|
@@ -282,7 +283,7 @@ def find_by_name_fuzzy(conn: sqlite3.Connection, name: str) -> list[dict]:
|
|
|
282
283
|
rows = conn.execute(
|
|
283
284
|
"""SELECT id, name, slug, client_type, status,
|
|
284
285
|
created_at, visibility, access,
|
|
285
|
-
visibility_source, access_source
|
|
286
|
+
visibility_source, access_source, context_path
|
|
286
287
|
FROM clients WHERE name LIKE ?""",
|
|
287
288
|
(f"%{name}%",),
|
|
288
289
|
).fetchall()
|
|
@@ -171,7 +171,7 @@ def get_folder_by_path(conn: sqlite3.Connection, path: str) -> dict | None:
|
|
|
171
171
|
"""SELECT id, path, relative_path, name, source, status,
|
|
172
172
|
direct_file_count, total_size_bytes, scanned_at,
|
|
173
173
|
project_id, external_id, account, visibility, access,
|
|
174
|
-
visibility_source, access_source
|
|
174
|
+
visibility_source, access_source, context_path
|
|
175
175
|
FROM folders WHERE path = ?""",
|
|
176
176
|
(path,),
|
|
177
177
|
).fetchone()
|
|
@@ -184,7 +184,7 @@ def get_folder_by_relative_path(conn: sqlite3.Connection, relative_path: str) ->
|
|
|
184
184
|
"""SELECT id, path, relative_path, name, source, status,
|
|
185
185
|
direct_file_count, total_size_bytes, scanned_at,
|
|
186
186
|
project_id, external_id, account, visibility, access,
|
|
187
|
-
visibility_source, access_source
|
|
187
|
+
visibility_source, access_source, context_path
|
|
188
188
|
FROM folders WHERE relative_path = ?""",
|
|
189
189
|
(relative_path,),
|
|
190
190
|
).fetchone()
|
|
@@ -65,7 +65,7 @@ def find_by_name_fuzzy(conn: sqlite3.Connection, name: str) -> list[dict]:
|
|
|
65
65
|
rows = conn.execute(
|
|
66
66
|
"""SELECT id, name, status, client,
|
|
67
67
|
description, visibility, access,
|
|
68
|
-
visibility_source, access_source
|
|
68
|
+
visibility_source, access_source, context_path
|
|
69
69
|
FROM projects
|
|
70
70
|
WHERE name LIKE ?""",
|
|
71
71
|
(f"%{name}%",),
|
|
@@ -18,6 +18,7 @@ from footprinter.db.sql_utils import (
|
|
|
18
18
|
paginated_response,
|
|
19
19
|
split_query_terms,
|
|
20
20
|
)
|
|
21
|
+
from footprinter.utils.text import EXCERPT_BUDGET, build_excerpt
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
def _status_clause(
|
|
@@ -196,7 +197,8 @@ def search_files_keyword(
|
|
|
196
197
|
f"""
|
|
197
198
|
SELECT file.id, file.source, file.name, file.path, file.content_type,
|
|
198
199
|
file.size_bytes, file.modified_at, file.account, file.mime_type,
|
|
199
|
-
file.visibility, file.status, file.status_reason,
|
|
200
|
+
file.visibility, file.access, file.status, file.status_reason,
|
|
201
|
+
file.content_preview,
|
|
200
202
|
project.name AS project_name, client.name AS client
|
|
201
203
|
FROM files file
|
|
202
204
|
{fts_join}
|
|
@@ -209,25 +211,44 @@ def search_files_keyword(
|
|
|
209
211
|
params,
|
|
210
212
|
).fetchall()
|
|
211
213
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
214
|
+
# Function-local import: services.access_service pulls in db.chats/files,
|
|
215
|
+
# which load this module via db/__init__, so a module-level import here
|
|
216
|
+
# would close an import cycle. Deferring to call time breaks the loop while
|
|
217
|
+
# still using the one canonical permission resolver.
|
|
218
|
+
from footprinter.services.access_service import resolve_inherit_permission
|
|
219
|
+
|
|
220
|
+
results = []
|
|
221
|
+
for r in rows:
|
|
222
|
+
# File-excerpt precedence: content_preview when present and the
|
|
223
|
+
# canonical resolver puts access at 'allow', else name/path. Using
|
|
224
|
+
# resolve_inherit_permission honors a global-deny policy for 'inherit'
|
|
225
|
+
# files and fails closed on NULL/unexpected values, so the in-DB
|
|
226
|
+
# excerpt decision agrees with the service-layer content-gating net.
|
|
227
|
+
if r["content_preview"] and resolve_inherit_permission(r["access"]) == "allow":
|
|
228
|
+
excerpt_fields = build_excerpt(r["content_preview"], source="content_preview")
|
|
229
|
+
else:
|
|
230
|
+
excerpt_fields = build_excerpt(f"{r['name']} — {r['path'] or ''}", source="title")
|
|
231
|
+
results.append(
|
|
232
|
+
{
|
|
233
|
+
"id": r["id"],
|
|
234
|
+
"source": r["source"],
|
|
235
|
+
"name": r["name"],
|
|
236
|
+
"path": r["path"],
|
|
237
|
+
"content_type": r["content_type"],
|
|
238
|
+
"size_bytes": r["size_bytes"],
|
|
239
|
+
"modified_at": r["modified_at"],
|
|
240
|
+
"account": r["account"],
|
|
241
|
+
"mime_type": r["mime_type"],
|
|
242
|
+
"project": r["project_name"],
|
|
243
|
+
"client": r["client"],
|
|
244
|
+
"visibility": r["visibility"],
|
|
245
|
+
"access": r["access"],
|
|
246
|
+
"status": r["status"],
|
|
247
|
+
"status_reason": r["status_reason"],
|
|
248
|
+
**excerpt_fields,
|
|
249
|
+
}
|
|
250
|
+
)
|
|
251
|
+
return results
|
|
231
252
|
|
|
232
253
|
|
|
233
254
|
def search_emails_keyword(
|
|
@@ -322,7 +343,7 @@ def search_emails_keyword(
|
|
|
322
343
|
"received_at": r["received_at"],
|
|
323
344
|
"account": r["account"],
|
|
324
345
|
"labels": r["labels"],
|
|
325
|
-
|
|
346
|
+
**build_excerpt(r["body_preview"] or "", source="body_preview"),
|
|
326
347
|
"project_name": r["project_name"],
|
|
327
348
|
"client_name": r["client_name"],
|
|
328
349
|
"visibility": r["visibility"],
|
|
@@ -335,6 +356,122 @@ def search_emails_keyword(
|
|
|
335
356
|
]
|
|
336
357
|
|
|
337
358
|
|
|
359
|
+
# Bounds on the per-hit message fetch in ``chat_message_excerpt``. The excerpt
|
|
360
|
+
# only ever surfaces a ~``EXCERPT_BUDGET``-char window (centered on the first
|
|
361
|
+
# query-term match, or position 0 for title-only hits), so loading the whole
|
|
362
|
+
# conversation is wasted CPU/memory. Two guards cap the fetch:
|
|
363
|
+
# ``_CHAT_EXCERPT_MSG_LIMIT`` caps row transfer for chats with many short
|
|
364
|
+
# messages, and ``_CHAT_EXCERPT_FETCH_BUDGET`` caps total characters for chats
|
|
365
|
+
# with a few enormous messages. The char budget
|
|
366
|
+
# is a comfortable multiple of ``EXCERPT_BUDGET`` so a normal chat's window is
|
|
367
|
+
# byte-for-byte unchanged; the row cap is large enough that no normal chat's
|
|
368
|
+
# leading content is truncated before the char budget is hit.
|
|
369
|
+
_CHAT_EXCERPT_MSG_LIMIT = 64
|
|
370
|
+
_CHAT_EXCERPT_FETCH_BUDGET = EXCERPT_BUDGET * 4
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def chat_message_excerpt(
|
|
374
|
+
conn: sqlite3.Connection,
|
|
375
|
+
chat_id: int,
|
|
376
|
+
query: str,
|
|
377
|
+
title: str,
|
|
378
|
+
*,
|
|
379
|
+
status: "str | list[str] | None" = None,
|
|
380
|
+
) -> dict:
|
|
381
|
+
"""Re-derive a chat's excerpt from its message content at query time.
|
|
382
|
+
|
|
383
|
+
Keyword chat search matches the title only, so the re-derived excerpt is a
|
|
384
|
+
query-centered window over the conversation's message content (the opening
|
|
385
|
+
of the conversation when the hit was on the title). Falls back to the chat
|
|
386
|
+
``title`` only when the chat has no listed message content. The excerpt
|
|
387
|
+
fields are built via the shared :func:`build_excerpt` so the size budget and
|
|
388
|
+
provenance match every other content-bearing source.
|
|
389
|
+
|
|
390
|
+
The message fetch is **bounded** to ``_CHAT_EXCERPT_MSG_LIMIT`` leading rows
|
|
391
|
+
(SQL ``LIMIT``, kept in ``ORDER BY id`` so the *opening* of the conversation
|
|
392
|
+
is what's loaded) and to ``_CHAT_EXCERPT_FETCH_BUDGET`` characters of
|
|
393
|
+
accumulated content. The excerpt only ever surfaces a ~``EXCERPT_BUDGET``
|
|
394
|
+
window, so a bound several times that budget reproduces the identical window
|
|
395
|
+
for any normal-sized chat while stopping a single huge conversation from
|
|
396
|
+
ballooning memory. Because only the bounded slice is loaded,
|
|
397
|
+
``chars_available`` / ``has_more`` describe that bounded window, not the
|
|
398
|
+
entire conversation: for a chat longer than the fetch budget ``has_more``
|
|
399
|
+
can under-report. This is the documented trade of bounding the fetch; a
|
|
400
|
+
separate ``COUNT``/``SUM(length)`` query to recover the true total is
|
|
401
|
+
deliberately avoided because it reintroduces the per-hit DB work this bound
|
|
402
|
+
removes.
|
|
403
|
+
|
|
404
|
+
The lookup stays a single bounded query **per chat hit**
|
|
405
|
+
(``search_chats_keyword`` calls this once per result row). Batching across
|
|
406
|
+
hits into one ``WHERE chat_id IN (...)`` query is deferred: it would
|
|
407
|
+
complicate the per-message visibility/access gating applied here, and
|
|
408
|
+
bounding each query already addresses the memory/CPU concern. The
|
|
409
|
+
``messages.list_messages`` accessor is not reused — it orders newest-first
|
|
410
|
+
(``ORDER BY message.id DESC``), the wrong order for leading-content
|
|
411
|
+
windowing, and returns extra columns the excerpt does not need.
|
|
412
|
+
"""
|
|
413
|
+
# Imported here to avoid a module-level dependency on the optional
|
|
414
|
+
# semantic package (chromadb/onnx) from the always-loaded db layer.
|
|
415
|
+
from footprinter.semantic.hybrid_search import extract_snippet
|
|
416
|
+
|
|
417
|
+
# Function-local import: services.access_service pulls in db.chats/files,
|
|
418
|
+
# which load this module via db/__init__, so a module-level import here
|
|
419
|
+
# would close an import cycle. Deferring to call time breaks the loop while
|
|
420
|
+
# still using the one canonical resolver. (Same pattern as the file paths.)
|
|
421
|
+
from footprinter.services.access_service import (
|
|
422
|
+
resolve_inherit_permission,
|
|
423
|
+
resolve_inherit_visibility,
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
status_conds, status_params = _status_clause(status, column="status")
|
|
427
|
+
# Per-row content gating now defers to the canonical resolvers rather than an
|
|
428
|
+
# inline ``access IN (...)`` / ``visibility IN (...)`` SQL predicate. The old
|
|
429
|
+
# predicate admitted ``inherit`` unconditionally; the resolvers map
|
|
430
|
+
# ``inherit`` to the cached global policy, so an ``inherit`` row resolves to
|
|
431
|
+
# deny/hidden under a global-deny/hidden policy and is dropped here — the SQL
|
|
432
|
+
# only bounds and orders the candidate rows, the visibility/access decision
|
|
433
|
+
# runs in Python below. Status filtering stays in SQL (global policy does not
|
|
434
|
+
# affect it). The resolvers carry the fail-closed posture: NULL access →
|
|
435
|
+
# deny, NULL visibility → opaque, and explicit hidden/opaque/deny still drop.
|
|
436
|
+
where = " AND ".join(["chat_id = ?", *status_conds])
|
|
437
|
+
rows = conn.execute(
|
|
438
|
+
f"""
|
|
439
|
+
SELECT content, access, visibility
|
|
440
|
+
FROM messages
|
|
441
|
+
WHERE {where}
|
|
442
|
+
ORDER BY id
|
|
443
|
+
LIMIT ?
|
|
444
|
+
""",
|
|
445
|
+
[chat_id, *status_params, _CHAT_EXCERPT_MSG_LIMIT],
|
|
446
|
+
).fetchall()
|
|
447
|
+
|
|
448
|
+
# Accumulate leading message content only until the char budget is reached,
|
|
449
|
+
# then stop — so even a single enormous message is sliced to the budget. The
|
|
450
|
+
# per-row visibility/access gate runs before accumulation, so the budget
|
|
451
|
+
# bounds only admitted content.
|
|
452
|
+
pieces: list[str] = []
|
|
453
|
+
accumulated = 0
|
|
454
|
+
for r in rows:
|
|
455
|
+
content = r["content"]
|
|
456
|
+
if not content:
|
|
457
|
+
continue
|
|
458
|
+
if (
|
|
459
|
+
resolve_inherit_permission(r["access"]) != "allow"
|
|
460
|
+
or resolve_inherit_visibility(r["visibility"]) != "full"
|
|
461
|
+
):
|
|
462
|
+
continue
|
|
463
|
+
pieces.append(content)
|
|
464
|
+
accumulated += len(content) + 1 # +1 for the "\n" join separator
|
|
465
|
+
if accumulated >= _CHAT_EXCERPT_FETCH_BUDGET:
|
|
466
|
+
break
|
|
467
|
+
full_content = "\n".join(pieces).strip()[:_CHAT_EXCERPT_FETCH_BUDGET]
|
|
468
|
+
if not full_content:
|
|
469
|
+
return build_excerpt(title or "", source="title")
|
|
470
|
+
|
|
471
|
+
window = extract_snippet(full_content, query)
|
|
472
|
+
return build_excerpt(window, source="message", chars_available=len(full_content))
|
|
473
|
+
|
|
474
|
+
|
|
338
475
|
def search_chats_keyword(
|
|
339
476
|
conn: sqlite3.Connection,
|
|
340
477
|
*,
|
|
@@ -383,6 +520,8 @@ def search_chats_keyword(
|
|
|
383
520
|
|
|
384
521
|
where_sql = (" AND ".join(where)) if where else "1=1"
|
|
385
522
|
|
|
523
|
+
query_text = " ".join(terms)
|
|
524
|
+
|
|
386
525
|
rows = conn.execute(
|
|
387
526
|
f"""
|
|
388
527
|
SELECT chat.id, chat.external_id, chat.account, chat.title,
|
|
@@ -415,6 +554,9 @@ def search_chats_keyword(
|
|
|
415
554
|
"visibility_source": r["visibility_source"],
|
|
416
555
|
"access_source": r["access_source"],
|
|
417
556
|
"status": r["status"],
|
|
557
|
+
# Re-derive the excerpt from message content; fall back to the
|
|
558
|
+
# title only when the chat has no listed message content.
|
|
559
|
+
**chat_message_excerpt(conn, r["id"], query_text, r["title"] or ""),
|
|
418
560
|
}
|
|
419
561
|
for r in rows
|
|
420
562
|
]
|
|
@@ -506,7 +648,9 @@ def chat_fts5_fallback(
|
|
|
506
648
|
"""FTS5 keyword fallback for chat search.
|
|
507
649
|
|
|
508
650
|
Returns dicts shaped for semantic_service consumption: chat_id, chat_title,
|
|
509
|
-
|
|
651
|
+
relevance_score, source, created_at, message_id, plus a ``snippet`` staging
|
|
652
|
+
field (the chat title) that ``semantic_service`` converts into the excerpt
|
|
653
|
+
contract (``excerpt_source="title"``) and removes before returning results.
|
|
510
654
|
The ``status`` kwarg defaults to listed-only; pass ``"all"`` or a list to widen.
|
|
511
655
|
"""
|
|
512
656
|
safe_query = query.replace('"', '""')
|
|
@@ -554,8 +698,14 @@ def file_fts5_fallback(
|
|
|
554
698
|
"""FTS5 keyword fallback for file search.
|
|
555
699
|
|
|
556
700
|
Returns dicts shaped for semantic_service consumption: id, source, name,
|
|
557
|
-
path, content_type, size_bytes, modified_at, relevance_score,
|
|
558
|
-
|
|
701
|
+
path, content_type, size_bytes, modified_at, relevance_score, the excerpt
|
|
702
|
+
contract (excerpt, excerpt_source, chars_returned, chars_available,
|
|
703
|
+
has_more), visibility, access.
|
|
704
|
+
|
|
705
|
+
Shape divergence: this degraded path carries **no** ``chunks`` key. The
|
|
706
|
+
per-file ``chunks`` list (top-N matched chunks) is a vector-path-only
|
|
707
|
+
enrichment attached by ``semantic_service._top_chunks_by_file``; the keyword
|
|
708
|
+
fallback has no chunk-level matches to surface.
|
|
559
709
|
The ``status`` kwarg defaults to listed-only; pass ``"all"`` or a list to widen.
|
|
560
710
|
"""
|
|
561
711
|
terms = split_query_terms(query)
|
|
@@ -579,12 +729,20 @@ def file_fts5_fallback(
|
|
|
579
729
|
[match_str, *status_params, limit],
|
|
580
730
|
).fetchall()
|
|
581
731
|
|
|
732
|
+
# Function-local import — see search_files_keyword for the cycle rationale.
|
|
733
|
+
from footprinter.services.access_service import resolve_inherit_permission
|
|
734
|
+
|
|
582
735
|
results = []
|
|
583
736
|
for row in rows:
|
|
584
|
-
|
|
585
|
-
|
|
737
|
+
# File-excerpt precedence on the keyword fallback path: content_preview
|
|
738
|
+
# when present and the canonical resolver puts access at 'allow', else
|
|
739
|
+
# name/path. resolve_inherit_permission honors a global-deny policy for
|
|
740
|
+
# 'inherit' files and fails closed on NULL — the weaker `!= "deny"`
|
|
741
|
+
# gate leaked content for NULL/inherit-under-global-deny rows.
|
|
742
|
+
if row["content_preview"] and resolve_inherit_permission(row["access"]) == "allow":
|
|
743
|
+
excerpt_fields = build_excerpt(row["content_preview"], source="content_preview")
|
|
586
744
|
else:
|
|
587
|
-
|
|
745
|
+
excerpt_fields = build_excerpt(f"{row['name']} — {row['path'] or ''}", source="title")
|
|
588
746
|
results.append(
|
|
589
747
|
{
|
|
590
748
|
"id": row["id"],
|
|
@@ -595,7 +753,7 @@ def file_fts5_fallback(
|
|
|
595
753
|
"size_bytes": row["size_bytes"],
|
|
596
754
|
"modified_at": row["modified_at"],
|
|
597
755
|
"relevance_score": 0.5,
|
|
598
|
-
|
|
756
|
+
**excerpt_fields,
|
|
599
757
|
"visibility": row["visibility"],
|
|
600
758
|
"access": row["access"],
|
|
601
759
|
"visibility_source": row["visibility_source"],
|
|
@@ -241,7 +241,10 @@ def init_schema(conn):
|
|
|
241
241
|
access_source TEXT,
|
|
242
242
|
|
|
243
243
|
-- Display
|
|
244
|
-
display_name TEXT
|
|
244
|
+
display_name TEXT,
|
|
245
|
+
|
|
246
|
+
-- Curated context (pointer to a Markdown file; nullable override)
|
|
247
|
+
context_path TEXT
|
|
245
248
|
)
|
|
246
249
|
"""
|
|
247
250
|
)
|
|
@@ -336,7 +339,10 @@ def init_schema(conn):
|
|
|
336
339
|
visibility_source TEXT,
|
|
337
340
|
|
|
338
341
|
-- Display
|
|
339
|
-
display_name TEXT
|
|
342
|
+
display_name TEXT,
|
|
343
|
+
|
|
344
|
+
-- Curated context (pointer to a Markdown file; nullable override)
|
|
345
|
+
context_path TEXT
|
|
340
346
|
)
|
|
341
347
|
"""
|
|
342
348
|
)
|
|
@@ -542,7 +548,10 @@ def init_schema(conn):
|
|
|
542
548
|
visibility_source TEXT,
|
|
543
549
|
|
|
544
550
|
-- Display
|
|
545
|
-
display_name TEXT
|
|
551
|
+
display_name TEXT,
|
|
552
|
+
|
|
553
|
+
-- Curated context (pointer to a Markdown file; nullable override)
|
|
554
|
+
context_path TEXT
|
|
546
555
|
)
|
|
547
556
|
"""
|
|
548
557
|
)
|
|
@@ -794,14 +803,25 @@ def _ensure_source_columns(conn):
|
|
|
794
803
|
|
|
795
804
|
|
|
796
805
|
_SUPER_ENTITY_COLUMNS: dict[str, list[tuple[str, str]]] = {
|
|
797
|
-
"projects": [
|
|
798
|
-
|
|
799
|
-
|
|
806
|
+
"projects": [
|
|
807
|
+
("slug", "TEXT"),
|
|
808
|
+
("status_changed_at", "DATETIME"),
|
|
809
|
+
("context_path", "TEXT"),
|
|
810
|
+
],
|
|
811
|
+
"clients": [
|
|
812
|
+
("slug", "TEXT"),
|
|
813
|
+
("status_changed_at", "DATETIME"),
|
|
814
|
+
("context_path", "TEXT"),
|
|
815
|
+
],
|
|
816
|
+
"folders": [
|
|
817
|
+
("status_changed_at", "DATETIME"),
|
|
818
|
+
("context_path", "TEXT"),
|
|
819
|
+
],
|
|
800
820
|
}
|
|
801
821
|
|
|
802
822
|
|
|
803
823
|
def _ensure_super_entity_columns(conn):
|
|
804
|
-
"""Add slug/status_changed_at to super-entity tables (idempotent
|
|
824
|
+
"""Add slug/status_changed_at/context_path to super-entity tables (idempotent)."""
|
|
805
825
|
for table, columns in _SUPER_ENTITY_COLUMNS.items():
|
|
806
826
|
for col_name, col_type in columns:
|
|
807
827
|
try:
|