footprinter-cli 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +431 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
- footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
- footprinter/bundled/samples/visible-file-sample.txt +2 -0
- footprinter/cli/__init__.py +135 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +327 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/_sample_seed.py +204 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +543 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +2001 -0
- footprinter/cli/status.py +747 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +602 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +724 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +487 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +315 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +223 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +112 -0
- footprinter/ingest/pipe_runner.py +200 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +186 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +67 -0
- footprinter/mcp/errors.py +105 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +19 -0
- footprinter/paths.py +117 -0
- footprinter/permissions.py +1152 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1264 -0
- footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
- footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
- footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hash computation utilities for Footprinter.
|
|
3
|
+
|
|
4
|
+
Provides consistent hash computation for both local files and Google Drive matching.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
# Module-level logger; the hash helpers below use it to report unreadable files at DEBUG level.
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def compute_md5(file_path: str) -> Optional[str]:
    """
    Compute the MD5 hash of a file, for matching Google Drive's md5Checksum.

    Google Drive uses MD5 for file checksums, so this enables
    hash-based matching between local files and Drive files.

    The file is opened in binary mode and consumed in fixed-size chunks, so
    the whole file is never held in memory at once.

    Args:
        file_path: Path to the file to hash

    Returns:
        32-character lowercase hex MD5 hash, or None if the file could not
        be opened or read (the failure is logged at DEBUG level)
    """
    chunk_size = 64 * 1024  # bytes per read; larger reads mean fewer calls
    try:
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            # iter() with a sentinel keeps reading until read() returns b"" (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    except OSError as e:
        # IOError is an alias of OSError on Python 3, so one name covers both.
        # Logger args are formatted lazily, only if DEBUG is actually enabled.
        logger.debug("Could not compute MD5 for %s: %s", file_path, e)
        return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def compute_sha256(file_path: str) -> Optional[str]:
    """
    Compute the SHA-256 hash of a file, for content deduplication.

    SHA-256 is used for content deduplication and integrity checks
    within the local file system.

    The file is opened in binary mode and consumed in fixed-size chunks, so
    the whole file is never held in memory at once.

    Args:
        file_path: Path to the file to hash

    Returns:
        64-character lowercase hex SHA-256 hash, or None if the file could
        not be opened or read (the failure is logged at DEBUG level)
    """
    chunk_size = 64 * 1024  # bytes per read; larger reads mean fewer calls
    try:
        hash_sha256 = hashlib.sha256()
        with open(file_path, "rb") as f:
            # iter() with a sentinel keeps reading until read() returns b"" (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hash_sha256.update(chunk)
        return hash_sha256.hexdigest()
    except OSError as e:
        # IOError is an alias of OSError on Python 3, so one name covers both.
        # Logger args are formatted lazily, only if DEBUG is actually enabled.
        logger.debug("Could not compute SHA-256 for %s: %s", file_path, e)
        return None
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Centralized logging configuration for Footprinter."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Set to True by the first setup_logging() call; later calls see it and return without reconfiguring.
_configured = False
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def setup_logging(level=None):
    """Configure the root logger. Only the first call takes effect.

    Level resolution order:
    1. Explicit ``level`` argument (if provided)
    2. ``LOG_LEVEL`` environment variable (e.g. ``LOG_LEVEL=DEBUG``)
    3. Falls back to INFO

    An unrecognized ``LOG_LEVEL`` value is ignored and INFO is used instead.
    Records are written to stderr in
    ``timestamp - logger name - level - message`` form.
    """
    global _configured
    if _configured:
        return
    _configured = True

    if level is None:
        env_level = os.environ.get("LOG_LEVEL", "").strip().upper()
        candidate = getattr(logging, env_level, None) if env_level else None
        # The lookup can hit any upper-case attribute of the logging module,
        # not just level constants (e.g. BASIC_FORMAT is a string). Accept
        # only integers so a stray value cannot make basicConfig() raise.
        if isinstance(candidate, int):
            level = candidate
    if level is None:
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        stream=sys.stderr,
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def add_file_handler(log_path: Path, level: int = logging.DEBUG) -> logging.FileHandler:
    """Attach a file handler to the root logger for pipeline run logging.

    Makes sure the log file's parent directory exists, writes records in
    ``timestamp - logger name - level - message`` form, and filters out
    low-severity schema migration records for this handler only.

    Args:
        log_path: Destination log file.
        level: Minimum severity the handler accepts (DEBUG by default).

    Returns:
        The installed handler, so the caller can remove it after the run.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)

    line_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    file_handler = logging.FileHandler(str(log_path))
    file_handler.setLevel(level)
    file_handler.setFormatter(line_format)

    root_logger = logging.root
    root_logger.addHandler(file_handler)

    # A stricter root level would drop records before they ever reach this
    # handler, so relax it when needed. Per the original note, --quiet
    # silences the Rich console output but must NOT silence file logging.
    if root_logger.level > level:
        root_logger.setLevel(level)

    # Schema migration chatter is suppressed with a filter on the handler
    # rather than by changing that logger's level, so the suppression goes
    # away as soon as the handler is removed.
    file_handler.addFilter(_schema_noise_filter)

    return file_handler
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _schema_noise_filter(record: logging.LogRecord) -> bool:
    """Decide whether a log record should be kept.

    Records from loggers whose name starts with
    ``footprinter.ingest.db.schema`` pass only at WARNING or above; records
    from every other logger always pass.
    """
    from_schema_logger = record.name.startswith("footprinter.ingest.db.schema")
    return (not from_schema_logger) or record.levelno >= logging.WARNING
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""MIME type to content type mapping.
|
|
2
|
+
|
|
3
|
+
Used by both the orchestrator and Drive files adapter.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Lookup table from full MIME type to short content-type label. Built once at
# import time instead of on every call.
_MIME_CONTENT_TYPES = {
    "application/pdf": "pdf",
    "application/vnd.google-apps.document": "gdoc",
    "application/vnd.google-apps.spreadsheet": "gsheet",
    "application/vnd.google-apps.presentation": "gslides",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
    "text/plain": "txt",
    "text/csv": "csv",
    "image/jpeg": "jpg",
    "image/png": "png",
    "video/mp4": "mp4",
}


def mime_to_content_type(mime_type: str) -> str:
    """Convert MIME type to short content type string.

    Returns a short label for known types (e.g. "pdf", "gdoc"),
    or truncates the MIME subtype to 8 chars for unknown types.
    Returns "unknown" for falsy or blank input.

    The value is normalized before lookup: any parameters after ``;``
    (e.g. ``; charset=utf-8``) are dropped, surrounding whitespace is
    stripped, and the type is lower-cased, since MIME type names are
    case-insensitive.

    Args:
        mime_type: MIME type string such as ``"application/pdf"``.

    Returns:
        The short content-type label.
    """
    if not mime_type:
        return "unknown"
    # Keep only the "type/subtype" part so values such as
    # "text/plain; charset=utf-8" or "Application/PDF" still hit the table.
    essence = mime_type.split(";", 1)[0].strip().lower()
    if not essence:
        return "unknown"
    # Unknown types fall back to the subtype (text after the last "/"),
    # capped at 8 characters.
    return _MIME_CONTENT_TYPES.get(essence, essence.split("/")[-1][:8])
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Time utilities for consistent UTC timestamp handling."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
|
|
5
|
+
# strftime pattern with second precision; utc_now_iso() formats the current UTC time with it.
UTC_FMT = "%Y-%m-%d %H:%M:%S"
|
|
6
|
+
"""Timestamp format matching SQLite CURRENT_TIMESTAMP: YYYY-MM-DD HH:MM:SS (UTC)."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def utc_now_iso() -> str:
    """Return the current UTC time formatted with ``UTC_FMT``.

    The result has the ``YYYY-MM-DD HH:MM:SS`` shape that SQLite's
    CURRENT_TIMESTAMP uses.
    """
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime(UTC_FMT)
|