footprinter-cli 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. footprinter/__init__.py +8 -0
  2. footprinter/access.py +431 -0
  3. footprinter/api/__init__.py +1 -0
  4. footprinter/api/db.py +61 -0
  5. footprinter/api/entities.py +250 -0
  6. footprinter/api/search.py +47 -0
  7. footprinter/api/semantic.py +33 -0
  8. footprinter/api/server.py +66 -0
  9. footprinter/api/status.py +15 -0
  10. footprinter/bundled/__init__.py +0 -0
  11. footprinter/bundled/config.example.yaml +161 -0
  12. footprinter/bundled/patterns/context_patterns.yaml +18 -0
  13. footprinter/bundled/patterns/extensions.yaml +283 -0
  14. footprinter/bundled/patterns/filename_patterns.yaml +61 -0
  15. footprinter/bundled/patterns/mime_mappings.yaml +68 -0
  16. footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
  17. footprinter/bundled/patterns/security_patterns.yaml +27 -0
  18. footprinter/bundled/samples/hidden-client-file-sample.txt +2 -0
  19. footprinter/bundled/samples/opaque-project-file-sample.txt +2 -0
  20. footprinter/bundled/samples/visible-file-sample.txt +2 -0
  21. footprinter/cli/__init__.py +135 -0
  22. footprinter/cli/__main__.py +6 -0
  23. footprinter/cli/_common.py +327 -0
  24. footprinter/cli/_policy_helpers.py +646 -0
  25. footprinter/cli/_prompt.py +220 -0
  26. footprinter/cli/_sample_seed.py +204 -0
  27. footprinter/cli/api_cmd.py +32 -0
  28. footprinter/cli/connect.py +591 -0
  29. footprinter/cli/data.py +879 -0
  30. footprinter/cli/delete.py +128 -0
  31. footprinter/cli/ingest.py +543 -0
  32. footprinter/cli/mcp_cmd.py +750 -0
  33. footprinter/cli/mcp_setup.py +306 -0
  34. footprinter/cli/search.py +393 -0
  35. footprinter/cli/search_cmd.py +69 -0
  36. footprinter/cli/setup.py +2001 -0
  37. footprinter/cli/status.py +747 -0
  38. footprinter/cli/status_cmd.py +104 -0
  39. footprinter/cli/upsert.py +794 -0
  40. footprinter/cli/vectorize_cmd.py +215 -0
  41. footprinter/cli/view.py +322 -0
  42. footprinter/connectors/__init__.py +171 -0
  43. footprinter/connectors/config_utils.py +141 -0
  44. footprinter/db/__init__.py +37 -0
  45. footprinter/db/browser.py +198 -0
  46. footprinter/db/chats.py +602 -0
  47. footprinter/db/clients.py +307 -0
  48. footprinter/db/emails.py +279 -0
  49. footprinter/db/files.py +724 -0
  50. footprinter/db/folders.py +659 -0
  51. footprinter/db/messages.py +192 -0
  52. footprinter/db/policies.py +151 -0
  53. footprinter/db/projects.py +673 -0
  54. footprinter/db/search.py +573 -0
  55. footprinter/db/sql_utils.py +168 -0
  56. footprinter/db/status.py +320 -0
  57. footprinter/db/uploads.py +70 -0
  58. footprinter/ingest/__init__.py +0 -0
  59. footprinter/ingest/adapters/__init__.py +33 -0
  60. footprinter/ingest/adapters/browser.py +54 -0
  61. footprinter/ingest/adapters/chat.py +57 -0
  62. footprinter/ingest/adapters/ingest.py +146 -0
  63. footprinter/ingest/adapters/local_files.py +68 -0
  64. footprinter/ingest/adapters/local_folders.py +52 -0
  65. footprinter/ingest/adapters/protocol.py +174 -0
  66. footprinter/ingest/browser_indexer.py +216 -0
  67. footprinter/ingest/chat_dedup.py +156 -0
  68. footprinter/ingest/chat_indexer.py +487 -0
  69. footprinter/ingest/chat_parsers/__init__.py +8 -0
  70. footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
  71. footprinter/ingest/chat_parsers/claude_parser.py +161 -0
  72. footprinter/ingest/cli.py +827 -0
  73. footprinter/ingest/content_extractors.py +117 -0
  74. footprinter/ingest/database.py +36 -0
  75. footprinter/ingest/db/__init__.py +1 -0
  76. footprinter/ingest/db/connector_schema.py +47 -0
  77. footprinter/ingest/db/migration.py +315 -0
  78. footprinter/ingest/db/schema.py +1043 -0
  79. footprinter/ingest/db/security.py +6 -0
  80. footprinter/ingest/file_indexer.py +223 -0
  81. footprinter/ingest/file_scanner.py +277 -0
  82. footprinter/ingest/folder_indexer.py +226 -0
  83. footprinter/ingest/full_content_extractor.py +321 -0
  84. footprinter/ingest/orchestrator.py +112 -0
  85. footprinter/ingest/pipe_runner.py +200 -0
  86. footprinter/ingest/processing.py +165 -0
  87. footprinter/ingest/registry.py +186 -0
  88. footprinter/ingest/run_record.py +91 -0
  89. footprinter/ingest/status.py +346 -0
  90. footprinter/mcp/__init__.py +0 -0
  91. footprinter/mcp/__main__.py +5 -0
  92. footprinter/mcp/db.py +67 -0
  93. footprinter/mcp/errors.py +105 -0
  94. footprinter/mcp/extraction.py +226 -0
  95. footprinter/mcp/server.py +39 -0
  96. footprinter/mcp/tools/__init__.py +0 -0
  97. footprinter/mcp/tools/navigation.py +70 -0
  98. footprinter/mcp/tools/read.py +75 -0
  99. footprinter/mcp/tools/search.py +158 -0
  100. footprinter/mcp/tools/semantic.py +79 -0
  101. footprinter/mcp/tools/status.py +19 -0
  102. footprinter/paths.py +117 -0
  103. footprinter/permissions.py +1152 -0
  104. footprinter/semantic/__init__.py +13 -0
  105. footprinter/semantic/chunking.py +52 -0
  106. footprinter/semantic/embeddings.py +23 -0
  107. footprinter/semantic/hybrid_search.py +273 -0
  108. footprinter/semantic/vector_store.py +471 -0
  109. footprinter/services/__init__.py +49 -0
  110. footprinter/services/access_service.py +342 -0
  111. footprinter/services/chat_service.py +85 -0
  112. footprinter/services/client_service.py +267 -0
  113. footprinter/services/content_service.py +181 -0
  114. footprinter/services/email_service.py +89 -0
  115. footprinter/services/file_service.py +83 -0
  116. footprinter/services/folder_service.py +122 -0
  117. footprinter/services/includes.py +19 -0
  118. footprinter/services/ingest_service.py +231 -0
  119. footprinter/services/project_service.py +262 -0
  120. footprinter/services/roles.py +25 -0
  121. footprinter/services/search_service.py +177 -0
  122. footprinter/services/semantic_service.py +360 -0
  123. footprinter/services/status_service.py +18 -0
  124. footprinter/services/visit_service.py +65 -0
  125. footprinter/source_registry.py +194 -0
  126. footprinter/utils/__init__.py +7 -0
  127. footprinter/utils/hash_utils.py +59 -0
  128. footprinter/utils/logging_config.py +68 -0
  129. footprinter/utils/mime.py +30 -0
  130. footprinter/utils/text.py +6 -0
  131. footprinter/utils/time.py +11 -0
  132. footprinter/visibility.py +1264 -0
  133. footprinter_cli-1.0.0rc1.dist-info/LICENSE +21 -0
  134. footprinter_cli-1.0.0rc1.dist-info/METADATA +223 -0
  135. footprinter_cli-1.0.0rc1.dist-info/RECORD +138 -0
  136. footprinter_cli-1.0.0rc1.dist-info/WHEEL +5 -0
  137. footprinter_cli-1.0.0rc1.dist-info/entry_points.txt +2 -0
  138. footprinter_cli-1.0.0rc1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,59 @@
1
+ """
2
+ Hash computation utilities for Footprinter.
3
+
4
+ Provides consistent hash computation for both local files and Google Drive matching.
5
+ """
6
+
7
+ import hashlib
8
+ import logging
9
+ from typing import Optional
10
+
11
logger = logging.getLogger(__name__)


def compute_md5(file_path: str) -> Optional[str]:
    """
    Compute MD5 hash matching Google Drive's md5Checksum.

    Google Drive uses MD5 for file checksums, so this enables
    hash-based matching between local files and Drive files. The digest
    is only a content fingerprint here, not a security primitive.

    Args:
        file_path: Path to the file to hash

    Returns:
        32-character lowercase hex MD5 hash, or None if the file could
        not be opened or read
    """
    try:
        # Mark the digest as non-security so FIPS-restricted OpenSSL builds
        # do not refuse to construct it (that refusal is a ValueError, which
        # the OSError handler below would not catch).
        hash_md5 = hashlib.md5(usedforsecurity=False)
    except TypeError:
        # Interpreters older than 3.9 do not accept ``usedforsecurity``.
        hash_md5 = hashlib.md5()

    try:
        with open(file_path, "rb") as f:
            # Read fixed-size chunks so a large file is never held in memory
            # in full; iter() stops at the empty bytes object returned at EOF.
            for chunk in iter(lambda: f.read(8192), b""):
                hash_md5.update(chunk)
    except OSError as e:
        # IOError is an alias of OSError, so one clause covers both.
        logger.debug("Could not compute MD5 for %s: %s", file_path, e)
        return None
    return hash_md5.hexdigest()
36
+
37
+
38
def compute_sha256(file_path: str) -> Optional[str]:
    """
    Return the SHA-256 digest of a file's contents.

    The file is opened in binary mode and consumed in 8192-byte reads,
    so its size does not determine how much memory is used.

    Args:
        file_path: Path to the file to hash

    Returns:
        64-character lowercase hex SHA-256 hash, or None when opening or
        reading the file raises an OS-level error
    """
    digest = hashlib.sha256()
    try:
        with open(file_path, "rb") as stream:
            while True:
                block = stream.read(8192)
                if not block:
                    # An empty read marks end of file.
                    break
                digest.update(block)
    except (IOError, OSError) as e:
        logger.debug(f"Could not compute SHA-256 for {file_path}: {e}")
        return None
    return digest.hexdigest()
@@ -0,0 +1,68 @@
1
+ """Centralized logging configuration for Footprinter."""
2
+
3
+ import logging
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
# Set once setup_logging() has run, so later calls become no-ops.
_configured = False


def setup_logging(level=None):
    """Configure the root logger. Only the first call takes effect.

    Level resolution order:
      1. Explicit ``level`` argument (if provided)
      2. ``LOG_LEVEL`` environment variable (e.g. ``LOG_LEVEL=DEBUG``),
         matched case-insensitively with surrounding whitespace ignored
      3. Falls back to INFO

    A ``LOG_LEVEL`` value that does not name a logging level is ignored
    and the INFO fallback is used instead.
    """
    global _configured
    if _configured:
        return
    _configured = True

    if level is None:
        env_level = os.environ.get("LOG_LEVEL", "").strip().upper()
        candidate = getattr(logging, env_level, None) if env_level else None
        # The logging module also exposes upper-case attributes that are not
        # levels (e.g. BASIC_FORMAT, a string). Accept integers only, so such
        # a value cannot reach basicConfig() and raise ValueError.
        if isinstance(candidate, int):
            level = candidate
    if level is None:
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        stream=sys.stderr,
    )
35
+
36
+
37
def add_file_handler(log_path: Path, level: int = logging.DEBUG) -> logging.FileHandler:
    """Attach a file handler writing to ``log_path`` to the root logger.

    Missing parent directories of ``log_path`` are created first. The
    handler uses a timestamped record format and carries a filter that
    drops sub-WARNING records from the schema migration logger.

    Args:
        log_path: Destination log file.
        level: Minimum level the handler accepts (DEBUG by default).

    Returns:
        The attached handler, so the caller can remove it from the root
        logger once the run is over.
    """
    root_logger = logging.root
    log_path.parent.mkdir(parents=True, exist_ok=True)

    file_handler = logging.FileHandler(str(log_path))
    file_handler.setLevel(level)
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    file_handler.setFormatter(formatter)
    root_logger.addHandler(file_handler)

    # The root logger's own level is checked before any handler sees a
    # record, so lower it when it is stricter than this handler's level.
    if level < root_logger.level:
        root_logger.setLevel(level)

    # Filter on the handler rather than changing a logger's level: the
    # suppression then goes away together with the handler.
    file_handler.addFilter(_schema_noise_filter)

    return file_handler
62
+
63
+
64
+ def _schema_noise_filter(record: logging.LogRecord) -> bool:
65
+ """Allow all records except low-level schema migration noise."""
66
+ if record.name.startswith("footprinter.ingest.db.schema"):
67
+ return record.levelno >= logging.WARNING
68
+ return True
@@ -0,0 +1,30 @@
1
+ """MIME type to content type mapping.
2
+
3
+ Used by both the orchestrator and Drive files adapter.
4
+ """
5
+
6
+
7
# Known MIME types and the short content-type label used for each.
# Built once at import time instead of on every call.
_MIME_CONTENT_TYPES = {
    "application/pdf": "pdf",
    "application/vnd.google-apps.document": "gdoc",
    "application/vnd.google-apps.spreadsheet": "gsheet",
    "application/vnd.google-apps.presentation": "gslides",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
    "text/plain": "txt",
    "text/csv": "csv",
    "image/jpeg": "jpg",
    "image/png": "png",
    "video/mp4": "mp4",
}


def mime_to_content_type(mime_type: str) -> str:
    """Convert MIME type to short content type string.

    The input is normalised before lookup: any parameters after ``;``
    (e.g. ``; charset=utf-8``) are dropped, surrounding whitespace is
    stripped, and the type is lower-cased.

    Args:
        mime_type: MIME type string, optionally with parameters.

    Returns:
        A short label for known types (e.g. "pdf", "gdoc"); otherwise the
        MIME subtype truncated to 8 chars. Returns "unknown" for falsy
        input or input that is empty after normalisation.
    """
    if not mime_type:
        return "unknown"

    # Media type names are case-insensitive and may carry parameters;
    # neither should affect which label is chosen.
    essence = mime_type.split(";", 1)[0].strip().lower()
    if not essence:
        return "unknown"

    label = _MIME_CONTENT_TYPES.get(essence)
    if label is not None:
        return label
    # Unknown type: fall back to the part after the last "/" (the whole
    # string when there is no slash), capped at 8 characters.
    return essence.rsplit("/", 1)[-1][:8]
@@ -0,0 +1,6 @@
1
+ import re
2
+
3
+
4
+ def _make_slug(name: str) -> str:
5
+ """Convert a display name to a URL-safe slug."""
6
+ return re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")
@@ -0,0 +1,11 @@
1
+ """Time utilities for consistent UTC timestamp handling."""
2
+
3
+ from datetime import datetime, timezone
4
+
5
# Timestamp format matching SQLite CURRENT_TIMESTAMP: YYYY-MM-DD HH:MM:SS (UTC).
UTC_FMT = "%Y-%m-%d %H:%M:%S"


def utc_now_iso() -> str:
    """Return the current UTC time rendered with ``UTC_FMT``."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime(UTC_FMT)