footprinter-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- footprinter/__init__.py +8 -0
- footprinter/access.py +444 -0
- footprinter/api/__init__.py +1 -0
- footprinter/api/db.py +61 -0
- footprinter/api/entities.py +250 -0
- footprinter/api/search.py +47 -0
- footprinter/api/semantic.py +33 -0
- footprinter/api/server.py +66 -0
- footprinter/api/status.py +15 -0
- footprinter/bundled/__init__.py +0 -0
- footprinter/bundled/config.example.yaml +161 -0
- footprinter/bundled/patterns/context_patterns.yaml +18 -0
- footprinter/bundled/patterns/extensions.yaml +283 -0
- footprinter/bundled/patterns/filename_patterns.yaml +61 -0
- footprinter/bundled/patterns/mime_mappings.yaml +68 -0
- footprinter/bundled/patterns/salesforce_rules.yaml +84 -0
- footprinter/bundled/patterns/security_patterns.yaml +27 -0
- footprinter/cli/__init__.py +128 -0
- footprinter/cli/__main__.py +6 -0
- footprinter/cli/_common.py +332 -0
- footprinter/cli/_policy_helpers.py +646 -0
- footprinter/cli/_prompt.py +220 -0
- footprinter/cli/api_cmd.py +32 -0
- footprinter/cli/connect.py +591 -0
- footprinter/cli/data.py +879 -0
- footprinter/cli/delete.py +128 -0
- footprinter/cli/ingest.py +579 -0
- footprinter/cli/mcp_cmd.py +750 -0
- footprinter/cli/mcp_setup.py +306 -0
- footprinter/cli/search.py +393 -0
- footprinter/cli/search_cmd.py +69 -0
- footprinter/cli/setup.py +1836 -0
- footprinter/cli/status.py +729 -0
- footprinter/cli/status_cmd.py +104 -0
- footprinter/cli/upsert.py +794 -0
- footprinter/cli/vectorize_cmd.py +215 -0
- footprinter/cli/view.py +322 -0
- footprinter/connectors/__init__.py +171 -0
- footprinter/connectors/config_utils.py +141 -0
- footprinter/db/__init__.py +37 -0
- footprinter/db/browser.py +198 -0
- footprinter/db/chats.py +610 -0
- footprinter/db/clients.py +307 -0
- footprinter/db/emails.py +279 -0
- footprinter/db/files.py +741 -0
- footprinter/db/folders.py +659 -0
- footprinter/db/messages.py +192 -0
- footprinter/db/policies.py +151 -0
- footprinter/db/projects.py +673 -0
- footprinter/db/search.py +573 -0
- footprinter/db/sql_utils.py +168 -0
- footprinter/db/status.py +320 -0
- footprinter/db/uploads.py +70 -0
- footprinter/ingest/__init__.py +0 -0
- footprinter/ingest/adapters/__init__.py +33 -0
- footprinter/ingest/adapters/browser.py +54 -0
- footprinter/ingest/adapters/chat.py +57 -0
- footprinter/ingest/adapters/ingest.py +146 -0
- footprinter/ingest/adapters/local_files.py +68 -0
- footprinter/ingest/adapters/local_folders.py +52 -0
- footprinter/ingest/adapters/protocol.py +174 -0
- footprinter/ingest/browser_indexer.py +216 -0
- footprinter/ingest/chat_dedup.py +156 -0
- footprinter/ingest/chat_indexer.py +515 -0
- footprinter/ingest/chat_parsers/__init__.py +8 -0
- footprinter/ingest/chat_parsers/chatgpt_parser.py +229 -0
- footprinter/ingest/chat_parsers/claude_parser.py +161 -0
- footprinter/ingest/cli.py +827 -0
- footprinter/ingest/content_extractors.py +117 -0
- footprinter/ingest/database.py +36 -0
- footprinter/ingest/db/__init__.py +1 -0
- footprinter/ingest/db/connector_schema.py +47 -0
- footprinter/ingest/db/migration.py +328 -0
- footprinter/ingest/db/schema.py +1043 -0
- footprinter/ingest/db/security.py +6 -0
- footprinter/ingest/file_indexer.py +261 -0
- footprinter/ingest/file_scanner.py +277 -0
- footprinter/ingest/folder_indexer.py +226 -0
- footprinter/ingest/full_content_extractor.py +321 -0
- footprinter/ingest/orchestrator.py +125 -0
- footprinter/ingest/pipe_runner.py +217 -0
- footprinter/ingest/processing.py +165 -0
- footprinter/ingest/registry.py +201 -0
- footprinter/ingest/run_record.py +91 -0
- footprinter/ingest/status.py +346 -0
- footprinter/mcp/__init__.py +0 -0
- footprinter/mcp/__main__.py +5 -0
- footprinter/mcp/db.py +57 -0
- footprinter/mcp/errors.py +102 -0
- footprinter/mcp/extraction.py +226 -0
- footprinter/mcp/server.py +39 -0
- footprinter/mcp/tools/__init__.py +0 -0
- footprinter/mcp/tools/navigation.py +70 -0
- footprinter/mcp/tools/read.py +75 -0
- footprinter/mcp/tools/search.py +158 -0
- footprinter/mcp/tools/semantic.py +79 -0
- footprinter/mcp/tools/status.py +15 -0
- footprinter/paths.py +91 -0
- footprinter/permissions.py +1160 -0
- footprinter/semantic/__init__.py +13 -0
- footprinter/semantic/chunking.py +52 -0
- footprinter/semantic/embeddings.py +23 -0
- footprinter/semantic/hybrid_search.py +273 -0
- footprinter/semantic/vector_store.py +471 -0
- footprinter/services/__init__.py +49 -0
- footprinter/services/access_service.py +342 -0
- footprinter/services/chat_service.py +85 -0
- footprinter/services/client_service.py +267 -0
- footprinter/services/content_service.py +181 -0
- footprinter/services/email_service.py +89 -0
- footprinter/services/file_service.py +83 -0
- footprinter/services/folder_service.py +122 -0
- footprinter/services/includes.py +19 -0
- footprinter/services/ingest_service.py +231 -0
- footprinter/services/project_service.py +262 -0
- footprinter/services/roles.py +25 -0
- footprinter/services/search_service.py +177 -0
- footprinter/services/semantic_service.py +360 -0
- footprinter/services/status_service.py +18 -0
- footprinter/services/visit_service.py +65 -0
- footprinter/source_registry.py +194 -0
- footprinter/utils/__init__.py +7 -0
- footprinter/utils/hash_utils.py +59 -0
- footprinter/utils/logging_config.py +68 -0
- footprinter/utils/mime.py +30 -0
- footprinter/utils/text.py +6 -0
- footprinter/utils/time.py +11 -0
- footprinter/visibility.py +1272 -0
- footprinter_cli-1.0.0.dist-info/LICENSE +21 -0
- footprinter_cli-1.0.0.dist-info/METADATA +229 -0
- footprinter_cli-1.0.0.dist-info/RECORD +134 -0
- footprinter_cli-1.0.0.dist-info/WHEEL +5 -0
- footprinter_cli-1.0.0.dist-info/entry_points.txt +2 -0
- footprinter_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Chat history adapter.
|
|
2
|
+
|
|
3
|
+
Wraps ChatIndexer to conform to PipeAdapter protocol.
|
|
4
|
+
Chat imports are manual — this adapter provides read-only status.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Dict, List
|
|
11
|
+
|
|
12
|
+
from footprinter.ingest.adapters.protocol import ErrorType, PipeContext, PipeResult
|
|
13
|
+
from footprinter.ingest.chat_indexer import ChatIndexer
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ChatAdapter:
    """Pipe adapter for the ``chat`` stage, backed by ChatIndexer.

    The adapter never imports anything itself: ``run`` only reads the
    statistics ChatIndexer reports and wraps them in an informational
    PipeResult.
    """

    name = "chat"
    pipe_name = "chat"
    required_extras: List[str] = []

    def run(self, db: Any, ctx: PipeContext) -> PipeResult:
        """Summarise the stored chat history without modifying it.

        *ctx* is accepted to satisfy the adapter protocol but is not read.
        The totals come from ``ChatIndexer(db).get_stats()``; keys missing
        from that mapping fall back to ``0`` (counts) or ``{}`` (per-account
        breakdown).

        Returns an ``info`` result on success.  Any exception raised while
        collecting the stats is logged and converted into an ``error``
        result tagged ``ErrorType.RUNTIME``.
        """
        try:
            stats = ChatIndexer(db).get_stats()
            # Insertion order matches the order the values appear in the result data.
            payload = {
                "note": "Chat imports are manual - run chat_indexer import-claude or import-chatgpt",
                "current_chats": stats.get("total_chats", 0),
                "current_messages": stats.get("total_messages", 0),
                "by_account": stats.get("by_account", {}),
            }
            return PipeResult.info("chat", **payload)
        except Exception as exc:
            logger.error(f"chat stage failed: {exc}")
            return PipeResult.make_error(
                "chat",
                error=str(exc),
                error_type=ErrorType.RUNTIME,
            )

    def status(self, db: Any) -> Dict[str, Any]:
        """Count the rows in the ``chats`` and ``messages`` tables.

        Returns a dict with the keys ``"chats"`` and ``"messages"``.
        """
        queries = (
            ("chats", "SELECT COUNT(*) FROM chats"),
            ("messages", "SELECT COUNT(*) FROM messages"),
        )
        cur = db.conn.cursor()
        totals: Dict[str, Any] = {}
        for key, sql in queries:
            cur.execute(sql)
            totals[key] = cur.fetchone()[0]
        return totals
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Shared ingest loop helper for pipeline adapters.
|
|
2
|
+
|
|
3
|
+
Extracts the common iterate-try-count-log pattern used by Browser, Email,
|
|
4
|
+
DriveFiles, and DriveFolders adapters into a single function.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Callable, Iterable
|
|
11
|
+
|
|
12
|
+
from footprinter.ingest.adapters.protocol import PipeResult
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def ingest_entries(
    stage: str,
    entries: Iterable,
    insert_fn: Callable[[Any], Any],
    *,
    count_label: str = "items_indexed",
    max_logged_errors: int = 5,
    progress_interval: int | None = None,
    conn: Any | None = None,
    batch_size: int = 1000,
    on_progress: Callable[[int], None] | None = None,
) -> PipeResult:
    """Iterate *entries*, calling *insert_fn* per entry with error resilience.

    Args:
        stage: Stage name used in log messages and in the returned result.
        entries: Items to ingest; consumed exactly once.
        insert_fn: Called with each entry.  See the skip contract below.
        count_label: Result-data key under which the success count is stored.
        max_logged_errors: Maximum number of per-entry errors logged
            individually; the remainder is summarised in one warning.
        progress_interval: If set, log a progress message every N successes.
        conn: Optional connection object exposing ``commit()``.
        batch_size: Number of successful inserts between batch commits.
        on_progress: Optional callback invoked after every entry (success,
            skip or failure) with the running count of processed entries.

    Returns a PipeResult with:
    - ``count_label``: number of successful inserts
    - ``skipped``: number of entries the insert_fn chose not to process
    - ``errors``: number of failed inserts
    - ``commit_errors``: number of failed commits (only present when > 0)
    - Status ``completed`` or ``completed_with_errors``

    **Skip contract:** if *insert_fn* returns ``False`` (identity check, not
    truthiness), the entry is counted as *skipped* rather than a success.
    Any other return value (``None``, ``True``, etc.) counts as a success.
    This lets adapters signal "I intentionally didn't process this" without
    post-correcting counts.

    **Batch commits:** when *conn* is provided, ``conn.commit()`` is called
    every *batch_size* successful inserts and once after the loop for any
    remainder.  On insert error, pending successes are committed before
    continuing.  When *conn* is ``None``, no commits are issued.

    **Commit failures:** if ``conn.commit()`` itself raises, the error is
    caught and logged (warning for mid-loop commits, error for the final
    commit).  Processing continues — uncommitted rows stay in the open
    transaction and are flushed by the next successful commit or by a
    retry commit after the loop.  The "pending" figure in those log
    messages is the number of successful inserts since the last commit
    that actually succeeded, so it keeps growing across consecutive
    failed commits.  The ``count_label`` value counts entries where
    *insert_fn* succeeded, not entries durably committed; when
    ``commit_errors`` is present in the result data, some inserts may not
    have been persisted.

    Note: 100% failure still returns ``completed_with_errors`` (not ``error``).
    ``error`` is reserved for stage-level failures (database, config, etc.).
    ``completed_with_errors`` means the loop completed — individual entries failed.
    """
    success_count = 0
    skip_count = 0
    error_count = 0
    commit_error_count = 0
    # Successes since the last commit *attempt*; decides when to commit next.
    batch_count = 0
    # Successes since the last *successful* commit; used only for log accuracy.
    uncommitted_count = 0
    processed_count = 0

    def _commit(log: Callable[..., None], message: str) -> None:
        """Attempt one commit; on failure, count it and log the true backlog."""
        nonlocal commit_error_count, uncommitted_count
        try:
            conn.commit()
        except Exception as exc:
            commit_error_count += 1
            log(message, stage, uncommitted_count, exc)
        else:
            uncommitted_count = 0

    for entry in entries:
        try:
            result = insert_fn(entry)
            if result is False:
                skip_count += 1
            else:
                success_count += 1
                batch_count += 1
                uncommitted_count += 1
                if conn is not None and batch_count >= batch_size:
                    _commit(logger.warning, "%s: batch commit failed (%d pending): %s")
                    # Reset even after a failed commit so the next attempt
                    # happens one full batch later rather than on every insert.
                    batch_count = 0
                if progress_interval and success_count % progress_interval == 0:
                    logger.info(f"Indexed {success_count} {count_label}...")
        except Exception as e:
            error_count += 1
            # Flush what succeeded so far before moving past the failed entry.
            if conn is not None and batch_count > 0:
                _commit(logger.warning, "%s: error-recovery commit failed (%d pending): %s")
                batch_count = 0
            if error_count <= max_logged_errors:
                logger.error(f"Error in {stage} ingest: {e}")
        finally:
            processed_count += 1
            if on_progress is not None:
                on_progress(processed_count)

    # Commit the remainder; also retry when an earlier commit failed, since
    # batch_count was reset at that point and no longer reflects the backlog.
    if conn is not None and (batch_count > 0 or commit_error_count > 0):
        _commit(logger.error, "%s: final commit failed (%d pending): %s")

    suppressed = error_count - max_logged_errors
    if suppressed > 0:
        logger.warning(f"{stage}: {suppressed} more errors not shown")

    data = {count_label: success_count, "skipped": skip_count, "errors": error_count}
    if commit_error_count > 0:
        data["commit_errors"] = commit_error_count

    if error_count > 0 or commit_error_count > 0:
        error_parts = []
        if error_count > 0:
            error_parts.append(f"{error_count} entries failed")
        if commit_error_count > 0:
            error_parts.append(f"{commit_error_count} commit errors")
        return PipeResult.completed_with_errors(
            stage,
            error=", ".join(error_parts),
            **data,
        )

    return PipeResult.completed(stage, **data)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Local files adapter.
|
|
2
|
+
|
|
3
|
+
Wraps FileIndexer to conform to PipeAdapter protocol.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Dict, List
|
|
10
|
+
|
|
11
|
+
from footprinter.db import files as files_db
|
|
12
|
+
from footprinter.ingest.adapters.protocol import ErrorType, PipeContext, PipeResult
|
|
13
|
+
from footprinter.ingest.file_indexer import FileIndexer
|
|
14
|
+
from footprinter.source_registry import SourceRegistry
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LocalFilesAdapter:
    """Pipe adapter for the ``local_files`` stage, backed by FileIndexer."""

    name = "local_files"
    pipe_name = "local_files"
    required_extras: List[str] = []

    def run(self, db: Any, ctx: PipeContext) -> PipeResult:
        """Run FileIndexer over the local files and report its counters.

        In full mode the indexer receives ``last_run=None``; otherwise it
        receives ``ctx.last_run``.  The lookup maps handed to the indexer
        are built once up front from ``db.conn``.

        Returns a ``completed`` result carrying the ``inserted``,
        ``updated``, ``skipped`` and ``errors`` counters plus the ``mode``
        (``"full"`` or ``"incremental"``).  Any exception is logged and
        converted into an ``error`` result tagged ``ErrorType.RUNTIME``.
        """
        try:
            since = ctx.last_run if not ctx.full_mode else None
            indexer = FileIndexer(config_path=ctx.config_path, last_run=since, db=db)

            # Build the lookup tables a single time, before any file is indexed.
            registry = SourceRegistry(db.conn)
            path_map, project_map = files_db.build_folder_maps(db.conn)
            prefix_map = files_db.build_project_prefix_map(db.conn)
            remote_names = frozenset(registry.remote_source_names())
            relationship_maps = dict(
                project_prefix_map=prefix_map,
                folder_path_map=path_map,
                folder_project_map=project_map,
                remote_source_names=remote_names,
            )

            counts = indexer.index_files(
                relationship_maps=relationship_maps,
                on_progress=ctx.on_progress,
            )

            summary = {
                key: counts[key]
                for key in ("inserted", "updated", "skipped", "errors")
            }
            if ctx.full_mode:
                summary["mode"] = "full"
            else:
                summary["mode"] = "incremental"
            return PipeResult.completed("local_files", **summary)
        except Exception as exc:
            logger.error(f"local_files stage failed: {exc}")
            return PipeResult.make_error(
                "local_files",
                error=str(exc),
                error_type=ErrorType.RUNTIME,
            )

    def status(self, db: Any) -> Dict[str, Any]:
        """Count ``files`` rows with source ``'local'`` whose status is not ``'removed'``.

        Returns a dict with the single key ``"local_files"``.
        """
        sql = "SELECT COUNT(*) FROM files WHERE source = 'local' AND status != 'removed'"
        cur = db.conn.cursor()
        cur.execute(sql)
        (total,) = cur.fetchone()[:1]
        return {"local_files": total}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Local folders adapter.
|
|
2
|
+
|
|
3
|
+
Wraps FolderIndexer to conform to PipeAdapter protocol.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Dict, List
|
|
10
|
+
|
|
11
|
+
from footprinter.ingest.adapters.protocol import ErrorType, PipeContext, PipeResult
|
|
12
|
+
from footprinter.ingest.folder_indexer import FolderIndexer
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LocalFoldersAdapter:
    """Pipe adapter for the ``local_folders`` stage, backed by FolderIndexer."""

    name = "local_folders"
    pipe_name = "local_folders"
    required_extras: List[str] = []

    def run(self, db: Any, ctx: PipeContext) -> PipeResult:
        """Scan the configured directories and save the folders found.

        The roots come from ``ctx.source_config["directories"]`` and default
        to ``["~/Work", "~/Personal"]`` when that key is absent.

        Returns a ``completed`` result with ``folders_found``, ``inserted``
        and ``updated``.  Any exception is logged and converted into an
        ``error`` result tagged ``ErrorType.RUNTIME``.
        """
        try:
            indexer = FolderIndexer(ctx.source_config, db)
            roots = ctx.source_config.get("directories", ["~/Work", "~/Personal"])

            found = indexer.scan_folders(roots)
            new_rows, changed_rows = indexer.save_folders(found)

            summary = {
                "folders_found": len(found),
                "inserted": new_rows,
                "updated": changed_rows,
            }
            return PipeResult.completed("local_folders", **summary)
        except Exception as exc:
            logger.error(f"local_folders stage failed: {exc}")
            return PipeResult.make_error(
                "local_folders",
                error=str(exc),
                error_type=ErrorType.RUNTIME,
            )

    def status(self, db: Any) -> Dict[str, Any]:
        """Count every row in the ``folders`` table.

        Returns a dict with the single key ``"folders"``.
        """
        sql = "SELECT COUNT(*) FROM folders"
        cur = db.conn.cursor()
        cur.execute(sql)
        first_row = cur.fetchone()
        return {"folders": first_row[0]}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Adapter protocol types for the pipeline refactor.
|
|
2
|
+
|
|
3
|
+
Defines the formal types that all pipe adapters implement:
|
|
4
|
+
- PipeStatus: enum of result statuses matching current orchestrator strings
|
|
5
|
+
- ErrorType: enum of error categories used for halt decisions
|
|
6
|
+
- PipeResult: typed replacement for ad-hoc result dicts
|
|
7
|
+
- PipeContext: typed runtime context replacing the convention-based config dict
|
|
8
|
+
- PipeAdapter: Protocol that all adapters implement
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Protocol, runtime_checkable
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from footprinter.ingest.database import Database
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PipeStatus(Enum):
    """Outcome of a single pipe run.

    Each member's value is the status string used in the orchestrator's
    result dicts, so ``member.value`` can be written into those dicts as-is.
    """

    # The stage finished successfully.
    COMPLETED = "completed"
    # The stage finished, but with non-fatal errors.
    COMPLETED_WITH_ERRORS = "completed_with_errors"
    # The stage was skipped.
    SKIPPED = "skipped"
    # The stage failed.
    ERROR = "error"
    # Informational result; no processing occurred.
    INFO = "info"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ErrorType(Enum):
    """Category attached to a failed pipe result.

    The orchestrator reads the category to decide whether the pipeline
    should halt: ``DATABASE`` and ``CONFIG`` are fatal, while
    ``MISSING_DEPENDENCY`` and ``RUNTIME`` are not.
    """

    # Non-fatal categories.
    MISSING_DEPENDENCY = "missing_dependency"
    # Fatal categories.
    DATABASE = "database"
    CONFIG = "config"
    # Non-fatal; the adapters in this package use it for any caught exception.
    RUNTIME = "runtime"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class PipeResult:
    """Structured outcome of one pipeline pipe.

    Stands in for the free-form ``Dict[str, Any]`` the orchestrator's pipe
    methods used to return.  Adapters normally build instances through the
    factory classmethods below rather than calling the constructor.
    """

    # Name of the stage that produced this result.
    stage: str
    # Outcome of the run.
    status: PipeStatus
    # Reported under ``elapsed_seconds`` by to_dict(); no factory sets it.
    elapsed_seconds: float = 0.0
    # Stage-specific payload (counters, notes, ...).
    data: Dict[str, Any] = field(default_factory=dict)
    # Error message, when there is one.
    error: Optional[str] = None
    # Error category, when there is one.
    error_type: Optional[ErrorType] = None

    # -- Factories -------------------------------------------------------------

    @classmethod
    def completed(cls, stage: str, **data: Any) -> PipeResult:
        """Build a ``COMPLETED`` result whose payload is the keyword arguments."""
        return cls(stage=stage, data=data, status=PipeStatus.COMPLETED)

    @classmethod
    def completed_with_errors(cls, stage: str, error: str, **data: Any) -> PipeResult:
        """Build a ``COMPLETED_WITH_ERRORS`` result carrying an error summary."""
        return cls(
            stage=stage,
            error=error,
            data=data,
            status=PipeStatus.COMPLETED_WITH_ERRORS,
        )

    @classmethod
    def skipped(cls, stage: str, reason: str, **data: Any) -> PipeResult:
        """Build a ``SKIPPED`` result; *reason* is stored as ``data["reason"]``."""
        payload: Dict[str, Any] = {"reason": reason}
        payload.update(data)
        return cls(stage=stage, data=payload, status=PipeStatus.SKIPPED)

    @classmethod
    def make_error(
        cls,
        stage: str,
        error: str,
        error_type: Optional[ErrorType] = None,
        **data: Any,
    ) -> PipeResult:
        """Build an ``ERROR`` result with a message and an optional category."""
        return cls(
            stage=stage,
            error=error,
            error_type=error_type,
            data=data,
            status=PipeStatus.ERROR,
        )

    @classmethod
    def info(cls, stage: str, **data: Any) -> PipeResult:
        """Build an ``INFO`` result (nothing was processed)."""
        return cls(stage=stage, data=data, status=PipeStatus.INFO)

    # -- Serialization ---------------------------------------------------------

    def to_dict(self) -> Dict[str, Any]:
        """Return the flat dict shape the orchestrator expects.

        The payload keys are laid down first and ``stage``, ``status`` and
        ``elapsed_seconds`` are written on top of them, so a payload key
        with one of those names cannot override the real value.  ``error``
        and ``error_type`` are written only when they are not ``None``.
        Enum fields are emitted as their ``.value``.
        """
        flat: Dict[str, Any] = {
            **self.data,
            "stage": self.stage,
            "status": self.status.value,
            "elapsed_seconds": self.elapsed_seconds,
        }
        if self.error is not None:
            flat["error"] = self.error
        if self.error_type is not None:
            flat["error_type"] = self.error_type.value
        return flat
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass
class PipeContext:
    """Runtime context handed to ``adapter.run()``.

    A typed replacement for the convention-based ``Dict[str, Any]`` config
    parameter adapters used to receive.
    """

    # Per-source configuration mapping (the only required field).
    source_config: Dict[str, Any]
    # Path of the configuration file; empty string by default.
    config_path: str = ""
    # True requests a full run rather than an incremental one.
    full_mode: bool = False
    # Timestamp of the previous run, if any.
    last_run: Optional[datetime] = None
    # Optional progress callback taking a single int.
    on_progress: Optional[Callable[[int], None]] = None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@runtime_checkable
class PipeAdapter(Protocol):
    """Structural interface every pipe adapter satisfies.

    Being ``runtime_checkable``, it lets the adapter registry validate
    candidates with ``isinstance()``.  The three metadata members may be
    provided either as plain class attributes or as ``@property`` methods.
    """

    @property
    def name(self) -> str:
        """Human-readable adapter name."""

    @property
    def pipe_name(self) -> str:
        """Identifier the orchestrator uses for this pipe."""

    @property
    def required_extras(self) -> List[str]:
        """Pip extras that must be installed before this adapter can run."""

    def run(self, db: Database, ctx: PipeContext) -> PipeResult:
        """Execute the pipe against *db* using the runtime context *ctx*."""

    def status(self, db: Database) -> Dict[str, Any]:
        """Report current data counts and health for this pipe."""
|