pylantir 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ """
2
+ Data Source Plugin Interface
3
+
4
+ This module defines the abstract base class that all Pylantir data source
5
+ plugins must implement. It serves as the contract between the core sync
6
+ orchestration logic and plugin implementations.
7
+
8
+ Version: 1.0.0
9
+ Stability: Stable (no breaking changes allowed without major version bump)
10
+ Constitutional Compliance: Minimalist Dependencies (stdlib only)
11
+
12
+ USAGE:
13
+ from pylantir.data_sources.base import DataSourcePlugin
14
+
15
+ class MyPlugin(DataSourcePlugin):
16
+ def validate_config(self, config):
17
+ # Implementation
18
+ return (True, "")
19
+
20
+ def fetch_entries(self, field_mapping, interval):
21
+ # Implementation
22
+ return [{"patient_id": "...", ...}]
23
+
24
+ def get_source_name(self):
25
+ return "MySource"
26
+ """
27
+
28
+ from abc import ABC, abstractmethod
29
+ from typing import Tuple, List, Dict
30
+ import logging
31
+
32
# Module-level logger for this interface module; concrete plugins create
# their own per-class child loggers in DataSourcePlugin.__init__.
lgr = logging.getLogger(__name__)
33
+
34
+
35
class DataSourcePlugin(ABC):
    """
    Abstract contract implemented by every Pylantir data source plugin.

    A plugin bridges an external data source (REDCap, CSV, databases, APIs)
    and Pylantir's worklist database: it fetches records from its specific
    source, validates them, and transforms them into the standardized
    WorklistItem dictionary format consumed by the sync orchestrator.

    THREAD SAFETY:
        Several plugin instances may run at the same time when more than one
        source is configured, so implementations must be thread-safe and
        should avoid shared mutable state.

    MEMORY MANAGEMENT:
        Implementations must follow Pylantir's memory efficiency patterns:
        - Avoid pandas DataFrames (use list[dict] instead)
        - Call gc.collect() explicitly in cleanup()
        - Process data in streaming fashion when possible
        - Follow example from redcap_to_db.py (50-100x memory improvement)
    """

    def __init__(self):
        """Set up base plugin state; override to add source-specific state."""
        # Child logger named "<module>.<SubclassName>" so log records
        # identify which concrete plugin emitted them.
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    @abstractmethod
    def validate_config(self, config: Dict) -> Tuple[bool, str]:
        """
        Check plugin-specific configuration before a sync run starts.

        Args:
            config: Dictionary taken from data_sources[].config in the JSON
                configuration.

        Returns:
            Tuple (is_valid, error_message); error_message is "" when the
            configuration is valid.
        """

    @abstractmethod
    def fetch_entries(
        self,
        field_mapping: Dict[str, str],
        interval: float
    ) -> List[Dict]:
        """
        Retrieve worklist entries from the underlying data source.

        Args:
            field_mapping: Mapping of source field names to WorklistItem
                field names.
            interval: Seconds elapsed since the previous sync (lets plugins
                that support it perform incremental sync).

        Returns:
            List of dictionaries keyed by WorklistItem field names.
        """

    @abstractmethod
    def get_source_name(self) -> str:
        """Return a human-readable source type identifier (e.g., 'REDCap')."""

    def supports_incremental_sync(self) -> bool:
        """Return True if the plugin can sync incrementally; defaults to False."""
        return False

    def cleanup(self) -> None:
        """Release resources after a sync (close connections, free memory).

        The base implementation is a no-op; override when cleanup is needed.
        """
103
+
104
+
105
class PluginError(Exception):
    """Common base class for every plugin-related error."""
108
+
109
+
110
class PluginConfigError(PluginError):
    """Signals that a plugin's configuration failed validation."""
113
+
114
+
115
class PluginFetchError(PluginError):
    """Signals that a plugin could not fetch data from its source."""