ergon-framework-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. ergon/__init__.py +13 -0
  2. ergon/bootstrap/src/__project__/__init__.py +0 -0
  3. ergon/bootstrap/src/__project__/_observability/docker-compose.telemetry.yml +124 -0
  4. ergon/bootstrap/src/__project__/_observability/grafana.yaml +17 -0
  5. ergon/bootstrap/src/__project__/_observability/loki.yaml +48 -0
  6. ergon/bootstrap/src/__project__/_observability/otel-collector-config.yaml +53 -0
  7. ergon/bootstrap/src/__project__/_observability/prometheus.yaml +11 -0
  8. ergon/bootstrap/src/__project__/_observability/tempo.yaml +24 -0
  9. ergon/bootstrap/src/__project__/connectors/__init__.py +0 -0
  10. ergon/bootstrap/src/__project__/main.py +9 -0
  11. ergon/bootstrap/src/__project__/tasks/__init__.py +0 -0
  12. ergon/bootstrap/src/__project__/tasks/constants.py +13 -0
  13. ergon/bootstrap/src/__project__/tasks/example_task/__init__.py +0 -0
  14. ergon/bootstrap/src/__project__/tasks/example_task/config.py +4 -0
  15. ergon/bootstrap/src/__project__/tasks/example_task/exceptions.py +4 -0
  16. ergon/bootstrap/src/__project__/tasks/example_task/helpers.py +4 -0
  17. ergon/bootstrap/src/__project__/tasks/example_task/schemas.py +5 -0
  18. ergon/bootstrap/src/__project__/tasks/example_task/task.py +1 -0
  19. ergon/bootstrap/src/__project__/tasks/exceptions.py +0 -0
  20. ergon/bootstrap/src/__project__/tasks/helpers.py +0 -0
  21. ergon/bootstrap/src/__project__/tasks/schemas.py +0 -0
  22. ergon/bootstrap/src/__project__/tasks/settings.py +5 -0
  23. ergon/cli.py +174 -0
  24. ergon/connector/__init__.py +64 -0
  25. ergon/connector/connector.py +97 -0
  26. ergon/connector/excel/__init__.py +18 -0
  27. ergon/connector/excel/connector.py +175 -0
  28. ergon/connector/excel/models.py +24 -0
  29. ergon/connector/excel/service.py +98 -0
  30. ergon/connector/pipefy/__init__.py +21 -0
  31. ergon/connector/pipefy/async_connector.py +48 -0
  32. ergon/connector/pipefy/async_service.py +907 -0
  33. ergon/connector/pipefy/connector.py +36 -0
  34. ergon/connector/pipefy/models.py +48 -0
  35. ergon/connector/pipefy/service.py +1016 -0
  36. ergon/connector/pipefy/version.py +1 -0
  37. ergon/connector/postgres/__init__.py +11 -0
  38. ergon/connector/postgres/async_connector.py +119 -0
  39. ergon/connector/postgres/async_service.py +116 -0
  40. ergon/connector/postgres/models.py +34 -0
  41. ergon/connector/rabbitmq/__init__.py +25 -0
  42. ergon/connector/rabbitmq/async_connector.py +120 -0
  43. ergon/connector/rabbitmq/async_service.py +417 -0
  44. ergon/connector/rabbitmq/connector.py +54 -0
  45. ergon/connector/rabbitmq/helper.py +14 -0
  46. ergon/connector/rabbitmq/models.py +92 -0
  47. ergon/connector/rabbitmq/service.py +199 -0
  48. ergon/connector/sqs/__init__.py +15 -0
  49. ergon/connector/sqs/async_connector.py +120 -0
  50. ergon/connector/sqs/async_service.py +246 -0
  51. ergon/connector/sqs/connector.py +120 -0
  52. ergon/connector/sqs/models.py +36 -0
  53. ergon/connector/sqs/service.py +219 -0
  54. ergon/connector/transaction.py +14 -0
  55. ergon/py.typed +0 -0
  56. ergon/service/__init__.py +5 -0
  57. ergon/service/service.py +17 -0
  58. ergon/task/__init__.py +13 -0
  59. ergon/task/base.py +222 -0
  60. ergon/task/exceptions.py +217 -0
  61. ergon/task/helpers.py +691 -0
  62. ergon/task/manager.py +85 -0
  63. ergon/task/mixins/__init__.py +13 -0
  64. ergon/task/mixins/consumer.py +858 -0
  65. ergon/task/mixins/metrics.py +457 -0
  66. ergon/task/mixins/producer.py +486 -0
  67. ergon/task/policies.py +229 -0
  68. ergon/task/runner.py +386 -0
  69. ergon/task/utils.py +64 -0
  70. ergon/telemetry/__init__.py +7 -0
  71. ergon/telemetry/_resource.py +13 -0
  72. ergon/telemetry/logging.py +370 -0
  73. ergon/telemetry/metrics.py +101 -0
  74. ergon/telemetry/tracing.py +152 -0
  75. ergon/utils/__init__.py +5 -0
  76. ergon/utils/env.py +26 -0
  77. ergon_framework_python-0.1.0.dist-info/METADATA +449 -0
  78. ergon_framework_python-0.1.0.dist-info/RECORD +82 -0
  79. ergon_framework_python-0.1.0.dist-info/WHEEL +5 -0
  80. ergon_framework_python-0.1.0.dist-info/entry_points.txt +2 -0
  81. ergon_framework_python-0.1.0.dist-info/licenses/LICENSE +21 -0
  82. ergon_framework_python-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,97 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, List, Union
3
+
4
+ from pydantic import BaseModel, model_validator
5
+
6
+ from . import transaction
7
+
8
+
9
+ # ---------------------------------------------------------
10
+ # SYNC CONNECTOR
11
+ # ---------------------------------------------------------
12
class Connector(ABC):
    """Interface for synchronous connectors.

    Subclasses must implement ``fetch_transactions`` and
    ``dispatch_transactions``. The remaining methods are optional hooks whose
    default implementation raises ``NotImplementedError``.
    """

    @abstractmethod
    def fetch_transactions(self, *args, **kwargs) -> List[transaction.Transaction]:
        """Fetch one or many transactions (sync)."""
        ...

    @abstractmethod
    def dispatch_transactions(self, transactions: List[transaction.Transaction], *args, **kwargs) -> Any:
        """Publish the given transactions (sync)."""
        ...

    def fetch_transaction_by_id(self, transaction_id: str, *args, **kwargs) -> transaction.Transaction:
        """Fetch a single transaction by its id for individual processing.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def get_transactions_count(self, *args, **kwargs) -> int:
        """Return the total number of transactions in the connector target system.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError
31
+
32
+
33
+ # ---------------------------------------------------------
34
+ # ASYNC CONNECTOR
35
+ # ---------------------------------------------------------
36
class AsyncConnector(ABC):
    """Interface for asynchronous connectors.

    Subclasses must implement ``fetch_transactions_async`` and
    ``dispatch_transactions_async``. Every other coroutine is an optional hook
    whose default implementation raises ``NotImplementedError``.
    """

    @abstractmethod
    async def fetch_transactions_async(self, *args, **kwargs) -> List[transaction.Transaction]:
        """Fetch transactions asynchronously."""
        ...

    @abstractmethod
    async def dispatch_transactions_async(self, transactions: List[transaction.Transaction], *args, **kwargs) -> Any:
        """Publish the given transactions asynchronously."""
        ...

    async def fetch_transaction_by_id_async(self, transaction_id: str, *args, **kwargs) -> transaction.Transaction:
        """Fetch a single transaction by its id for individual processing.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    async def get_transactions_count_async(self, *args, **kwargs) -> int:
        """Return the total number of transactions in the connector target system.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    async def ack_transaction(self, transaction: transaction.Transaction) -> None:
        """Acknowledge a successfully processed transaction.

        The default raises ``NotImplementedError`` so that connectors without
        per-message acknowledgement (database, HTTP, file-based, etc.) do not
        have to implement it. Broker connectors with per-message ack semantics
        (RabbitMQ, SQS, Kafka, ...) must override.
        """
        raise NotImplementedError

    async def nack_transaction(self, transaction: transaction.Transaction, requeue: bool = True) -> None:
        """Negatively acknowledge a transaction.

        ``requeue=True`` asks the broker to redeliver; ``requeue=False`` routes
        to the broker's dead-letter destination if one is configured. The
        default raises ``NotImplementedError``; broker connectors must override.
        """
        raise NotImplementedError
72
+
73
+
74
+ # ---------------------------------------------------------
75
+ # CONNECTOR CONFIG (supports both sync + async)
76
+ # ---------------------------------------------------------
77
class ConnectorConfig(BaseModel):
    """
    Generic connector configuration (usable for sync and async connectors).

    connector: class implementing either Connector or AsyncConnector
    args: positional arguments passed to connector's __init__
    kwargs: keyword arguments passed to connector's __init__
    """

    connector: Union[type[Connector], type[AsyncConnector]]
    args: tuple[Any, ...] = ()
    kwargs: dict[str, Any] = {}

    @model_validator(mode="after")
    def validate_connector(self) -> "ConnectorConfig":
        """Reject any ``connector`` that derives from neither connector base class.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``connector`` subclasses neither Connector nor
                AsyncConnector.
        """
        accepted_bases = (Connector, AsyncConnector)
        if issubclass(self.connector, accepted_bases):
            return self
        raise ValueError(
            f"Connector '{self.connector}' must inherit from Connector or AsyncConnector. Got: {self.connector}"
        )
@@ -0,0 +1,18 @@
1
+ """
2
+ Excel connector for ergon-framework.
3
+ Provides transaction-based access to Excel files with support for:
4
+ - Batch mode (multiple rows per transaction for high-throughput processing)
5
+ - Normal mode (one row per transaction)
6
+ - Sharding for parallel processing
7
+ """
8
+
9
+ from .connector import ExcelConnector
10
+ from .models import ExcelFetchConfig, ExcelRow
11
+ from .service import ExcelService
12
+
13
+ __all__ = [
14
+ "ExcelConnector",
15
+ "ExcelService",
16
+ "ExcelFetchConfig",
17
+ "ExcelRow",
18
+ ]
@@ -0,0 +1,175 @@
1
+ """
2
+ Excel connector for ergon-framework.
3
+ Provides transaction-based access to Excel files with support for:
4
+ - Batch mode (multiple rows per transaction)
5
+ - Normal mode (one row per transaction)
6
+ - Sharding for parallel processing
7
+ """
8
+
9
+ import logging
10
+ import uuid
11
+ from typing import Any, List, Optional
12
+
13
+ from ..connector import Connector
14
+ from ..transaction import Transaction
15
+ from .service import ExcelService
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class ExcelConnector(Connector):
    """
    Connector for reading transactions from Excel files.

    Features:
        - Iterator-based reading (rows are pulled lazily from the service)
        - Batch mode for high-throughput processing
        - Sharding support for parallel workers

    One row iterator is kept per distinct combination of
    ``(file_path, filter_col, filter_val, worker_id, max_workers, sheet_name)``,
    so successive ``fetch_transactions`` calls with the same arguments continue
    where the previous call stopped. The call that finds the iterator exhausted
    returns ``[]`` and drops it; a later call with the same arguments therefore
    reads the file again from the start.

    Example:
        service = ExcelService()
        connector = ExcelConnector(service)
        transactions = connector.fetch_transactions(
            limit=100,
            file_path="data.xlsx",
            filter_col="Type",
            filter_val="Active",
            batch_mode=True,
        )
    """

    def __init__(self, service: ExcelService):
        """
        Initialize the Excel connector.

        Args:
            service: ExcelService instance for reading Excel files.
        """
        self.service = service
        # Live row iterators, keyed by the stream key built in fetch_transactions.
        self._iterators = {}

    def fetch_transactions(
        self,
        limit: int = 10,
        file_path: Optional[str] = None,
        filter_col: Optional[str] = None,
        filter_val: Optional[Any] = None,
        metadata: Optional[dict] = None,
        worker_id: Optional[int] = None,
        max_workers: Optional[int] = None,
        batch_mode: bool = False,
        sheet_name: Optional[str] = None,
        *args,
        **kwargs,
    ) -> List[Transaction]:
        """
        Fetch transactions from Excel file.

        Args:
            limit: Maximum number of rows to pull per call (batch size).
            file_path: Path to Excel file. When empty or None, ``[]`` is returned.
            filter_col: Column to filter by.
            filter_val: Value to filter for.
            metadata: Metadata to attach to transactions.
            worker_id: Worker ID for sharding (0-indexed).
            max_workers: Total number of workers for sharding.
            batch_mode: When True, returns a single transaction with payload
                containing a list of rows instead of individual transactions.
                This enables batch processing (e.g., batch embedding, batch upsert).
            sheet_name: Optional sheet name to read.

        Returns:
            List of Transaction objects; empty when no rows were pulled.
        """
        if not file_path:
            return []

        # Key identifying this specific data stream. All components must be
        # hashable because the key is used as a dict key.
        key = (file_path, filter_col, filter_val, worker_id, max_workers, sheet_name)

        # Create the iterator lazily on first use of this stream.
        if key not in self._iterators:
            data = self.service.read_excel(
                file_path,
                filter_col,
                filter_val,
                shard_id=worker_id,
                total_shards=max_workers,
                sheet_name=sheet_name,
            )
            self._iterators[key] = iter(data)

        iterator = self._iterators[key]
        base_metadata = metadata or {}

        if batch_mode:
            return self._fetch_batch_mode(iterator, key, limit, base_metadata)
        return self._fetch_normal_mode(iterator, key, limit, base_metadata)

    def _take_rows(self, iterator, key, limit: int) -> list:
        """Pull up to ``limit`` rows from ``iterator``.

        The iterator registered under ``key`` is dropped only when it was found
        exhausted AND this call produced no rows. A call with ``limit <= 0``
        never touches the iterator and therefore never discards a live stream.
        """
        rows = []
        exhausted = False
        for _ in range(limit):
            try:
                rows.append(next(iterator))
            except StopIteration:
                exhausted = True
                break

        if exhausted and not rows:
            self._iterators.pop(key, None)
        return rows

    def _fetch_batch_mode(
        self,
        iterator,
        key,
        limit: int,
        base_metadata: dict,
    ) -> List[Transaction]:
        """Bundle up to ``limit`` rows into a single transaction.

        Returns:
            A one-element list whose transaction payload is the list of rows,
            or ``[]`` when no rows were pulled.
        """
        rows = self._take_rows(iterator, key, limit)
        if not rows:
            return []

        batch_id = str(uuid.uuid4())
        return [
            Transaction(
                id=batch_id,
                payload=rows,
                metadata={**base_metadata, "batch_mode": True, "batch_size": len(rows)},
            )
        ]

    def _fetch_normal_mode(
        self,
        iterator,
        key,
        limit: int,
        base_metadata: dict,
    ) -> List[Transaction]:
        """Return one transaction per row, for up to ``limit`` rows."""
        rows = self._take_rows(iterator, key, limit)
        return [
            Transaction(
                id=str(uuid.uuid4()),
                payload=row,
                # Each transaction gets its own copy so they do not share (and
                # cannot mutate) one dict, nor the caller's metadata argument.
                metadata=dict(base_metadata),
            )
            for row in rows
        ]

    def dispatch_transactions(
        self,
        transactions: List[Transaction],
        *args,
        **kwargs,
    ) -> None:
        """
        Excel connector does not support writing transactions.

        Raises:
            NotImplementedError: Always raised.
        """
        raise NotImplementedError("ExcelConnector does not support producing transactions.")
@@ -0,0 +1,24 @@
1
+ """
2
+ Excel connector models for ergon-framework.
3
+ """
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class ExcelFetchConfig(BaseModel):
    """Options describing how transactions are fetched from an Excel file."""

    # Required: location of the workbook.
    file_path: str = Field(description="Path to the Excel file")
    # Optional column filter; both default to None.
    filter_col: Optional[str] = Field(default=None, description="Column name to filter by")
    filter_val: Optional[Any] = Field(default=None, description="Value to filter for in filter_col")
    sheet_name: Optional[str] = Field(default=None, description="Sheet name to read (defaults to active sheet)")
    batch_mode: bool = Field(default=False, description="When True, bundle rows into single transaction")
18
+
19
+
20
class ExcelRow(BaseModel):
    """A single row read from an Excel file."""

    # Cell values keyed by column name; a fresh empty dict per instance.
    data: Dict[str, Any] = Field(default_factory=dict)
    # Required.
    row_index: int = Field(description="Original row index in the Excel file")
@@ -0,0 +1,98 @@
1
+ """
2
+ Excel service (client) for reading Excel files.
3
+ """
4
+
5
+ import logging
6
+ from typing import Any, Generator, Optional
7
+
8
+ from openpyxl import load_workbook
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ExcelService:
    """
    Service for reading Excel files with streaming support.

    Supports:
        - Streaming row-by-row reading (the workbook is opened read-only)
        - Column-based filtering
        - Sharding for parallel processing
    """

    def __init__(self):
        # Cache for loaded data (optional future enhancement)
        self._cache = {}

    @staticmethod
    def _matches_filter(row_dict: dict, filter_col: Optional[str], filter_val: Optional[Any]) -> bool:
        """Return True when ``row_dict`` passes the optional column filter.

        No filtering is applied when ``filter_col`` is empty/None or
        ``filter_val`` is None. Falsy-but-meaningful values such as ``0``,
        ``False`` or ``""`` ARE honoured as filter values.
        """
        if not filter_col or filter_val is None:
            return True
        return not (row_dict.get(filter_col) != filter_val)

    def read_excel(
        self,
        file_path: str,
        filter_col: Optional[str] = None,
        filter_val: Optional[Any] = None,
        shard_id: Optional[int] = None,
        total_shards: Optional[int] = None,
        sheet_name: Optional[str] = None,
    ) -> Generator[dict, None, None]:
        """
        Read Excel file and yield rows as dictionaries.

        The first row of the sheet is used as the header row; every following
        row is yielded as ``{header: cell_value}``.

        Args:
            file_path: Path to the Excel file.
            filter_col: Optional column name to filter by.
            filter_val: Optional value to filter for in filter_col. ``None``
                disables filtering; any other value (including ``0``/``False``/``""``)
                is compared for equality.
            shard_id: Worker shard ID (0-indexed). Used for parallel processing.
            total_shards: Total number of shards/workers. Used for parallel processing.
            sheet_name: Optional sheet name to read (defaults to active sheet).

        When shard_id and total_shards are both provided, only rows where
        (matched_index % total_shards == shard_id) are yielded, where
        matched_index counts the rows that passed the filter. This allows
        multiple workers to process different partitions of the same file.

        Yields:
            dict: Row data as a dictionary with column headers as keys.

        Raises:
            ValueError: If sharding is requested with ``total_shards < 1``.
                Because this is a generator, the error surfaces on first iteration.
        """
        sharding_enabled = shard_id is not None and total_shards is not None
        # Validate before touching the file: a zero shard count would otherwise
        # fail with ZeroDivisionError only once the first row is reached.
        if shard_id is not None and total_shards is not None and total_shards < 1:
            raise ValueError(f"total_shards must be >= 1, got {total_shards}")

        wb = load_workbook(filename=file_path, read_only=True, data_only=True)
        try:
            # Select sheet
            ws = wb[sheet_name] if sheet_name else wb.active
            if ws is None:
                return

            rows = ws.rows

            # An empty sheet has no header row and therefore no data.
            try:
                headers = [cell.value for cell in next(rows)]
            except StopIteration:
                return

            matched_index = 0
            yielded_count = 0

            logger.debug(f"Sharding enabled: {sharding_enabled}")
            if sharding_enabled:
                logger.info(f"[EXCEL] Worker {shard_id}/{total_shards} starting to read {file_path}")

            for row in rows:
                row_dict = dict(zip(headers, [cell.value for cell in row]))

                # Apply filter first
                if not self._matches_filter(row_dict, filter_col, filter_val):
                    continue

                # Apply sharding after filtering: every matched row advances the
                # counter, but only rows owned by this shard are yielded.
                owned = True
                if shard_id is not None and total_shards is not None:
                    owned = matched_index % total_shards == shard_id
                matched_index += 1
                if not owned:
                    continue

                yielded_count += 1
                yield row_dict

            if sharding_enabled:
                logger.info(f"[EXCEL] Worker {shard_id} finished. Matched={matched_index}, Yielded={yielded_count}")
        finally:
            # Runs on normal completion, on error, and when the generator is closed early.
            wb.close()
@@ -0,0 +1,21 @@
1
+ from .async_connector import AsyncPipefyConnector
2
+ from .async_service import AsyncPipefyService
3
+ from .connector import PipefyConnector
4
+ from .models import (
5
+ CreateCardInput,
6
+ FieldFilter,
7
+ FieldFilterOperator,
8
+ PipefyClient,
9
+ )
10
+ from .service import PipefyService
11
+
12
+ __all__ = [
13
+ "AsyncPipefyConnector",
14
+ "AsyncPipefyService",
15
+ "PipefyConnector",
16
+ "PipefyService",
17
+ "PipefyClient",
18
+ "CreateCardInput",
19
+ "FieldFilter",
20
+ "FieldFilterOperator",
21
+ ]
@@ -0,0 +1,48 @@
1
+ from typing import List, Optional
2
+
3
+ from ..connector import AsyncConnector
4
+ from ..transaction import Transaction
5
+ from .async_service import AsyncPipefyService
6
+ from .models import PipefyClient
7
+
8
+
9
class AsyncPipefyConnector(AsyncConnector):
    """Async connector that wraps an ``AsyncPipefyService``.

    Each public coroutine first calls ``_ensure_authenticated``, which invokes
    ``service.authenticate()`` the first time and remembers that it did so.
    """

    service: AsyncPipefyService

    def __init__(self, client: PipefyClient, *args, **kwargs):
        """Build the underlying service from ``client``.

        Extra positional and keyword arguments are accepted but not used.
        """
        self._authenticated = False
        self.service = AsyncPipefyService(client)

    async def _ensure_authenticated(self):
        """Authenticate the service unless a previous call already did."""
        if self._authenticated:
            return
        await self.service.authenticate()
        self._authenticated = True

    async def fetch_transactions_async(
        self, batch_size: int = 1, phase_id: Optional[str] = None, field_filters=None, **kwargs
    ) -> List[Transaction]:
        """Fetch cards via ``service.get_next_card`` and wrap each in a Transaction.

        Args:
            batch_size: Forwarded to the service as ``batch_size``.
            phase_id: Required; forwarded to the service as ``phase_id``.
            field_filters: Forwarded to the service as ``field_filters``.
            **kwargs: Forwarded unchanged to the service call.

        Returns:
            One Transaction per returned card (id taken from the card's ``"id"``
            entry, payload is the card itself); ``[]`` when the service returns
            nothing.

        Raises:
            ValueError: If ``phase_id`` is empty or None.
        """
        if not phase_id:
            raise ValueError("AsyncPipefyConnector.fetch requires a phase_id")

        await self._ensure_authenticated()

        fetched = await self.service.get_next_card(
            phase_id=phase_id, field_filters=field_filters, batch_size=batch_size, **kwargs
        )

        wrapped = []
        if fetched:
            for card in fetched:
                wrapped.append(Transaction(id=card.get("id"), payload=card))
        return wrapped

    async def dispatch_transactions_async(self, transactions: List[Transaction], *args, **kwargs):
        """Call ``service.create_card`` with each transaction's payload, in order.

        Returns:
            True once every payload has been submitted.
        """
        await self._ensure_authenticated()

        for item in transactions:
            await self.service.create_card(item.payload)
        return True

    async def fetch_transaction_by_id_async(self, transaction_id: str, *args, **kwargs) -> Transaction:
        """Fetch one card via ``service.get_card_by_id`` and wrap it in a Transaction."""
        await self._ensure_authenticated()

        # NOTE(review): assumes the service returns a mapping; a None result would
        # fail on .get() - confirm against AsyncPipefyService.get_card_by_id.
        card = await self.service.get_card_by_id(transaction_id)
        return Transaction(id=card.get("id"), payload=card)