ergon-framework-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ergon/__init__.py +13 -0
- ergon/bootstrap/src/__project__/__init__.py +0 -0
- ergon/bootstrap/src/__project__/_observability/docker-compose.telemetry.yml +124 -0
- ergon/bootstrap/src/__project__/_observability/grafana.yaml +17 -0
- ergon/bootstrap/src/__project__/_observability/loki.yaml +48 -0
- ergon/bootstrap/src/__project__/_observability/otel-collector-config.yaml +53 -0
- ergon/bootstrap/src/__project__/_observability/prometheus.yaml +11 -0
- ergon/bootstrap/src/__project__/_observability/tempo.yaml +24 -0
- ergon/bootstrap/src/__project__/connectors/__init__.py +0 -0
- ergon/bootstrap/src/__project__/main.py +9 -0
- ergon/bootstrap/src/__project__/tasks/__init__.py +0 -0
- ergon/bootstrap/src/__project__/tasks/constants.py +13 -0
- ergon/bootstrap/src/__project__/tasks/example_task/__init__.py +0 -0
- ergon/bootstrap/src/__project__/tasks/example_task/config.py +4 -0
- ergon/bootstrap/src/__project__/tasks/example_task/exceptions.py +4 -0
- ergon/bootstrap/src/__project__/tasks/example_task/helpers.py +4 -0
- ergon/bootstrap/src/__project__/tasks/example_task/schemas.py +5 -0
- ergon/bootstrap/src/__project__/tasks/example_task/task.py +1 -0
- ergon/bootstrap/src/__project__/tasks/exceptions.py +0 -0
- ergon/bootstrap/src/__project__/tasks/helpers.py +0 -0
- ergon/bootstrap/src/__project__/tasks/schemas.py +0 -0
- ergon/bootstrap/src/__project__/tasks/settings.py +5 -0
- ergon/cli.py +174 -0
- ergon/connector/__init__.py +64 -0
- ergon/connector/connector.py +97 -0
- ergon/connector/excel/__init__.py +18 -0
- ergon/connector/excel/connector.py +175 -0
- ergon/connector/excel/models.py +24 -0
- ergon/connector/excel/service.py +98 -0
- ergon/connector/pipefy/__init__.py +21 -0
- ergon/connector/pipefy/async_connector.py +48 -0
- ergon/connector/pipefy/async_service.py +907 -0
- ergon/connector/pipefy/connector.py +36 -0
- ergon/connector/pipefy/models.py +48 -0
- ergon/connector/pipefy/service.py +1016 -0
- ergon/connector/pipefy/version.py +1 -0
- ergon/connector/postgres/__init__.py +11 -0
- ergon/connector/postgres/async_connector.py +119 -0
- ergon/connector/postgres/async_service.py +116 -0
- ergon/connector/postgres/models.py +34 -0
- ergon/connector/rabbitmq/__init__.py +25 -0
- ergon/connector/rabbitmq/async_connector.py +120 -0
- ergon/connector/rabbitmq/async_service.py +417 -0
- ergon/connector/rabbitmq/connector.py +54 -0
- ergon/connector/rabbitmq/helper.py +14 -0
- ergon/connector/rabbitmq/models.py +92 -0
- ergon/connector/rabbitmq/service.py +199 -0
- ergon/connector/sqs/__init__.py +15 -0
- ergon/connector/sqs/async_connector.py +120 -0
- ergon/connector/sqs/async_service.py +246 -0
- ergon/connector/sqs/connector.py +120 -0
- ergon/connector/sqs/models.py +36 -0
- ergon/connector/sqs/service.py +219 -0
- ergon/connector/transaction.py +14 -0
- ergon/py.typed +0 -0
- ergon/service/__init__.py +5 -0
- ergon/service/service.py +17 -0
- ergon/task/__init__.py +13 -0
- ergon/task/base.py +222 -0
- ergon/task/exceptions.py +217 -0
- ergon/task/helpers.py +691 -0
- ergon/task/manager.py +85 -0
- ergon/task/mixins/__init__.py +13 -0
- ergon/task/mixins/consumer.py +858 -0
- ergon/task/mixins/metrics.py +457 -0
- ergon/task/mixins/producer.py +486 -0
- ergon/task/policies.py +229 -0
- ergon/task/runner.py +386 -0
- ergon/task/utils.py +64 -0
- ergon/telemetry/__init__.py +7 -0
- ergon/telemetry/_resource.py +13 -0
- ergon/telemetry/logging.py +370 -0
- ergon/telemetry/metrics.py +101 -0
- ergon/telemetry/tracing.py +152 -0
- ergon/utils/__init__.py +5 -0
- ergon/utils/env.py +26 -0
- ergon_framework_python-0.1.0.dist-info/METADATA +449 -0
- ergon_framework_python-0.1.0.dist-info/RECORD +82 -0
- ergon_framework_python-0.1.0.dist-info/WHEEL +5 -0
- ergon_framework_python-0.1.0.dist-info/entry_points.txt +2 -0
- ergon_framework_python-0.1.0.dist-info/licenses/LICENSE +21 -0
- ergon_framework_python-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, List, Union
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, model_validator
|
|
5
|
+
|
|
6
|
+
from . import transaction
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# ---------------------------------------------------------
|
|
10
|
+
# SYNC CONNECTOR
|
|
11
|
+
# ---------------------------------------------------------
|
|
12
|
+
class Connector(ABC):
    """Abstract base class for synchronous connectors.

    Subclasses must implement ``fetch_transactions`` and
    ``dispatch_transactions``. The remaining methods are optional hooks
    that raise ``NotImplementedError`` unless a subclass overrides them.
    """

    @abstractmethod
    def fetch_transactions(self, *args, **kwargs) -> List[transaction.Transaction]:
        """Fetch one or many transactions (sync) and return them as a list."""

    @abstractmethod
    def dispatch_transactions(self, transactions: List[transaction.Transaction], *args, **kwargs) -> Any:
        """Publish the given transactions (sync)."""

    def fetch_transaction_by_id(self, transaction_id: str, *args, **kwargs) -> transaction.Transaction:
        """Fetch a single transaction by its id for individual processing.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def get_transactions_count(self, *args, **kwargs) -> int:
        """Return the total number of transactions in the connector target system.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------
|
|
34
|
+
# ASYNC CONNECTOR
|
|
35
|
+
# ---------------------------------------------------------
|
|
36
|
+
class AsyncConnector(ABC):
    """Abstract base class for asynchronous connectors.

    Subclasses must implement ``fetch_transactions_async`` and
    ``dispatch_transactions_async``. Every other coroutine here is an
    optional hook whose default implementation raises
    ``NotImplementedError``.
    """

    @abstractmethod
    async def fetch_transactions_async(self, *args, **kwargs) -> List[transaction.Transaction]:
        """Fetch transactions asynchronously and return them as a list."""

    @abstractmethod
    async def dispatch_transactions_async(self, transactions: List[transaction.Transaction], *args, **kwargs) -> Any:
        """Publish the given transactions asynchronously."""

    async def fetch_transaction_by_id_async(self, transaction_id: str, *args, **kwargs) -> transaction.Transaction:
        """Fetch a single transaction by its id for individual processing (async).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    async def get_transactions_count_async(self, *args, **kwargs) -> int:
        """Return the total number of transactions in the connector target system (async).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    async def ack_transaction(self, transaction: transaction.Transaction) -> None:
        """Acknowledge a successfully processed transaction.

        The default raises NotImplementedError so non-broker connectors
        (database, HTTP, file-based, etc.) can ignore it. Broker connectors
        with per-message ack semantics (RabbitMQ, SQS, Kafka, ...) must
        override.
        """
        raise NotImplementedError

    async def nack_transaction(self, transaction: transaction.Transaction, requeue: bool = True) -> None:
        """Negatively acknowledge a transaction.

        ``requeue=True`` asks the broker to redeliver; ``requeue=False``
        routes to the broker's dead-letter destination if one is configured.
        The default raises NotImplementedError; broker connectors must
        override.
        """
        raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------
|
|
75
|
+
# CONNECTOR CONFIG (supports both sync + async)
|
|
76
|
+
# ---------------------------------------------------------
|
|
77
|
+
class ConnectorConfig(BaseModel):
    """
    Generic connector configuration (supports both sync and async connectors).

    connector: class implementing either Connector or AsyncConnector
    args: positional arguments passed to connector's __init__
    kwargs: keyword arguments passed to connector's __init__
    """

    connector: Union[type[Connector], type[AsyncConnector]]
    args: tuple[Any, ...] = ()
    kwargs: dict[str, Any] = {}

    @model_validator(mode="after")
    def validate_connector(self) -> "ConnectorConfig":
        """Ensure the configured class derives from Connector or AsyncConnector.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``connector`` subclasses neither base class.
        """
        connector_cls = self.connector
        if issubclass(connector_cls, (Connector, AsyncConnector)):
            return self
        raise ValueError(
            f"Connector '{connector_cls}' must inherit from Connector or AsyncConnector. Got: {connector_cls}"
        )
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Excel connector for ergon-framework.
|
|
3
|
+
Provides transaction-based access to Excel files with support for:
|
|
4
|
+
- Batch mode (multiple rows per transaction for high-throughput processing)
|
|
5
|
+
- Normal mode (one row per transaction)
|
|
6
|
+
- Sharding for parallel processing
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .connector import ExcelConnector
|
|
10
|
+
from .models import ExcelFetchConfig, ExcelRow
|
|
11
|
+
from .service import ExcelService
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ExcelConnector",
|
|
15
|
+
"ExcelService",
|
|
16
|
+
"ExcelFetchConfig",
|
|
17
|
+
"ExcelRow",
|
|
18
|
+
]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Excel connector for ergon-framework.
|
|
3
|
+
Provides transaction-based access to Excel files with support for:
|
|
4
|
+
- Batch mode (multiple rows per transaction)
|
|
5
|
+
- Normal mode (one row per transaction)
|
|
6
|
+
- Sharding for parallel processing
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import uuid
|
|
11
|
+
from typing import Any, List, Optional
|
|
12
|
+
|
|
13
|
+
from ..connector import Connector
|
|
14
|
+
from ..transaction import Transaction
|
|
15
|
+
from .service import ExcelService
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ExcelConnector(Connector):
    """
    Connector for reading transactions from Excel files.

    Features:
    - Iterator-based reading (rows are pulled lazily from the service)
    - Batch mode for high-throughput processing
    - Sharding support for parallel workers

    One row iterator is kept per distinct combination of
    (file_path, filter_col, filter_val, worker_id, max_workers, sheet_name),
    so repeated calls with the same arguments continue where the previous
    call stopped. When a call finds no rows left it returns an empty list and
    drops that iterator; a later call with the same arguments then asks the
    service to read the file again.

    Example:
        service = ExcelService()
        connector = ExcelConnector(service)
        transactions = connector.fetch_transactions(
            limit=100,
            file_path="data.xlsx",
            filter_col="Type",
            filter_val="Active",
            batch_mode=True,
        )
    """

    def __init__(self, service: ExcelService):
        """
        Initialize the Excel connector.

        Args:
            service: ExcelService instance for reading Excel files.
        """
        self.service = service
        # Live row iterators, keyed by the arguments that identify a stream.
        self._iterators = {}

    def fetch_transactions(
        self,
        limit: int = 10,
        file_path: Optional[str] = None,
        filter_col: Optional[str] = None,
        filter_val: Optional[Any] = None,
        metadata: Optional[dict] = None,
        worker_id: Optional[int] = None,
        max_workers: Optional[int] = None,
        batch_mode: bool = False,
        sheet_name: Optional[str] = None,
        *args,
        **kwargs,
    ) -> List[Transaction]:
        """
        Fetch transactions from Excel file.

        Args:
            limit: Maximum number of rows to pull per call (batch size).
            file_path: Path to Excel file. When empty or None, no file is
                read and an empty list is returned.
            filter_col: Column to filter by.
            filter_val: Value to filter for. It is part of the iterator key,
                so it must be hashable.
            metadata: Metadata to attach to transactions. The dict is copied,
                never attached by reference.
            worker_id: Worker ID for sharding (0-indexed).
            max_workers: Total number of workers for sharding.
            batch_mode: When True, returns a single transaction with payload
                containing a list of rows instead of individual transactions.
                This enables batch processing (e.g., batch embedding, batch upsert).
            sheet_name: Optional sheet name to read.

        Returns:
            List of Transaction objects (empty when no rows remain).
        """
        if not file_path:
            return []

        # Key identifying this specific data stream.
        key = (file_path, filter_col, filter_val, worker_id, max_workers, sheet_name)

        # Lazily create the row iterator the first time this stream is requested.
        if key not in self._iterators:
            data = self.service.read_excel(
                file_path,
                filter_col,
                filter_val,
                shard_id=worker_id,
                total_shards=max_workers,
                sheet_name=sheet_name,
            )
            self._iterators[key] = iter(data)

        iterator = self._iterators[key]
        base_metadata = metadata or {}

        if batch_mode:
            return self._fetch_batch_mode(iterator, key, limit, base_metadata)
        return self._fetch_normal_mode(iterator, key, limit, base_metadata)

    @staticmethod
    def _take_rows(iterator, limit: int) -> list:
        """Pull at most ``limit`` rows from ``iterator`` (fewer if it runs out)."""
        # ``range`` is the first zip argument so that zip stops as soon as
        # ``limit`` rows were taken, without consuming an extra row.
        return [row for _, row in zip(range(limit), iterator)]

    def _discard_iterator(self, key) -> None:
        """Forget the iterator stored under ``key``, if there is one."""
        self._iterators.pop(key, None)

    def _fetch_batch_mode(
        self,
        iterator,
        key,
        limit: int,
        base_metadata: dict,
    ) -> List[Transaction]:
        """Bundle up to ``limit`` rows into a single transaction.

        Returns an empty list (and drops the iterator) when no rows remain.
        """
        rows = self._take_rows(iterator, limit)
        if not rows:
            self._discard_iterator(key)
            return []

        batch_id = str(uuid.uuid4())
        return [
            Transaction(
                id=batch_id,
                payload=rows,
                metadata={**base_metadata, "batch_mode": True, "batch_size": len(rows)},
            )
        ]

    def _fetch_normal_mode(
        self,
        iterator,
        key,
        limit: int,
        base_metadata: dict,
    ) -> List[Transaction]:
        """Return one transaction per row, for up to ``limit`` rows.

        Returns an empty list (and drops the iterator) when no rows remain.
        """
        transactions = [
            Transaction(
                id=str(uuid.uuid4()),
                payload=row,
                # Copy per transaction so mutating one transaction's metadata
                # cannot leak into its siblings or into the caller's dict.
                metadata=dict(base_metadata),
            )
            for row in self._take_rows(iterator, limit)
        ]

        if not transactions:
            self._discard_iterator(key)

        return transactions

    def dispatch_transactions(
        self,
        transactions: List[Transaction],
        *args,
        **kwargs,
    ) -> None:
        """
        Excel connector does not support writing transactions.

        Raises:
            NotImplementedError: Always raised.
        """
        raise NotImplementedError("ExcelConnector does not support producing transactions.")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Excel connector models for ergon-framework.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ExcelFetchConfig(BaseModel):
    """Options describing what to fetch from an Excel file and how to package it."""

    # Required: there is no default for the workbook location.
    file_path: str = Field(..., description="Path to the Excel file")

    # Optional filter: column name plus the value looked for in that column.
    filter_col: Optional[str] = Field(default=None, description="Column name to filter by")
    filter_val: Optional[Any] = Field(default=None, description="Value to filter for in filter_col")

    # Optional sheet selection.
    sheet_name: Optional[str] = Field(default=None, description="Sheet name to read (defaults to active sheet)")

    # Packaging of the fetched rows; off by default.
    batch_mode: bool = Field(default=False, description="When True, bundle rows into single transaction")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ExcelRow(BaseModel):
    """A single row read from an Excel file."""

    # Row contents; a fresh empty dict is created when none is supplied.
    data: Dict[str, Any] = Field(default_factory=dict)

    # Required: position of the row in the source file.
    row_index: int = Field(..., description="Original row index in the Excel file")
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Excel service (client) for reading Excel files.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, Generator, Optional
|
|
7
|
+
|
|
8
|
+
from openpyxl import load_workbook
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExcelService:
    """
    Service for reading Excel files with streaming support.

    Supports:
    - Streaming row-by-row reading (workbook is opened in read-only mode)
    - Column-based filtering
    - Sharding for parallel processing
    """

    def __init__(self):
        # Cache for loaded data (optional future enhancement)
        self._cache = {}

    def read_excel(
        self,
        file_path: str,
        filter_col: Optional[str] = None,
        filter_val: Optional[Any] = None,
        shard_id: Optional[int] = None,
        total_shards: Optional[int] = None,
        sheet_name: Optional[str] = None,
    ) -> Generator[dict, None, None]:
        """
        Read Excel file and yield rows as dictionaries.

        The first row of the sheet is used as the header row. This is a
        generator: the workbook is opened on the first iteration and closed
        when the generator finishes or is closed.

        Args:
            file_path: Path to the Excel file.
            filter_col: Optional column name to filter by.
            filter_val: Optional value to filter for in filter_col. ``None``
                means "no filter"; any other value (including ``0``,
                ``False`` and ``""``) is compared with ``==`` against the cell.
            shard_id: Worker shard ID (0-indexed). Used for parallel processing.
            total_shards: Total number of shards/workers. Used for parallel processing.
            sheet_name: Optional sheet name to read (defaults to active sheet).

        When shard_id and total_shards are both provided, only rows where
        (matched_index % total_shards == shard_id) are yielded, where
        matched_index counts the rows that passed the filter. This allows
        multiple workers to process different partitions of the same file.
        A shard_id outside ``range(total_shards)`` therefore yields no rows.

        Yields:
            dict: Row data as a dictionary with column headers as keys.

        Raises:
            ValueError: If sharding is requested with ``total_shards <= 0``.
        """
        sharding_enabled = shard_id is not None and total_shards is not None
        if shard_id is not None and total_shards is not None and total_shards <= 0:
            # Fail up front instead of a ZeroDivisionError on the first
            # matched row (or silently wrong partitions for negative values).
            raise ValueError(f"total_shards must be a positive integer, got {total_shards}")

        # Only ``None`` disables filtering, so falsy values such as 0 or
        # False can still be filtered for.
        filtering_enabled = bool(filter_col) and filter_val is not None

        wb = load_workbook(filename=file_path, read_only=True, data_only=True)
        try:
            # Select sheet
            ws = wb[sheet_name] if sheet_name else wb.active
            if ws is None:
                return

            rows = ws.rows

            try:
                headers = [cell.value for cell in next(rows)]
            except StopIteration:
                # Sheet has no rows at all, not even a header row.
                return

            matched_index = 0  # rows that passed the filter (all shards)
            yielded_count = 0  # rows actually yielded by this call

            logger.debug("Sharding enabled: %s", sharding_enabled)
            if sharding_enabled:
                logger.info("[EXCEL] Worker %s/%s starting to read %s", shard_id, total_shards, file_path)

            for row in rows:
                row_dict = dict(zip(headers, (cell.value for cell in row)))

                # Apply filter first
                if filtering_enabled and row_dict.get(filter_col) != filter_val:
                    continue

                # Apply sharding after filtering: every matched row advances
                # the counter, but only rows owned by this shard are yielded.
                owned_by_shard = not sharding_enabled or matched_index % total_shards == shard_id
                matched_index += 1
                if not owned_by_shard:
                    continue

                yielded_count += 1
                yield row_dict

            if sharding_enabled:
                logger.info(
                    "[EXCEL] Worker %s finished. Matched=%s, Yielded=%s",
                    shard_id,
                    matched_index,
                    yielded_count,
                )
        finally:
            wb.close()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .async_connector import AsyncPipefyConnector
|
|
2
|
+
from .async_service import AsyncPipefyService
|
|
3
|
+
from .connector import PipefyConnector
|
|
4
|
+
from .models import (
|
|
5
|
+
CreateCardInput,
|
|
6
|
+
FieldFilter,
|
|
7
|
+
FieldFilterOperator,
|
|
8
|
+
PipefyClient,
|
|
9
|
+
)
|
|
10
|
+
from .service import PipefyService
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AsyncPipefyConnector",
|
|
14
|
+
"AsyncPipefyService",
|
|
15
|
+
"PipefyConnector",
|
|
16
|
+
"PipefyService",
|
|
17
|
+
"PipefyClient",
|
|
18
|
+
"CreateCardInput",
|
|
19
|
+
"FieldFilter",
|
|
20
|
+
"FieldFilterOperator",
|
|
21
|
+
]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from ..connector import AsyncConnector
|
|
4
|
+
from ..transaction import Transaction
|
|
5
|
+
from .async_service import AsyncPipefyService
|
|
6
|
+
from .models import PipefyClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AsyncPipefyConnector(AsyncConnector):
    """Async connector that fetches and creates cards through AsyncPipefyService."""

    service: AsyncPipefyService

    def __init__(self, client: PipefyClient, *args, **kwargs):
        """Create the service for ``client``; extra arguments are accepted but unused."""
        self.service = AsyncPipefyService(client)
        # Flipped to True after the first successful authenticate() call.
        self._authenticated = False

    async def _ensure_authenticated(self):
        """Authenticate the service once; later calls return immediately."""
        if self._authenticated:
            return
        await self.service.authenticate()
        self._authenticated = True

    async def fetch_transactions_async(
        self, batch_size: int = 1, phase_id: Optional[str] = None, field_filters=None, **kwargs
    ) -> List[Transaction]:
        """Fetch cards from a phase and wrap each one in a Transaction.

        Args:
            batch_size: Forwarded to the service's ``get_next_card``.
            phase_id: Phase to fetch cards from; required.
            field_filters: Forwarded to the service's ``get_next_card``.
            **kwargs: Extra keyword arguments forwarded to ``get_next_card``.

        Returns:
            One Transaction per returned card (id taken from the card's
            ``"id"`` key, payload is the card), or an empty list when the
            service returns nothing.

        Raises:
            ValueError: If ``phase_id`` is missing or empty.
        """
        if not phase_id:
            raise ValueError("AsyncPipefyConnector.fetch requires a phase_id")

        await self._ensure_authenticated()

        cards = await self.service.get_next_card(
            phase_id=phase_id,
            field_filters=field_filters,
            batch_size=batch_size,
            **kwargs,
        )
        return [Transaction(id=card.get("id"), payload=card) for card in cards] if cards else []

    async def dispatch_transactions_async(self, transactions: List[Transaction], *args, **kwargs):
        """Create one card per transaction from its payload, sequentially.

        Returns:
            True once every ``create_card`` call has completed.
        """
        await self._ensure_authenticated()

        for item in transactions:
            await self.service.create_card(item.payload)
        return True

    async def fetch_transaction_by_id_async(self, transaction_id: str, *args, **kwargs) -> Transaction:
        """Fetch the card with the given id and wrap it in a Transaction."""
        await self._ensure_authenticated()

        card = await self.service.get_card_by_id(transaction_id)
        return Transaction(id=card.get("id"), payload=card)
|