bb-integrations-library 3.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bb_integrations_lib/__init__.py +0 -0
- bb_integrations_lib/converters/__init__.py +0 -0
- bb_integrations_lib/gravitate/__init__.py +0 -0
- bb_integrations_lib/gravitate/base_api.py +20 -0
- bb_integrations_lib/gravitate/model.py +29 -0
- bb_integrations_lib/gravitate/pe_api.py +122 -0
- bb_integrations_lib/gravitate/rita_api.py +552 -0
- bb_integrations_lib/gravitate/sd_api.py +572 -0
- bb_integrations_lib/gravitate/testing/TTE/sd/models.py +1398 -0
- bb_integrations_lib/gravitate/testing/TTE/sd/tests/test_models.py +2987 -0
- bb_integrations_lib/gravitate/testing/__init__.py +0 -0
- bb_integrations_lib/gravitate/testing/builder.py +55 -0
- bb_integrations_lib/gravitate/testing/openapi.py +70 -0
- bb_integrations_lib/gravitate/testing/util.py +274 -0
- bb_integrations_lib/mappers/__init__.py +0 -0
- bb_integrations_lib/mappers/prices/__init__.py +0 -0
- bb_integrations_lib/mappers/prices/model.py +106 -0
- bb_integrations_lib/mappers/prices/price_mapper.py +127 -0
- bb_integrations_lib/mappers/prices/protocol.py +20 -0
- bb_integrations_lib/mappers/prices/util.py +61 -0
- bb_integrations_lib/mappers/rita_mapper.py +523 -0
- bb_integrations_lib/models/__init__.py +0 -0
- bb_integrations_lib/models/dtn_supplier_invoice.py +487 -0
- bb_integrations_lib/models/enums.py +28 -0
- bb_integrations_lib/models/pipeline_structs.py +76 -0
- bb_integrations_lib/models/probe/probe_event.py +20 -0
- bb_integrations_lib/models/probe/request_data.py +431 -0
- bb_integrations_lib/models/probe/resume_token.py +7 -0
- bb_integrations_lib/models/rita/audit.py +113 -0
- bb_integrations_lib/models/rita/auth.py +30 -0
- bb_integrations_lib/models/rita/bucket.py +17 -0
- bb_integrations_lib/models/rita/config.py +188 -0
- bb_integrations_lib/models/rita/constants.py +19 -0
- bb_integrations_lib/models/rita/crossroads_entities.py +293 -0
- bb_integrations_lib/models/rita/crossroads_mapping.py +428 -0
- bb_integrations_lib/models/rita/crossroads_monitoring.py +78 -0
- bb_integrations_lib/models/rita/crossroads_network.py +41 -0
- bb_integrations_lib/models/rita/crossroads_rules.py +80 -0
- bb_integrations_lib/models/rita/email.py +39 -0
- bb_integrations_lib/models/rita/issue.py +63 -0
- bb_integrations_lib/models/rita/mapping.py +227 -0
- bb_integrations_lib/models/rita/probe.py +58 -0
- bb_integrations_lib/models/rita/reference_data.py +110 -0
- bb_integrations_lib/models/rita/source_system.py +9 -0
- bb_integrations_lib/models/rita/workers.py +76 -0
- bb_integrations_lib/models/sd/bols_and_drops.py +241 -0
- bb_integrations_lib/models/sd/get_order.py +301 -0
- bb_integrations_lib/models/sd/orders.py +18 -0
- bb_integrations_lib/models/sd_api.py +115 -0
- bb_integrations_lib/pipelines/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/distribution_report/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/distribution_report/order_by_site_product_parser.py +50 -0
- bb_integrations_lib/pipelines/parsers/distribution_report/tank_configs_parser.py +47 -0
- bb_integrations_lib/pipelines/parsers/dtn/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/dtn/dtn_price_parser.py +102 -0
- bb_integrations_lib/pipelines/parsers/dtn/model.py +79 -0
- bb_integrations_lib/pipelines/parsers/price_engine/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/price_engine/parse_accessorials_prices_parser.py +67 -0
- bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/__init__.py +0 -0
- bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/price_merge_parser.py +111 -0
- bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/price_sync_parser.py +107 -0
- bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/shared.py +81 -0
- bb_integrations_lib/pipelines/parsers/tank_reading_parser.py +155 -0
- bb_integrations_lib/pipelines/parsers/tank_sales_parser.py +144 -0
- bb_integrations_lib/pipelines/shared/__init__.py +0 -0
- bb_integrations_lib/pipelines/shared/allocation_matching.py +227 -0
- bb_integrations_lib/pipelines/shared/bol_allocation.py +2793 -0
- bb_integrations_lib/pipelines/steps/__init__.py +0 -0
- bb_integrations_lib/pipelines/steps/create_accessorials_step.py +80 -0
- bb_integrations_lib/pipelines/steps/distribution_report/__init__.py +0 -0
- bb_integrations_lib/pipelines/steps/distribution_report/distribution_report_datafram_to_raw_data.py +33 -0
- bb_integrations_lib/pipelines/steps/distribution_report/get_model_history_step.py +50 -0
- bb_integrations_lib/pipelines/steps/distribution_report/get_order_by_site_product_step.py +62 -0
- bb_integrations_lib/pipelines/steps/distribution_report/get_tank_configs_step.py +40 -0
- bb_integrations_lib/pipelines/steps/distribution_report/join_distribution_order_dos_step.py +85 -0
- bb_integrations_lib/pipelines/steps/distribution_report/upload_distribution_report_datafram_to_big_query.py +47 -0
- bb_integrations_lib/pipelines/steps/echo_step.py +14 -0
- bb_integrations_lib/pipelines/steps/export_dataframe_to_rawdata_step.py +28 -0
- bb_integrations_lib/pipelines/steps/exporting/__init__.py +0 -0
- bb_integrations_lib/pipelines/steps/exporting/bbd_export_payroll_file_step.py +107 -0
- bb_integrations_lib/pipelines/steps/exporting/bbd_export_readings_step.py +236 -0
- bb_integrations_lib/pipelines/steps/exporting/cargas_wholesale_bundle_upload_step.py +33 -0
- bb_integrations_lib/pipelines/steps/exporting/dataframe_flat_file_export.py +29 -0
- bb_integrations_lib/pipelines/steps/exporting/gcs_bucket_export_file_step.py +34 -0
- bb_integrations_lib/pipelines/steps/exporting/keyvu_export_step.py +356 -0
- bb_integrations_lib/pipelines/steps/exporting/pe_price_export_step.py +238 -0
- bb_integrations_lib/pipelines/steps/exporting/platform_science_order_sync_step.py +500 -0
- bb_integrations_lib/pipelines/steps/exporting/save_rawdata_to_disk.py +15 -0
- bb_integrations_lib/pipelines/steps/exporting/sftp_export_file_step.py +60 -0
- bb_integrations_lib/pipelines/steps/exporting/sftp_export_many_files_step.py +23 -0
- bb_integrations_lib/pipelines/steps/exporting/update_exported_orders_table_step.py +64 -0
- bb_integrations_lib/pipelines/steps/filter_step.py +22 -0
- bb_integrations_lib/pipelines/steps/get_latest_sync_date.py +34 -0
- bb_integrations_lib/pipelines/steps/importing/bbd_import_payroll_step.py +30 -0
- bb_integrations_lib/pipelines/steps/importing/get_order_numbers_to_export_step.py +138 -0
- bb_integrations_lib/pipelines/steps/importing/load_file_to_dataframe_step.py +46 -0
- bb_integrations_lib/pipelines/steps/importing/load_imap_attachment_step.py +172 -0
- bb_integrations_lib/pipelines/steps/importing/pe_bulk_sync_price_structure_step.py +68 -0
- bb_integrations_lib/pipelines/steps/importing/pe_price_merge_step.py +86 -0
- bb_integrations_lib/pipelines/steps/importing/sftp_file_config_step.py +124 -0
- bb_integrations_lib/pipelines/steps/importing/test_exact_file_match.py +57 -0
- bb_integrations_lib/pipelines/steps/null_step.py +15 -0
- bb_integrations_lib/pipelines/steps/pe_integration_job_step.py +32 -0
- bb_integrations_lib/pipelines/steps/processing/__init__.py +0 -0
- bb_integrations_lib/pipelines/steps/processing/archive_gcs_step.py +76 -0
- bb_integrations_lib/pipelines/steps/processing/archive_sftp_step.py +48 -0
- bb_integrations_lib/pipelines/steps/processing/bbd_format_tank_readings_step.py +492 -0
- bb_integrations_lib/pipelines/steps/processing/bbd_upload_prices_step.py +54 -0
- bb_integrations_lib/pipelines/steps/processing/bbd_upload_tank_sales_step.py +124 -0
- bb_integrations_lib/pipelines/steps/processing/bbd_upload_tankreading_step.py +80 -0
- bb_integrations_lib/pipelines/steps/processing/convert_bbd_order_to_cargas_step.py +226 -0
- bb_integrations_lib/pipelines/steps/processing/delete_sftp_step.py +33 -0
- bb_integrations_lib/pipelines/steps/processing/dtn/__init__.py +2 -0
- bb_integrations_lib/pipelines/steps/processing/dtn/convert_dtn_invoice_to_sd_model.py +145 -0
- bb_integrations_lib/pipelines/steps/processing/dtn/parse_dtn_invoice_step.py +38 -0
- bb_integrations_lib/pipelines/steps/processing/file_config_parser_step.py +720 -0
- bb_integrations_lib/pipelines/steps/processing/file_config_parser_step_v2.py +418 -0
- bb_integrations_lib/pipelines/steps/processing/get_sd_price_price_request.py +105 -0
- bb_integrations_lib/pipelines/steps/processing/keyvu_upload_deliveryplan_step.py +39 -0
- bb_integrations_lib/pipelines/steps/processing/mark_orders_exported_in_bbd_step.py +185 -0
- bb_integrations_lib/pipelines/steps/processing/pe_price_rows_processing_step.py +174 -0
- bb_integrations_lib/pipelines/steps/processing/send_process_report_step.py +47 -0
- bb_integrations_lib/pipelines/steps/processing/sftp_renamer_step.py +61 -0
- bb_integrations_lib/pipelines/steps/processing/tank_reading_touchup_steps.py +75 -0
- bb_integrations_lib/pipelines/steps/processing/upload_supplier_invoice_step.py +16 -0
- bb_integrations_lib/pipelines/steps/send_attached_in_rita_email_step.py +44 -0
- bb_integrations_lib/pipelines/steps/send_rita_email_step.py +34 -0
- bb_integrations_lib/pipelines/steps/sleep_step.py +24 -0
- bb_integrations_lib/pipelines/wrappers/__init__.py +0 -0
- bb_integrations_lib/pipelines/wrappers/accessorials_transformation.py +104 -0
- bb_integrations_lib/pipelines/wrappers/distribution_report.py +191 -0
- bb_integrations_lib/pipelines/wrappers/export_tank_readings.py +237 -0
- bb_integrations_lib/pipelines/wrappers/import_tank_readings.py +192 -0
- bb_integrations_lib/pipelines/wrappers/wrapper.py +81 -0
- bb_integrations_lib/protocols/__init__.py +0 -0
- bb_integrations_lib/protocols/flat_file.py +210 -0
- bb_integrations_lib/protocols/gravitate_client.py +104 -0
- bb_integrations_lib/protocols/pipelines.py +697 -0
- bb_integrations_lib/provider/__init__.py +0 -0
- bb_integrations_lib/provider/api/__init__.py +0 -0
- bb_integrations_lib/provider/api/cargas/__init__.py +0 -0
- bb_integrations_lib/provider/api/cargas/client.py +43 -0
- bb_integrations_lib/provider/api/cargas/model.py +49 -0
- bb_integrations_lib/provider/api/cargas/protocol.py +23 -0
- bb_integrations_lib/provider/api/dtn/__init__.py +0 -0
- bb_integrations_lib/provider/api/dtn/client.py +128 -0
- bb_integrations_lib/provider/api/dtn/protocol.py +9 -0
- bb_integrations_lib/provider/api/keyvu/__init__.py +0 -0
- bb_integrations_lib/provider/api/keyvu/client.py +30 -0
- bb_integrations_lib/provider/api/keyvu/model.py +149 -0
- bb_integrations_lib/provider/api/macropoint/__init__.py +0 -0
- bb_integrations_lib/provider/api/macropoint/client.py +28 -0
- bb_integrations_lib/provider/api/macropoint/model.py +40 -0
- bb_integrations_lib/provider/api/pc_miler/__init__.py +0 -0
- bb_integrations_lib/provider/api/pc_miler/client.py +130 -0
- bb_integrations_lib/provider/api/pc_miler/model.py +6 -0
- bb_integrations_lib/provider/api/pc_miler/web_services_apis.py +131 -0
- bb_integrations_lib/provider/api/platform_science/__init__.py +0 -0
- bb_integrations_lib/provider/api/platform_science/client.py +147 -0
- bb_integrations_lib/provider/api/platform_science/model.py +82 -0
- bb_integrations_lib/provider/api/quicktrip/__init__.py +0 -0
- bb_integrations_lib/provider/api/quicktrip/client.py +52 -0
- bb_integrations_lib/provider/api/telapoint/__init__.py +0 -0
- bb_integrations_lib/provider/api/telapoint/client.py +68 -0
- bb_integrations_lib/provider/api/telapoint/model.py +178 -0
- bb_integrations_lib/provider/api/warren_rogers/__init__.py +0 -0
- bb_integrations_lib/provider/api/warren_rogers/client.py +207 -0
- bb_integrations_lib/provider/aws/__init__.py +0 -0
- bb_integrations_lib/provider/aws/s3/__init__.py +0 -0
- bb_integrations_lib/provider/aws/s3/client.py +126 -0
- bb_integrations_lib/provider/ftp/__init__.py +0 -0
- bb_integrations_lib/provider/ftp/client.py +140 -0
- bb_integrations_lib/provider/ftp/interface.py +273 -0
- bb_integrations_lib/provider/ftp/model.py +76 -0
- bb_integrations_lib/provider/imap/__init__.py +0 -0
- bb_integrations_lib/provider/imap/client.py +228 -0
- bb_integrations_lib/provider/imap/model.py +3 -0
- bb_integrations_lib/provider/sqlserver/__init__.py +0 -0
- bb_integrations_lib/provider/sqlserver/client.py +106 -0
- bb_integrations_lib/secrets/__init__.py +4 -0
- bb_integrations_lib/secrets/adapters.py +98 -0
- bb_integrations_lib/secrets/credential_models.py +222 -0
- bb_integrations_lib/secrets/factory.py +85 -0
- bb_integrations_lib/secrets/providers.py +160 -0
- bb_integrations_lib/shared/__init__.py +0 -0
- bb_integrations_lib/shared/exceptions.py +25 -0
- bb_integrations_lib/shared/model.py +1039 -0
- bb_integrations_lib/shared/shared_enums.py +510 -0
- bb_integrations_lib/storage/README.md +236 -0
- bb_integrations_lib/storage/__init__.py +0 -0
- bb_integrations_lib/storage/aws/__init__.py +0 -0
- bb_integrations_lib/storage/aws/s3.py +8 -0
- bb_integrations_lib/storage/defaults.py +72 -0
- bb_integrations_lib/storage/gcs/__init__.py +0 -0
- bb_integrations_lib/storage/gcs/client.py +8 -0
- bb_integrations_lib/storage/gcsmanager/__init__.py +0 -0
- bb_integrations_lib/storage/gcsmanager/client.py +8 -0
- bb_integrations_lib/storage/setup.py +29 -0
- bb_integrations_lib/util/__init__.py +0 -0
- bb_integrations_lib/util/cache/__init__.py +0 -0
- bb_integrations_lib/util/cache/custom_ttl_cache.py +75 -0
- bb_integrations_lib/util/cache/protocol.py +9 -0
- bb_integrations_lib/util/config/__init__.py +0 -0
- bb_integrations_lib/util/config/manager.py +391 -0
- bb_integrations_lib/util/config/model.py +41 -0
- bb_integrations_lib/util/exception_logger/__init__.py +0 -0
- bb_integrations_lib/util/exception_logger/exception_logger.py +146 -0
- bb_integrations_lib/util/exception_logger/test.py +114 -0
- bb_integrations_lib/util/utils.py +364 -0
- bb_integrations_lib/workers/__init__.py +0 -0
- bb_integrations_lib/workers/groups.py +13 -0
- bb_integrations_lib/workers/rpc_worker.py +50 -0
- bb_integrations_lib/workers/topics.py +20 -0
- bb_integrations_library-3.0.11.dist-info/METADATA +59 -0
- bb_integrations_library-3.0.11.dist-info/RECORD +217 -0
- bb_integrations_library-3.0.11.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,697 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys
|
|
3
|
+
from datetime import datetime, UTC
|
|
4
|
+
from typing import Any, Protocol, Tuple, TypeVar, Iterable, runtime_checkable, Optional, \
|
|
5
|
+
AsyncIterable, AsyncGenerator, Type, Union, List, Dict, Generic, Self
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from bb_integrations_lib.gravitate.rita_api import GravitateRitaAPI
|
|
11
|
+
from bb_integrations_lib.mappers.rita_mapper import RitaMapper, RitaAPIMappingProvider, AsyncMappingProvider
|
|
12
|
+
from bb_integrations_lib.models.pipeline_structs import StopBranch, StopPipeline, PipelineContext, \
|
|
13
|
+
NoPipelineData, NoPipelineSourceData
|
|
14
|
+
from bb_integrations_lib.models.rita.audit import CreateReportV2, ProcessReportV2Status, \
|
|
15
|
+
UploadProcessReportFile
|
|
16
|
+
from bb_integrations_lib.models.rita.config import MaxSync
|
|
17
|
+
from bb_integrations_lib.models.rita.issue import IssueCategory
|
|
18
|
+
from bb_integrations_lib.secrets import IntegrationSecretProvider, SecretProvider, RITACredential
|
|
19
|
+
from bb_integrations_lib.secrets.factory import APIFactory
|
|
20
|
+
from bb_integrations_lib.shared.exceptions import StepConfigValidationError, MapperLoadError
|
|
21
|
+
from bb_integrations_lib.shared.model import MappingMode
|
|
22
|
+
from bb_integrations_lib.util.config.manager import GlobalConfigManager
|
|
23
|
+
from bb_integrations_lib.util.utils import CustomJSONEncoder
|
|
24
|
+
|
|
25
|
+
# Type variables for the generic Step / GeneratorStep protocols below.
Input = TypeVar("Input")  # type of data a step accepts from its predecessor
Output = TypeVar("Output")  # type of data a step hands to its successor
# Optional per-step configuration model; bound allows None for "no config".
StepConfig = TypeVar("StepConfig", bound=Optional[BaseModel])
T = TypeVar("T")  # general-purpose type variable
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@runtime_checkable
class Step(Protocol[Input, Output]):
    """
    Protocol for pipeline steps that process data with optional configuration validation.

    A Step represents a single unit of work in a job pipeline, such as fetching files,
    transforming data, or uploading results. Each step receives input data, processes it,
    and produces output for the next step in the pipeline.

    Type Parameters:
        Input: The type of data this step accepts as input
        Output: The type of data this step produces as output

    NOTE(review): earlier documentation described a third ``Config`` type
    parameter and ``self.config`` / ``self.config_class`` attributes, but this
    protocol declares only two type parameters and never sets those attributes.
    Confirm where configuration validation actually lives (likely a concrete
    base class) before relying on it.

    Configuration vs Input:
        - Configuration: Static settings provided at step creation (credentials, settings)
        - Input: Dynamic data passed from the previous step during execution

    Examples:
        Basic step:
        ```python
        class SimpleStep(Step[str, int]):
            def describe(self) -> str:
                return "Converts string to length"

            async def execute(self, text: str) -> int:
                return len(text)
        ```

        Step with access to pipeline resources:
        ```python
        class APIStep(Step[dict, dict]):
            async def execute(self, data: dict) -> dict:
                # Access pipeline resources via context
                api_client = self.pipeline_context.api_clients.some_api
                return await api_client.process(data)
        ```
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize a pipeline step.

        Accepts arbitrary positional/keyword arguments so concrete steps are free
        to define their own constructor signatures; only ``pipeline_context`` is
        read here.

        Attributes:
            pipeline_context: Context shared by the pipeline; taken from kwargs if
                provided, otherwise injected later via set_context()
            config_manager: Global configuration manager for accessing secrets/APIs
            secret_provider: Secret provider; injected later via set_secret_provider()
            api_factory: API client factory; created when the secret provider is injected
        """
        self.pipeline_context: PipelineContext | None = kwargs.get("pipeline_context", None)
        self.config_manager = GlobalConfigManager()
        self.secret_provider: SecretProvider | None = None
        self.api_factory: APIFactory | None = None

    def describe(self) -> str:
        """
        Return a human-readable description of what this step does.

        Used for logging and debugging. Should be concise but descriptive.

        Returns:
            Brief description of the step's purpose

        Example:
            return "Upload files to FTP server"
        """
        # example: return "Unsubclassed Step"
        raise NotImplementedError()

    async def execute(self, i: Input) -> Output:
        """
        Execute the main work of this pipeline step.

        This is where the step performs its core functionality, processing
        the input data and producing output for the next step.

        Args:
            i: Input data from the previous step or initial pipeline input

        Returns:
            Output data to be passed to the next step in the pipeline

        Note:
            - Can access self.pipeline_context for shared pipeline state
            - Should handle errors gracefully or let them bubble up
        """
        ...

    def set_context(self, context: PipelineContext):
        """
        Set the pipeline context for this step.

        Called automatically by the pipeline when the step is added.
        Provides access to shared pipeline state and resources.

        Args:
            context: Pipeline context containing shared state, logs, and resources
        """
        self.pipeline_context = context

    def set_secret_provider(self, sp: SecretProvider):
        """
        Set the secret provider. Kept separate from pipeline context because it is not a Pydantic model and should not
        be saved accidentally if we were to log context.

        Also builds the APIFactory this step uses to construct API clients.

        :param sp: The SecretProvider instance to use for retrieving secrets.
        """
        self.secret_provider = sp
        self.api_factory = APIFactory(sp)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@runtime_checkable
class GeneratorStep(Step[Input, Iterable[Output]], Protocol[Input, Output]):
    """
    Represents a subtask in a job pipeline that is a generator, that is, it can yield values for another
    step to accumulate.

    A GeneratorStep is a Step[Input, Iterable[Output]] for some Input and Output type.

    A pipeline that sees a Generator step will call next on that step's generator property, then send the
    output to next step as its input. The next step must either be a GeneratorStep or an AccumulatorStep.

    Note:
        Because every GeneratorStep is also a Step, the pipeline's dispatch
        must test isinstance(step, GeneratorStep) before isinstance(step, Step).
    """

    async def generator(self, i: Input) -> AsyncIterable[Output]:
        """The generator function for this step. Each step will be called with the Input object."""
        ...
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class JobStep(BaseModel):
    """One node in a pipeline's step tree: a Step instance plus its tree wiring."""

    # The Step (or GeneratorStep) instance to run. Typed Any because protocol
    # instances are arbitrary objects, not pydantic-validatable models.
    step: Any
    # Unique identifier for this node within the pipeline.
    id: str
    # ID of the parent node; None marks the root step.
    parent_id: str | None = None
    alt_input: str | None = None  # If set, the input of a non-parent ancestor step will be provided to the step instead.

    class Config:
        # Required so pydantic accepts the arbitrary `step` object above.
        arbitrary_types_allowed = True
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class PipelineTenantConfig(BaseModel):
    """Identifies which tenant and which configuration a pipeline run belongs to."""

    # Name of the tenant the pipeline runs for.
    tenant_name: str
    # Identifier of the pipeline configuration to use for that tenant.
    config_id: str
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@runtime_checkable
|
|
196
|
+
class JobPipeline(Protocol):
|
|
197
|
+
"""
|
|
198
|
+
Protocol for job pipelines that orchestrate multiple steps in sequence.
|
|
199
|
+
|
|
200
|
+
A JobPipeline manages the execution of interconnected steps, handling data flow,
|
|
201
|
+
error management, logging, and reporting. Steps are organized as a tree structure
|
|
202
|
+
where each step processes input and passes output to its children.
|
|
203
|
+
|
|
204
|
+
Features:
|
|
205
|
+
- Sequential and parallel step execution
|
|
206
|
+
- Automatic error handling and retry logic
|
|
207
|
+
- Process reporting to RITA API
|
|
208
|
+
- Issue tracking and reporting
|
|
209
|
+
- Comprehensive logging with multiple sinks
|
|
210
|
+
- Pipeline context sharing between steps
|
|
211
|
+
|
|
212
|
+
Step Organization:
|
|
213
|
+
Steps are defined as a tree where each step has:
|
|
214
|
+
- id: Unique identifier
|
|
215
|
+
- parent_id: ID of parent step (None for root)
|
|
216
|
+
- alt_input: Optional alternative input from ancestor step
|
|
217
|
+
|
|
218
|
+
Examples:
|
|
219
|
+
Basic pipeline:
|
|
220
|
+
```python
|
|
221
|
+
class MyPipeline(JobPipeline):
|
|
222
|
+
def __init__(self):
|
|
223
|
+
steps = [
|
|
224
|
+
{"step": FetchDataStep(), "id": "fetch", "parent_id": None},
|
|
225
|
+
{"step": ProcessStep(), "id": "process", "parent_id": "fetch"},
|
|
226
|
+
{"step": UploadStep(), "id": "upload", "parent_id": "process"}
|
|
227
|
+
]
|
|
228
|
+
super().__init__(steps, initial_input="start_data")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Pipeline with reporting:
|
|
232
|
+
```python
|
|
233
|
+
report_config = PipelineProcessReportConfig(
|
|
234
|
+
config_id="my-config",
|
|
235
|
+
trigger="manual",
|
|
236
|
+
rita_tenant="my-tenant",
|
|
237
|
+
rita_client_id="client-id",
|
|
238
|
+
rita_client_secret="secret"
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
pipeline = MyPipeline(
|
|
242
|
+
job_steps=steps,
|
|
243
|
+
process_report_config=report_config,
|
|
244
|
+
catch_step_errors=True
|
|
245
|
+
)
|
|
246
|
+
await pipeline.execute()
|
|
247
|
+
```
|
|
248
|
+
"""
|
|
249
|
+
|
|
250
|
+
def __init__(
    self,
    job_steps: list[dict],
    rita_client: GravitateRitaAPI,
    pipeline_name: str,
    pipeline_config_id: str,
    secret_provider: SecretProvider,
    initial_input: Any = None,
    catch_step_errors: bool = False,
    upload_process_report_on_stoppipeline: bool = True,
    send_reports: bool = True
):
    """
    Initialize a job pipeline with steps and configuration.

    Args:
        job_steps: List of step dictionaries, each containing:
            - step: Step instance implementing Step protocol
            - id: Unique string identifier for the step
            - parent_id: ID of parent step (None for root step)
            - alt_input: Optional ancestor step ID to use as input
        rita_client: RITA API client used for process/issue reporting
        pipeline_name: Human-readable name of this pipeline run
        pipeline_config_id: Identifier of this pipeline's configuration
        secret_provider: Secret provider injected into every step
        initial_input: Data to pass to the root step (default: None)
        catch_step_errors: If True, step errors don't stop the entire pipeline,
            just the current branch (default: False)
        upload_process_report_on_stoppipeline: Whether to upload reports when
            pipeline is stopped early (default: True)
        send_reports: Whether to record/report pipeline progress at all
            (default: True)

    Raises:
        RuntimeError: If job_steps is empty
        ValueError: If step tree structure is invalid (cycles, multiple roots, etc.)

    Attributes:
        job_steps: List of JobStep objects created from input dictionaries
        pending_steps: Queue of (JobStep, input) pairs ready for execution
        saved_outputs: Cache of step outputs for alt_input functionality
        saved_logs: Every loguru record captured during the run (for reporting)
        context: Shared pipeline context accessible by all steps
        config_manager: Global configuration manager for accessing secrets/APIs

    Note:
        Constructing a pipeline reconfigures the process-wide loguru logger
        (removes existing handlers and installs stdout/stderr/collector sinks).
    """
    if len(job_steps) == 0:
        raise RuntimeError("Pipelines must have at least 1 step.")

    self.job_steps = [JobStep(**js) for js in job_steps]
    # Scan the list of job_steps and look for a step that has no parent. If multiple steps have no parent, throw
    # an error. Also throw an error if no step has a null parent.
    check_tree_res = self.check_tree(self.job_steps)
    if check_tree_res is not None:
        raise ValueError(check_tree_res)
    # check_tree guarantees exactly one root, so start[0] is safe below.
    start = [s for s in self.job_steps if s.parent_id is None]
    self.pending_steps: list[Tuple[JobStep, Any]] = [(start[0], initial_input)]
    self.saved_outputs: dict[str, Any] = {}
    self.saved_logs: list[Any] = []
    self.catch_step_errors = catch_step_errors
    self.context = PipelineContext()
    self.rita_client = rita_client
    self.secret_provider = secret_provider
    self.pipeline_name = pipeline_name
    self.pipeline_config_id = pipeline_config_id
    self.upload_process_report_on_stoppipeline = upload_process_report_on_stoppipeline
    self.config_manager = GlobalConfigManager()
    self.send_reports = send_reports

    # Configure logging - resets existing loguru handlers
    logger.remove()
    # Route anything less than ERROR to stdout,
    # (40 is loguru's numeric level for ERROR)
    logger.add(sink=sys.stdout, filter=lambda record: record["level"].no < 40)
    # anything ERROR and above to stderr
    logger.add(sink=sys.stderr, level="ERROR")
    # Also collect all logs to use in process reporting
    self.collect_handler_id = logger.add(self._collect_log)

    for js in self.job_steps:
        # Give each step a reference to the pipeline context
        js.step.set_context(self.context)
        # And outside of that context, a reference to the secret provider
        js.step.set_secret_provider(self.secret_provider)
|
|
325
|
+
|
|
326
|
+
def _collect_log(self, log: Any):
|
|
327
|
+
self.saved_logs.append(log)
|
|
328
|
+
self.context.logs.append(log)
|
|
329
|
+
|
|
330
|
+
def check_tree(self, job_steps: list[JobStep]) -> Optional[str]:
    """
    Validate that job_steps form a single tree rooted at the unique parentless step.

    Walks breadth-first from the root, collecting children by parent_id.

    Args:
        job_steps: The steps to validate.

    Returns:
        None when the structure is a valid tree, otherwise a human-readable
        description of the problem (no unique root, duplicated/cyclic ids,
        or steps unreachable from the root).
    """
    # Starting from the first, I should reach each step in the list once and only once
    roots = [x for x in job_steps if x.parent_id is None]
    if len(roots) != 1:
        return "The tree of job steps did not have a unique step with parent_id=None"
    to_visit = roots
    visited: set[str] = set()
    while to_visit:
        step = to_visit.pop(0)
        if step.id in visited:
            return f"The tree has a cycle. Step {step.id} was visited twice."
        visited.add(step.id)
        to_visit.extend(x for x in job_steps if x.parent_id == step.id)
    # Bug fix: previously, steps that could not be reached from the root (for
    # example two steps parenting each other) passed validation but would
    # silently never execute. Require that the walk covered every step.
    if len(visited) != len(job_steps):
        unreachable = sorted({x.id for x in job_steps} - visited)
        return f"The tree has steps unreachable from the root: {unreachable}"
    return None
|
|
345
|
+
|
|
346
|
+
@logger.catch
async def execute(self):
    """
    Execute the pipeline by processing all steps in the correct order.

    Processes steps from the pending_steps queue until empty. Handles both
    regular Steps and GeneratorSteps. Manages error recovery, logging,
    and report generation.

    Execution Flow:
        1. Pop next step from pending queue
        2. Execute step (regular or generator)
        3. Add child steps to pending queue
        4. Repeat until queue empty or error
        5. Generate final process report

    Error Handling:
        - StopPipeline: Stops entire pipeline execution; still finalizes/reports
        - Other exceptions: Passed to finish_pipeline so the failure is reported

    Note:
        Always calls finish_pipeline() for cleanup and reporting, regardless
        of success or failure. The @logger.catch decorator additionally logs
        (and suppresses) anything that escapes this method.
    """
    try:
        start_dt = datetime.now(UTC)
        if self.send_reports:
            await self.record_pipeline_start(start_dt)

        while len(self.pending_steps) > 0:
            # `step_input` (not `input`) to avoid shadowing the builtin.
            jobstep, step_input = self.pending_steps.pop(0)
            step = jobstep.step
            # GeneratorStep is itself a Step, so it must be tested first.
            if isinstance(step, GeneratorStep):
                new_pending_steps = await self.handle_generator_step(jobstep, step_input)
            elif isinstance(step, Step):
                new_pending_steps = await self.handle_step(jobstep, step_input)
            else:
                # Bug fix: Step instances carry no `id` attribute; it lives on
                # the JobStep wrapper. Referencing step.id here raised
                # AttributeError and masked this error message.
                raise RuntimeError(f"Step {jobstep.id} doesn't implement either of Step or GeneratorStep.")
            # Prepend children so a branch completes before its siblings run.
            self.pending_steps = new_pending_steps + self.pending_steps
        await self.finish_pipeline()
    except StopPipeline as e:
        # Deliberate early stop: finalize and (optionally) report.
        await self.finish_pipeline(e)
    except Exception as e:
        # Unexpected failure: finalize with the error so it is reported.
        await self.finish_pipeline(e)
|
|
394
|
+
|
|
395
|
+
async def finish_pipeline(self, exc: Exception | None = None):
    """
    Finalize pipeline execution with cleanup, reporting, and issue tracking.

    Called automatically at the end of pipeline execution, regardless of success
    or failure. Handles process reporting to RITA, issue reporting, and final
    logging.

    Args:
        exc: Exception that caused pipeline termination (None for successful completion)

    Process Report Behavior:
        - Success: Status = 'stop', includes all logs and files
        - StopPipeline: Status = 'stop', may skip report if upload_process_report_on_stoppipeline=False
        - Error: Status = 'error', includes exception details and stack trace

    Issue Reporting:
        If issues were collected during execution and send_reports is enabled,
        uploads all issues to RITA for tracking, with keys prefixed by the
        pipeline name.

    Generated Reports Include:
        - Complete execution logs
        - Files added to context.included_files
        - Pipeline metadata (config_id, trigger, etc.)
    """
    halted_early = False
    halted_with_error = False
    if exc is not None and not isinstance(exc, StopPipeline):
        logger.info("Pipeline exited with an error")
        logger.exception(exc)
        halted_with_error = True
    elif exc is not None and isinstance(exc, StopPipeline):
        logger.success("The pipeline was halted early")
        halted_early = True
    else:
        logger.success("Pipeline completed")

    # No matter how it ended, generate a process report if requested.
    if halted_early and not self.upload_process_report_on_stoppipeline:
        logger.info("Did not upload process report because pipeline was halted early.")
    else:
        if self.send_reports:
            logger.info("Creating process report on RITA...")
            await self.rita_client.create_process_report(CreateReportV2(
                trigger=self.pipeline_name,
                status=ProcessReportV2Status.error if halted_with_error else ProcessReportV2Status.stop,
                config_id=self.pipeline_config_id,
                # Logs are one list item per line, newlines already included. Join into one string.
                log=UploadProcessReportFile(file_base_name="log", content="".join(self.context.logs)),
                included_files=[
                    UploadProcessReportFile(file_base_name=name, content=content)
                    for name, content in self.context.included_files.items()
                ]
            ))
            logger.info("Uploaded process report")
        else:
            logger.info("Not sending process report (send_reports=False)")

    # If there were any issues reported, upload those to RITA.
    if self.send_reports:
        if len(self.context.issues) > 0:
            logger.info("Recording issues on RITA...")
            # Prepend the pipeline name to issue keys so they are unique per pipeline.
            for issue in self.context.issues:
                issue.key = f"{self.pipeline_name}__{issue.key}"
            await self.rita_client.record_many_issues(self.context.issues)
            logger.info("Recorded issues")
        else:
            logger.info("No issues to record")
    else:
        logger.info("Not recording issues (send_reports=False)")
|
|
467
|
+
|
|
468
|
+
async def handle_step(self, jobstep: JobStep, input: Input):
    """
    Execute a regular Step and return the child job steps to schedule next.

    When the jobstep declares an alt_input, the saved output of that ancestor
    step is fed to the step instead of the regular input (the regular input is
    still exposed via context.previous_output).

    Args:
        jobstep: The job step to run.
        input: Output of the parent step (or the trigger input for roots).

    Returns:
        A list of (jobstep, output) tuples for the children of this step, or
        an empty list when this execution branch is cancelled.

    Raises:
        StopPipeline, NoPipelineData, NoPipelineSourceData: always propagated.
        Exception: any other step error, unless catch_step_errors is enabled.
    """
    step_id = jobstep.id
    logger.info(f"Running step {step_id}: {jobstep.step.describe()}")

    if jobstep.alt_input:
        # The alt-input source must be an ancestor; then feed its saved output in.
        self.check_if_ancestor(jobstep.id, jobstep.alt_input)
        saved_input = self.saved_outputs[jobstep.alt_input]
        self.context.previous_output = input
        pending_result = jobstep.step.execute(saved_input)
    else:
        self.context.previous_output = None
        pending_result = jobstep.step.execute(input)

    try:
        output = await pending_result
        self.maybe_save_output(jobstep, output)
        return [(s, output) for s in self.job_steps if s.parent_id == jobstep.id]
    except StopBranch:
        logger.info(f"Branch execution stopped at step {step_id} due to StopBranch exception")
        return []
    except (StopPipeline, NoPipelineData, NoPipelineSourceData):
        # These control-flow exceptions must always propagate, even when
        # catch_step_errors is enabled.
        raise
    except Exception as e:
        if not self.catch_step_errors:
            raise e
        # If we get an error in the step, we don't want to cancel the whole pipeline, just this "branch" of
        # execution. (On a pipeline without generator steps, there isn't any difference, but on a pipeline with
        # generator steps this will just move to the next invocation of the generator step.) Due to the
        # architecture of the pipeline we can "cancel" the current branch by simply returning no next jobsteps
        # to execute.
        logger.exception(e)
        logger.warning("Exception encountered; canceling further execution on this branch.")
        return []
|
|
509
|
+
|
|
510
|
+
async def handle_generator_step(self, jobstep: JobStep, input: Input | AsyncGenerator):
    """
    Advance a GeneratorStep by one yield and schedule its children plus itself.

    On the first invocation, `input` is a regular step input and the generator
    is constructed (optionally from an ancestor's saved alt_input output). On
    subsequent invocations, `input` is the paused AsyncGenerator itself, which
    is simply resumed.

    Args:
        jobstep: The job step wrapping a GeneratorStep.
        input: Either the parent step's output or an already-running generator.

    Returns:
        A list of (jobstep, value) pairs: the children fed with the yielded
        value, followed by (jobstep, generator) so this step re-runs after its
        children finish; an empty list once the generator is exhausted.
    """
    id = jobstep.id
    description = jobstep.step.describe()
    logger.info(f"Generating next output for step {id}: {description}")

    if isinstance(input, AsyncGenerator):
        # This is a paused generator. The input parameter is the currently paused generator
        generator = input
    else:
        # This is not a paused generator. The input is a real input, and we need to construct the generator.
        assert isinstance(jobstep.step, GeneratorStep)
        if jobstep.alt_input:
            self.check_if_ancestor(jobstep.id, jobstep.alt_input)
            alt_input = self.saved_outputs[jobstep.alt_input]
            self.context.previous_output = input
            generator = jobstep.step.generator(alt_input)
        else:
            self.context.previous_output = None
            generator = jobstep.step.generator(input)

    # Run the generator once and get the output, or catch a StopIteration
    try:
        next_output = await anext(generator)
        self.maybe_save_output(jobstep, next_output)
        new_pending_steps = [(s, next_output) for s in self.job_steps if s.parent_id == jobstep.id]

        # Put the currently executing step at the back of the new_pending_steps list. It will execute once all of
        # the child steps of this job are done. We store the generator instead of the input so we can pick up where
        # execution left off.
        new_pending_steps += [(jobstep, generator)]
        return new_pending_steps
    except StopAsyncIteration:
        logger.info(f"Generator for step {id} completed.")
        return []
|
|
544
|
+
|
|
545
|
+
def maybe_save_output(self, jobstep: JobStep, data: Any):
    """
    Persist a step's output when some other step declares it as an alt_input.

    Outputs are only retained when at least one job step will consume them,
    to avoid holding onto data nobody needs.
    """
    step_id = jobstep.id
    if any(s.alt_input == step_id for s in self.job_steps):
        self.saved_outputs[step_id] = data
|
|
550
|
+
|
|
551
|
+
def check_if_ancestor(self, id: str, ancestor_id: str):
    """
    Verify that ancestor_id identifies an ancestor of the step with the given id.

    Walks the parent chain of the step upward; returns normally as soon as
    ancestor_id is found.

    Args:
        id: The id of the step whose ancestry is checked.
        ancestor_id: The id of the supposed ancestor step.

    Raises:
        RuntimeError: If ancestor_id is not a step in this pipeline, or if it
            is a step but not an ancestor of `id`.
    """
    if ancestor_id not in {s.id for s in self.job_steps}:
        raise RuntimeError(f"{ancestor_id} is not a step in this pipeline.")
    jobstep = next(s for s in self.job_steps if s.id == id)
    # Use next(..., None) so the walk terminates cleanly at a root step
    # (whose parent_id matches no step) instead of raising IndexError.
    parent = next((s for s in self.job_steps if s.id == jobstep.parent_id), None)
    while parent is not None:
        if parent.id == ancestor_id:
            return
        parent = next((s for s in self.job_steps if s.id == parent.parent_id), None)
    raise RuntimeError("A step tried to use input from a non-ancestor step. This is unsupported.")
|
|
562
|
+
|
|
563
|
+
async def record_pipeline_start(self, dt: datetime):
    """
    Record the pipeline start time as the config's max-sync value on RITA.

    Loads the previously stored max-sync into context.max_sync, then writes
    the new start timestamp. Failures are logged but never abort the
    pipeline — this bookkeeping is strictly best-effort.

    Args:
        dt: The timestamp at which the pipeline started.
    """
    pipeline_name = self.__class__.__name__
    new_sync = MaxSync(
        max_sync_date=dt,
        context={
            "pipeline_name": pipeline_name,
        }
    )
    try:
        previous = await self.rita_client.get_config_max_sync(config_id=self.pipeline_config_id)
        previous_json = previous.json()
        if previous_json is not None:
            self.context.max_sync = MaxSync.model_validate(previous_json)
        else:
            self.context.max_sync = None
        await self.rita_client.update_config_max_sync(
            config_id=self.pipeline_config_id,
            max_sync=new_sync,
        )
        logger.info(f"Pipeline {pipeline_name} started at {dt}")
    except Exception as e:
        # Deliberately broad: recording the start must never break the run.
        logger.exception(e)
        logger.warning(f"Failed to record pipeline start for {pipeline_name}")
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
class FileParser(Protocol):
    """Protocol for file parsing implementations with RITA integration.

    Structural interface: any object providing these two methods satisfies it.
    """

    def get_records(self, rd: T) -> List[Dict]:
        """Parse file data into records based on configuration.

        Args:
            rd: The raw file/request data to parse.

        Returns:
            A list of parsed record dicts.
        """
        ...

    def get_translated_records(self, rd: T) -> Tuple[List[Dict], List[Dict]]:
        """Parse and translate file records, returning (records, errors).

        Args:
            rd: The raw file/request data to parse and translate.

        Returns:
            A 2-tuple of (successfully translated records, error records).
        """
        ...
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
@runtime_checkable
class ParserBase(Protocol):
    """Runtime-checkable protocol describing the parser interface.

    Objects can be tested with isinstance() thanks to @runtime_checkable
    (method presence only, not signatures).
    """

    def __init__(self, source_system: str | None = None, **kwargs):
        # Identifier of the source system this parser handles (None if unset).
        self.source_system = source_system

    async def load_mapper(self) -> RitaMapper:
        """Load Rita Mapper"""
        ...

    async def parse(self, data: T, mapping_type: MappingMode | None = None) -> T:
        """Parse Data"""
        ...

    def get_issues(self) -> list[dict]:
        """Retrieve issues stored over the course of the parser run."""
        ...

    def record_issue(self, **kwargs):
        """Record an issue for later retrieval."""
        ...
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
class Parser(Generic[T]):
    """
    Mixin class providing RITA mapper functionality for parser implementations.

    Supplies the shared RITA integration pieces — mapper loading, issue
    collection, and log serialization — so concrete parser classes only need
    to implement parse().

    Attributes:
        source_system: Identifier for the source system
        mapping_provider: Optional mapping provider (can be used in testing).
    """

    def __init__(self,
                 source_system: str | None = None,
                 mapping_provider: Optional[AsyncMappingProvider] = None
                 ):
        """
        Initialize RITA parser mixin.

        Args:
            source_system: Source system identifier for RITA mapping
            mapping_provider: Provides mappings from RITA. Required if source_system is specified.
        """
        self.source_system = source_system
        self.mapping_provider = mapping_provider
        # Issues collected via record_issue() during a parser run.
        self._issue_parts = []
        # Arbitrary structured log data, serialized by get_logs().
        self.logs = {}

    async def load_mapper(self) -> Optional[RitaMapper]:
        """
        Load and initialize the RITA mapper for the configured source system.

        Returns:
            An initialized RitaMapper with mappings loaded, or None when no
            source system is configured.

        Raises:
            MapperLoadError: If mapper construction or mapping loading fails.
        """
        if not self.source_system:
            logger.warning("No source system configured for RITA mapper, skipping.")
            return None
        try:
            mapper = RitaMapper(
                provider=self.mapping_provider,
                source_system=self.source_system
            )
            await mapper.load_mappings_async()
        except Exception as e:
            msg = f"Failed to load mapper for source system '{self.source_system}': {e}"
            logger.error(msg)
            raise MapperLoadError(msg) from e
        return mapper

    async def parse(self, data: T, mapping_type: MappingMode | None = None) -> T:
        """Parse the given data. Concrete parsers must override this."""
        ...

    def get_issues(self) -> list[dict]:
        """Return the issues recorded over the course of the parser run."""
        return self._issue_parts

    def get_logs(self) -> str:
        """Serialize the collected log data to a JSON string."""
        return json.dumps(self.logs, cls=CustomJSONEncoder)

    def record_issue(self, name: str,
                     category: IssueCategory,
                     problem_short: str,
                     problem_long: str,
                     key_suffix: Optional[str] = None):
        """Record an issue encountered during parser for later retrieval by the caller."""
        issue = {
            "key_suffix": key_suffix,
            "name": name,
            "category": category,
            "problem_short": problem_short,
            "problem_long": problem_long,
        }
        self._issue_parts.append(issue)
|
|
File without changes
|