bb_integrations_library-3.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217)
  1. bb_integrations_lib/__init__.py +0 -0
  2. bb_integrations_lib/converters/__init__.py +0 -0
  3. bb_integrations_lib/gravitate/__init__.py +0 -0
  4. bb_integrations_lib/gravitate/base_api.py +20 -0
  5. bb_integrations_lib/gravitate/model.py +29 -0
  6. bb_integrations_lib/gravitate/pe_api.py +122 -0
  7. bb_integrations_lib/gravitate/rita_api.py +552 -0
  8. bb_integrations_lib/gravitate/sd_api.py +572 -0
  9. bb_integrations_lib/gravitate/testing/TTE/sd/models.py +1398 -0
  10. bb_integrations_lib/gravitate/testing/TTE/sd/tests/test_models.py +2987 -0
  11. bb_integrations_lib/gravitate/testing/__init__.py +0 -0
  12. bb_integrations_lib/gravitate/testing/builder.py +55 -0
  13. bb_integrations_lib/gravitate/testing/openapi.py +70 -0
  14. bb_integrations_lib/gravitate/testing/util.py +274 -0
  15. bb_integrations_lib/mappers/__init__.py +0 -0
  16. bb_integrations_lib/mappers/prices/__init__.py +0 -0
  17. bb_integrations_lib/mappers/prices/model.py +106 -0
  18. bb_integrations_lib/mappers/prices/price_mapper.py +127 -0
  19. bb_integrations_lib/mappers/prices/protocol.py +20 -0
  20. bb_integrations_lib/mappers/prices/util.py +61 -0
  21. bb_integrations_lib/mappers/rita_mapper.py +523 -0
  22. bb_integrations_lib/models/__init__.py +0 -0
  23. bb_integrations_lib/models/dtn_supplier_invoice.py +487 -0
  24. bb_integrations_lib/models/enums.py +28 -0
  25. bb_integrations_lib/models/pipeline_structs.py +76 -0
  26. bb_integrations_lib/models/probe/probe_event.py +20 -0
  27. bb_integrations_lib/models/probe/request_data.py +431 -0
  28. bb_integrations_lib/models/probe/resume_token.py +7 -0
  29. bb_integrations_lib/models/rita/audit.py +113 -0
  30. bb_integrations_lib/models/rita/auth.py +30 -0
  31. bb_integrations_lib/models/rita/bucket.py +17 -0
  32. bb_integrations_lib/models/rita/config.py +188 -0
  33. bb_integrations_lib/models/rita/constants.py +19 -0
  34. bb_integrations_lib/models/rita/crossroads_entities.py +293 -0
  35. bb_integrations_lib/models/rita/crossroads_mapping.py +428 -0
  36. bb_integrations_lib/models/rita/crossroads_monitoring.py +78 -0
  37. bb_integrations_lib/models/rita/crossroads_network.py +41 -0
  38. bb_integrations_lib/models/rita/crossroads_rules.py +80 -0
  39. bb_integrations_lib/models/rita/email.py +39 -0
  40. bb_integrations_lib/models/rita/issue.py +63 -0
  41. bb_integrations_lib/models/rita/mapping.py +227 -0
  42. bb_integrations_lib/models/rita/probe.py +58 -0
  43. bb_integrations_lib/models/rita/reference_data.py +110 -0
  44. bb_integrations_lib/models/rita/source_system.py +9 -0
  45. bb_integrations_lib/models/rita/workers.py +76 -0
  46. bb_integrations_lib/models/sd/bols_and_drops.py +241 -0
  47. bb_integrations_lib/models/sd/get_order.py +301 -0
  48. bb_integrations_lib/models/sd/orders.py +18 -0
  49. bb_integrations_lib/models/sd_api.py +115 -0
  50. bb_integrations_lib/pipelines/__init__.py +0 -0
  51. bb_integrations_lib/pipelines/parsers/__init__.py +0 -0
  52. bb_integrations_lib/pipelines/parsers/distribution_report/__init__.py +0 -0
  53. bb_integrations_lib/pipelines/parsers/distribution_report/order_by_site_product_parser.py +50 -0
  54. bb_integrations_lib/pipelines/parsers/distribution_report/tank_configs_parser.py +47 -0
  55. bb_integrations_lib/pipelines/parsers/dtn/__init__.py +0 -0
  56. bb_integrations_lib/pipelines/parsers/dtn/dtn_price_parser.py +102 -0
  57. bb_integrations_lib/pipelines/parsers/dtn/model.py +79 -0
  58. bb_integrations_lib/pipelines/parsers/price_engine/__init__.py +0 -0
  59. bb_integrations_lib/pipelines/parsers/price_engine/parse_accessorials_prices_parser.py +67 -0
  60. bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/__init__.py +0 -0
  61. bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/price_merge_parser.py +111 -0
  62. bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/price_sync_parser.py +107 -0
  63. bb_integrations_lib/pipelines/parsers/price_engine/price_file_upload/shared.py +81 -0
  64. bb_integrations_lib/pipelines/parsers/tank_reading_parser.py +155 -0
  65. bb_integrations_lib/pipelines/parsers/tank_sales_parser.py +144 -0
  66. bb_integrations_lib/pipelines/shared/__init__.py +0 -0
  67. bb_integrations_lib/pipelines/shared/allocation_matching.py +227 -0
  68. bb_integrations_lib/pipelines/shared/bol_allocation.py +2793 -0
  69. bb_integrations_lib/pipelines/steps/__init__.py +0 -0
  70. bb_integrations_lib/pipelines/steps/create_accessorials_step.py +80 -0
  71. bb_integrations_lib/pipelines/steps/distribution_report/__init__.py +0 -0
  72. bb_integrations_lib/pipelines/steps/distribution_report/distribution_report_datafram_to_raw_data.py +33 -0
  73. bb_integrations_lib/pipelines/steps/distribution_report/get_model_history_step.py +50 -0
  74. bb_integrations_lib/pipelines/steps/distribution_report/get_order_by_site_product_step.py +62 -0
  75. bb_integrations_lib/pipelines/steps/distribution_report/get_tank_configs_step.py +40 -0
  76. bb_integrations_lib/pipelines/steps/distribution_report/join_distribution_order_dos_step.py +85 -0
  77. bb_integrations_lib/pipelines/steps/distribution_report/upload_distribution_report_datafram_to_big_query.py +47 -0
  78. bb_integrations_lib/pipelines/steps/echo_step.py +14 -0
  79. bb_integrations_lib/pipelines/steps/export_dataframe_to_rawdata_step.py +28 -0
  80. bb_integrations_lib/pipelines/steps/exporting/__init__.py +0 -0
  81. bb_integrations_lib/pipelines/steps/exporting/bbd_export_payroll_file_step.py +107 -0
  82. bb_integrations_lib/pipelines/steps/exporting/bbd_export_readings_step.py +236 -0
  83. bb_integrations_lib/pipelines/steps/exporting/cargas_wholesale_bundle_upload_step.py +33 -0
  84. bb_integrations_lib/pipelines/steps/exporting/dataframe_flat_file_export.py +29 -0
  85. bb_integrations_lib/pipelines/steps/exporting/gcs_bucket_export_file_step.py +34 -0
  86. bb_integrations_lib/pipelines/steps/exporting/keyvu_export_step.py +356 -0
  87. bb_integrations_lib/pipelines/steps/exporting/pe_price_export_step.py +238 -0
  88. bb_integrations_lib/pipelines/steps/exporting/platform_science_order_sync_step.py +500 -0
  89. bb_integrations_lib/pipelines/steps/exporting/save_rawdata_to_disk.py +15 -0
  90. bb_integrations_lib/pipelines/steps/exporting/sftp_export_file_step.py +60 -0
  91. bb_integrations_lib/pipelines/steps/exporting/sftp_export_many_files_step.py +23 -0
  92. bb_integrations_lib/pipelines/steps/exporting/update_exported_orders_table_step.py +64 -0
  93. bb_integrations_lib/pipelines/steps/filter_step.py +22 -0
  94. bb_integrations_lib/pipelines/steps/get_latest_sync_date.py +34 -0
  95. bb_integrations_lib/pipelines/steps/importing/bbd_import_payroll_step.py +30 -0
  96. bb_integrations_lib/pipelines/steps/importing/get_order_numbers_to_export_step.py +138 -0
  97. bb_integrations_lib/pipelines/steps/importing/load_file_to_dataframe_step.py +46 -0
  98. bb_integrations_lib/pipelines/steps/importing/load_imap_attachment_step.py +172 -0
  99. bb_integrations_lib/pipelines/steps/importing/pe_bulk_sync_price_structure_step.py +68 -0
  100. bb_integrations_lib/pipelines/steps/importing/pe_price_merge_step.py +86 -0
  101. bb_integrations_lib/pipelines/steps/importing/sftp_file_config_step.py +124 -0
  102. bb_integrations_lib/pipelines/steps/importing/test_exact_file_match.py +57 -0
  103. bb_integrations_lib/pipelines/steps/null_step.py +15 -0
  104. bb_integrations_lib/pipelines/steps/pe_integration_job_step.py +32 -0
  105. bb_integrations_lib/pipelines/steps/processing/__init__.py +0 -0
  106. bb_integrations_lib/pipelines/steps/processing/archive_gcs_step.py +76 -0
  107. bb_integrations_lib/pipelines/steps/processing/archive_sftp_step.py +48 -0
  108. bb_integrations_lib/pipelines/steps/processing/bbd_format_tank_readings_step.py +492 -0
  109. bb_integrations_lib/pipelines/steps/processing/bbd_upload_prices_step.py +54 -0
  110. bb_integrations_lib/pipelines/steps/processing/bbd_upload_tank_sales_step.py +124 -0
  111. bb_integrations_lib/pipelines/steps/processing/bbd_upload_tankreading_step.py +80 -0
  112. bb_integrations_lib/pipelines/steps/processing/convert_bbd_order_to_cargas_step.py +226 -0
  113. bb_integrations_lib/pipelines/steps/processing/delete_sftp_step.py +33 -0
  114. bb_integrations_lib/pipelines/steps/processing/dtn/__init__.py +2 -0
  115. bb_integrations_lib/pipelines/steps/processing/dtn/convert_dtn_invoice_to_sd_model.py +145 -0
  116. bb_integrations_lib/pipelines/steps/processing/dtn/parse_dtn_invoice_step.py +38 -0
  117. bb_integrations_lib/pipelines/steps/processing/file_config_parser_step.py +720 -0
  118. bb_integrations_lib/pipelines/steps/processing/file_config_parser_step_v2.py +418 -0
  119. bb_integrations_lib/pipelines/steps/processing/get_sd_price_price_request.py +105 -0
  120. bb_integrations_lib/pipelines/steps/processing/keyvu_upload_deliveryplan_step.py +39 -0
  121. bb_integrations_lib/pipelines/steps/processing/mark_orders_exported_in_bbd_step.py +185 -0
  122. bb_integrations_lib/pipelines/steps/processing/pe_price_rows_processing_step.py +174 -0
  123. bb_integrations_lib/pipelines/steps/processing/send_process_report_step.py +47 -0
  124. bb_integrations_lib/pipelines/steps/processing/sftp_renamer_step.py +61 -0
  125. bb_integrations_lib/pipelines/steps/processing/tank_reading_touchup_steps.py +75 -0
  126. bb_integrations_lib/pipelines/steps/processing/upload_supplier_invoice_step.py +16 -0
  127. bb_integrations_lib/pipelines/steps/send_attached_in_rita_email_step.py +44 -0
  128. bb_integrations_lib/pipelines/steps/send_rita_email_step.py +34 -0
  129. bb_integrations_lib/pipelines/steps/sleep_step.py +24 -0
  130. bb_integrations_lib/pipelines/wrappers/__init__.py +0 -0
  131. bb_integrations_lib/pipelines/wrappers/accessorials_transformation.py +104 -0
  132. bb_integrations_lib/pipelines/wrappers/distribution_report.py +191 -0
  133. bb_integrations_lib/pipelines/wrappers/export_tank_readings.py +237 -0
  134. bb_integrations_lib/pipelines/wrappers/import_tank_readings.py +192 -0
  135. bb_integrations_lib/pipelines/wrappers/wrapper.py +81 -0
  136. bb_integrations_lib/protocols/__init__.py +0 -0
  137. bb_integrations_lib/protocols/flat_file.py +210 -0
  138. bb_integrations_lib/protocols/gravitate_client.py +104 -0
  139. bb_integrations_lib/protocols/pipelines.py +697 -0
  140. bb_integrations_lib/provider/__init__.py +0 -0
  141. bb_integrations_lib/provider/api/__init__.py +0 -0
  142. bb_integrations_lib/provider/api/cargas/__init__.py +0 -0
  143. bb_integrations_lib/provider/api/cargas/client.py +43 -0
  144. bb_integrations_lib/provider/api/cargas/model.py +49 -0
  145. bb_integrations_lib/provider/api/cargas/protocol.py +23 -0
  146. bb_integrations_lib/provider/api/dtn/__init__.py +0 -0
  147. bb_integrations_lib/provider/api/dtn/client.py +128 -0
  148. bb_integrations_lib/provider/api/dtn/protocol.py +9 -0
  149. bb_integrations_lib/provider/api/keyvu/__init__.py +0 -0
  150. bb_integrations_lib/provider/api/keyvu/client.py +30 -0
  151. bb_integrations_lib/provider/api/keyvu/model.py +149 -0
  152. bb_integrations_lib/provider/api/macropoint/__init__.py +0 -0
  153. bb_integrations_lib/provider/api/macropoint/client.py +28 -0
  154. bb_integrations_lib/provider/api/macropoint/model.py +40 -0
  155. bb_integrations_lib/provider/api/pc_miler/__init__.py +0 -0
  156. bb_integrations_lib/provider/api/pc_miler/client.py +130 -0
  157. bb_integrations_lib/provider/api/pc_miler/model.py +6 -0
  158. bb_integrations_lib/provider/api/pc_miler/web_services_apis.py +131 -0
  159. bb_integrations_lib/provider/api/platform_science/__init__.py +0 -0
  160. bb_integrations_lib/provider/api/platform_science/client.py +147 -0
  161. bb_integrations_lib/provider/api/platform_science/model.py +82 -0
  162. bb_integrations_lib/provider/api/quicktrip/__init__.py +0 -0
  163. bb_integrations_lib/provider/api/quicktrip/client.py +52 -0
  164. bb_integrations_lib/provider/api/telapoint/__init__.py +0 -0
  165. bb_integrations_lib/provider/api/telapoint/client.py +68 -0
  166. bb_integrations_lib/provider/api/telapoint/model.py +178 -0
  167. bb_integrations_lib/provider/api/warren_rogers/__init__.py +0 -0
  168. bb_integrations_lib/provider/api/warren_rogers/client.py +207 -0
  169. bb_integrations_lib/provider/aws/__init__.py +0 -0
  170. bb_integrations_lib/provider/aws/s3/__init__.py +0 -0
  171. bb_integrations_lib/provider/aws/s3/client.py +126 -0
  172. bb_integrations_lib/provider/ftp/__init__.py +0 -0
  173. bb_integrations_lib/provider/ftp/client.py +140 -0
  174. bb_integrations_lib/provider/ftp/interface.py +273 -0
  175. bb_integrations_lib/provider/ftp/model.py +76 -0
  176. bb_integrations_lib/provider/imap/__init__.py +0 -0
  177. bb_integrations_lib/provider/imap/client.py +228 -0
  178. bb_integrations_lib/provider/imap/model.py +3 -0
  179. bb_integrations_lib/provider/sqlserver/__init__.py +0 -0
  180. bb_integrations_lib/provider/sqlserver/client.py +106 -0
  181. bb_integrations_lib/secrets/__init__.py +4 -0
  182. bb_integrations_lib/secrets/adapters.py +98 -0
  183. bb_integrations_lib/secrets/credential_models.py +222 -0
  184. bb_integrations_lib/secrets/factory.py +85 -0
  185. bb_integrations_lib/secrets/providers.py +160 -0
  186. bb_integrations_lib/shared/__init__.py +0 -0
  187. bb_integrations_lib/shared/exceptions.py +25 -0
  188. bb_integrations_lib/shared/model.py +1039 -0
  189. bb_integrations_lib/shared/shared_enums.py +510 -0
  190. bb_integrations_lib/storage/README.md +236 -0
  191. bb_integrations_lib/storage/__init__.py +0 -0
  192. bb_integrations_lib/storage/aws/__init__.py +0 -0
  193. bb_integrations_lib/storage/aws/s3.py +8 -0
  194. bb_integrations_lib/storage/defaults.py +72 -0
  195. bb_integrations_lib/storage/gcs/__init__.py +0 -0
  196. bb_integrations_lib/storage/gcs/client.py +8 -0
  197. bb_integrations_lib/storage/gcsmanager/__init__.py +0 -0
  198. bb_integrations_lib/storage/gcsmanager/client.py +8 -0
  199. bb_integrations_lib/storage/setup.py +29 -0
  200. bb_integrations_lib/util/__init__.py +0 -0
  201. bb_integrations_lib/util/cache/__init__.py +0 -0
  202. bb_integrations_lib/util/cache/custom_ttl_cache.py +75 -0
  203. bb_integrations_lib/util/cache/protocol.py +9 -0
  204. bb_integrations_lib/util/config/__init__.py +0 -0
  205. bb_integrations_lib/util/config/manager.py +391 -0
  206. bb_integrations_lib/util/config/model.py +41 -0
  207. bb_integrations_lib/util/exception_logger/__init__.py +0 -0
  208. bb_integrations_lib/util/exception_logger/exception_logger.py +146 -0
  209. bb_integrations_lib/util/exception_logger/test.py +114 -0
  210. bb_integrations_lib/util/utils.py +364 -0
  211. bb_integrations_lib/workers/__init__.py +0 -0
  212. bb_integrations_lib/workers/groups.py +13 -0
  213. bb_integrations_lib/workers/rpc_worker.py +50 -0
  214. bb_integrations_lib/workers/topics.py +20 -0
  215. bb_integrations_library-3.0.11.dist-info/METADATA +59 -0
  216. bb_integrations_library-3.0.11.dist-info/RECORD +217 -0
  217. bb_integrations_library-3.0.11.dist-info/WHEEL +4 -0
@@ -0,0 +1,697 @@
1
+ import json
2
+ import sys
3
+ from datetime import datetime, UTC
4
+ from typing import Any, Protocol, Tuple, TypeVar, Iterable, runtime_checkable, Optional, \
5
+ AsyncIterable, AsyncGenerator, Type, Union, List, Dict, Generic, Self
6
+
7
+ from loguru import logger
8
+ from pydantic import BaseModel
9
+
10
+ from bb_integrations_lib.gravitate.rita_api import GravitateRitaAPI
11
+ from bb_integrations_lib.mappers.rita_mapper import RitaMapper, RitaAPIMappingProvider, AsyncMappingProvider
12
+ from bb_integrations_lib.models.pipeline_structs import StopBranch, StopPipeline, PipelineContext, \
13
+ NoPipelineData, NoPipelineSourceData
14
+ from bb_integrations_lib.models.rita.audit import CreateReportV2, ProcessReportV2Status, \
15
+ UploadProcessReportFile
16
+ from bb_integrations_lib.models.rita.config import MaxSync
17
+ from bb_integrations_lib.models.rita.issue import IssueCategory
18
+ from bb_integrations_lib.secrets import IntegrationSecretProvider, SecretProvider, RITACredential
19
+ from bb_integrations_lib.secrets.factory import APIFactory
20
+ from bb_integrations_lib.shared.exceptions import StepConfigValidationError, MapperLoadError
21
+ from bb_integrations_lib.shared.model import MappingMode
22
+ from bb_integrations_lib.util.config.manager import GlobalConfigManager
23
+ from bb_integrations_lib.util.utils import CustomJSONEncoder
24
+
25
# Generic type parameters for the Step / GeneratorStep protocols defined below.
Input = TypeVar("Input")    # Type of data a step accepts as input.
Output = TypeVar("Output")  # Type of data a step produces as output.
# Bound to an optional Pydantic model; presumably used for step-config validation
# elsewhere in the package — TODO confirm (not referenced in this chunk).
StepConfig = TypeVar("StepConfig", bound=Optional[BaseModel])
T = TypeVar("T")  # General-purpose type variable.
29
+
30
+
31
@runtime_checkable
class Step(Protocol[Input, Output]):
    """
    Protocol for pipeline steps that process data.

    A Step represents a single unit of work in a job pipeline, such as fetching files,
    transforming data, or uploading results. Each step receives input data, processes it,
    and produces output for the next step in the pipeline.

    Type Parameters:
        Input: The type of data this step accepts as input
        Output: The type of data this step produces as output

    Examples:
        Basic step:
        ```python
        class SimpleStep(Step[str, int]):
            def describe(self) -> str:
                return "Converts string to length"

            async def execute(self, text: str) -> int:
                return len(text)
        ```

        Step with access to pipeline resources:
        ```python
        class APIStep(Step[dict, dict]):
            async def execute(self, data: dict) -> dict:
                # Access pipeline resources via context
                api_client = self.pipeline_context.api_clients.some_api
                return await api_client.process(data)
        ```
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize a pipeline step.

        Attributes:
            pipeline_context: Context object set by the pipeline during execution
                (may also be injected here via the ``pipeline_context`` keyword)
            config_manager: Global configuration manager for accessing secrets/APIs
            secret_provider: Secret provider injected later via set_secret_provider()
            api_factory: API factory built from the secret provider
        """
        # .get() already defaults to None; no explicit default needed.
        self.pipeline_context: PipelineContext | None = kwargs.get("pipeline_context")
        self.config_manager = GlobalConfigManager()
        self.secret_provider: SecretProvider | None = None
        self.api_factory: APIFactory | None = None

    def describe(self) -> str:
        """
        Return a human-readable description of what this step does.

        Used for logging and debugging. Should be concise but descriptive.

        Returns:
            Brief description of the step's purpose

        Raises:
            NotImplementedError: If the subclass does not override this method.

        Example:
            return "Upload files to FTP server"
        """
        raise NotImplementedError()

    async def execute(self, i: Input) -> Output:
        """
        Execute the main work of this pipeline step.

        Args:
            i: Input data from the previous step or initial pipeline input

        Returns:
            Output data to be passed to the next step in the pipeline

        Note:
            - Can access self.pipeline_context for shared pipeline state
            - Should handle errors gracefully or let them bubble up
        """
        ...

    def set_context(self, context: PipelineContext):
        """
        Set the pipeline context for this step.

        Called automatically by the pipeline when the step is added.
        Provides access to shared pipeline state and resources.

        Args:
            context: Pipeline context containing shared state, logs, and resources
        """
        self.pipeline_context = context

    def set_secret_provider(self, sp: SecretProvider):
        """
        Set the secret provider and build an APIFactory from it.

        Kept separate from pipeline context because it is not a Pydantic model and
        should not be saved accidentally if we were to log context.

        :param sp: The SecretProvider instance to use for retrieving secrets.
        """
        self.secret_provider = sp
        self.api_factory = APIFactory(sp)
161
+
162
+
163
@runtime_checkable
class GeneratorStep(Step[Input, Iterable[Output]], Protocol[Input, Output]):
    """
    Represents a subtask in a job pipeline that is a generator, that is, it can yield values for another
    step to accumulate.

    A GeneratorStep is a Step[Input, Iterable[Output]] for some Input and Output type.

    A pipeline that sees a Generator step will call next on that step's generator property, then send the
    output to next step as its input. The next step must either be a GeneratorStep or an AccumulatorStep.
    """

    async def generator(self, i: Input) -> AsyncIterable[Output]:
        """
        The generator function for this step. Each step will be called with the Input object.

        Args:
            i: Input data from the previous step or initial pipeline input.

        Returns:
            An async iterable that yields Output items one at a time.
        """
        ...
178
+
179
+
180
class JobStep(BaseModel):
    """One node in a pipeline's tree of steps, pairing a Step instance with its tree position."""
    # The Step (or GeneratorStep) instance to run; typed Any because Step is a Protocol.
    step: Any
    # Unique identifier of this step within the pipeline.
    id: str
    # ID of the parent step; None marks the root step.
    parent_id: str | None = None
    alt_input: str | None = None  # If set, the input of a non-parent ancestor step will be provided to the step instead.

    class Config:
        # Required so Pydantic accepts the arbitrary (non-model) `step` object.
        arbitrary_types_allowed = True
188
+
189
+
190
class PipelineTenantConfig(BaseModel):
    """Pairs a tenant name with the id of the pipeline configuration to run for it."""
    tenant_name: str
    config_id: str
193
+
194
+
195
+ @runtime_checkable
196
+ class JobPipeline(Protocol):
197
+ """
198
+ Protocol for job pipelines that orchestrate multiple steps in sequence.
199
+
200
+ A JobPipeline manages the execution of interconnected steps, handling data flow,
201
+ error management, logging, and reporting. Steps are organized as a tree structure
202
+ where each step processes input and passes output to its children.
203
+
204
+ Features:
205
+ - Sequential and parallel step execution
206
+ - Automatic error handling and retry logic
207
+ - Process reporting to RITA API
208
+ - Issue tracking and reporting
209
+ - Comprehensive logging with multiple sinks
210
+ - Pipeline context sharing between steps
211
+
212
+ Step Organization:
213
+ Steps are defined as a tree where each step has:
214
+ - id: Unique identifier
215
+ - parent_id: ID of parent step (None for root)
216
+ - alt_input: Optional alternative input from ancestor step
217
+
218
+ Examples:
219
+ Basic pipeline:
220
+ ```python
221
+ class MyPipeline(JobPipeline):
222
+ def __init__(self):
223
+ steps = [
224
+ {"step": FetchDataStep(), "id": "fetch", "parent_id": None},
225
+ {"step": ProcessStep(), "id": "process", "parent_id": "fetch"},
226
+ {"step": UploadStep(), "id": "upload", "parent_id": "process"}
227
+ ]
228
+ super().__init__(steps, initial_input="start_data")
229
+ ```
230
+
231
+ Pipeline with reporting:
232
+ ```python
233
+ report_config = PipelineProcessReportConfig(
234
+ config_id="my-config",
235
+ trigger="manual",
236
+ rita_tenant="my-tenant",
237
+ rita_client_id="client-id",
238
+ rita_client_secret="secret"
239
+ )
240
+
241
+ pipeline = MyPipeline(
242
+ job_steps=steps,
243
+ process_report_config=report_config,
244
+ catch_step_errors=True
245
+ )
246
+ await pipeline.execute()
247
+ ```
248
+ """
249
+
250
+ def __init__(
251
+ self,
252
+ job_steps: list[dict],
253
+ rita_client: GravitateRitaAPI,
254
+ pipeline_name: str,
255
+ pipeline_config_id: str,
256
+ secret_provider: SecretProvider,
257
+ initial_input: Any = None,
258
+ catch_step_errors: bool = False,
259
+ upload_process_report_on_stoppipeline: bool = True,
260
+ send_reports: bool = True
261
+ ):
262
+ """
263
+ Initialize a job pipeline with steps and configuration.
264
+
265
+ Args:
266
+ job_steps: List of step dictionaries, each containing:
267
+ - step: Step instance implementing Step protocol
268
+ - id: Unique string identifier for the step
269
+ - parent_id: ID of parent step (None for root step)
270
+ - alt_input: Optional ancestor step ID to use as input
271
+ initial_input: Data to pass to the root step (default: None)
272
+ catch_step_errors: If True, step errors don't stop the entire pipeline,
273
+ just the current branch (default: False)
274
+ upload_process_report_on_stoppipeline: Whether to upload reports when
275
+ pipeline is stopped early (default: True)
276
+
277
+ Raises:
278
+ RuntimeError: If job_steps is empty
279
+ ValueError: If step tree structure is invalid (cycles, multiple roots, etc.)
280
+
281
+ Attributes:
282
+ job_steps: List of JobStep objects created from input dictionaries
283
+ pending_steps: Queue of steps ready for execution
284
+ saved_outputs: Cache of step outputs for alt_input functionality
285
+ context: Shared pipeline context accessible by all steps
286
+ config_manager: Global configuration manager for accessing secrets/APIs
287
+ """
288
+ if len(job_steps) == 0:
289
+ raise RuntimeError("Pipelines must have at least 1 step.")
290
+
291
+ self.job_steps = [JobStep(**js) for js in job_steps]
292
+ # Scan the list of job_steps and look for a step that has no parent. If multiple steps have no parent, throw
293
+ # an error. Also throw an error if no step has a null parent.
294
+ check_tree_res = self.check_tree(self.job_steps)
295
+ if check_tree_res is not None:
296
+ raise ValueError(check_tree_res)
297
+ start = [s for s in self.job_steps if s.parent_id is None]
298
+ self.pending_steps: list[Tuple[JobStep, Any]] = [(start[0], initial_input)]
299
+ self.saved_outputs: dict[str, Any] = {}
300
+ self.saved_logs: list[Any] = []
301
+ self.catch_step_errors = catch_step_errors
302
+ self.context = PipelineContext()
303
+ self.rita_client = rita_client
304
+ self.secret_provider = secret_provider
305
+ self.pipeline_name = pipeline_name
306
+ self.pipeline_config_id = pipeline_config_id
307
+ self.upload_process_report_on_stoppipeline = upload_process_report_on_stoppipeline
308
+ self.config_manager = GlobalConfigManager()
309
+ self.send_reports = send_reports
310
+
311
+ # Configure logging - resets existing loguru handlers
312
+ logger.remove()
313
+ # Route anything less than ERROR to stdout,
314
+ logger.add(sink=sys.stdout, filter=lambda record: record["level"].no < 40)
315
+ # anything ERROR and above to stderr
316
+ logger.add(sink=sys.stderr, level="ERROR")
317
+ # Also collect all logs to use in process reporting
318
+ self.collect_handler_id = logger.add(self._collect_log)
319
+
320
+ for js in self.job_steps:
321
+ # Give each step a reference to the pipeline context
322
+ js.step.set_context(self.context)
323
+ # And outside of that context, a reference to the secret provider
324
+ js.step.set_secret_provider(self.secret_provider)
325
+
326
+ def _collect_log(self, log: Any):
327
+ self.saved_logs.append(log)
328
+ self.context.logs.append(log)
329
+
330
+ def check_tree(self, job_steps: list[JobStep]) -> Optional[str]:
331
+ # Starting from the first, I should reach each step in the list once and only once
332
+ first_step = [x for x in job_steps if x.parent_id is None]
333
+ if len(first_step) != 1:
334
+ return "The tree of job steps did not have a unique step with parent_id=None"
335
+ to_visit = first_step
336
+ visited = set()
337
+ while len(to_visit) != 0:
338
+ step = to_visit.pop(0)
339
+ if step.id in visited:
340
+ return f"The tree has a cycle. Step {step.id} was visited twice."
341
+ visited.add(step.id)
342
+ children = [x for x in job_steps if x.parent_id == step.id]
343
+ to_visit.extend(children)
344
+ return None
345
+
346
+ @logger.catch
347
+ async def execute(self):
348
+ """
349
+ Execute the pipeline by processing all steps in the correct order.
350
+
351
+ Processes steps from the pending_steps queue until empty. Handles both
352
+ regular Steps and GeneratorSteps. Manages error recovery, logging,
353
+ and report generation.
354
+
355
+ Execution Flow:
356
+ 1. Pop next step from pending queue
357
+ 2. Execute step (regular or generator)
358
+ 3. Add child steps to pending queue
359
+ 4. Repeat until queue empty or error
360
+ 5. Generate final process report
361
+
362
+ Error Handling:
363
+ - StopBranch: Stops current execution branch, continues with other branches
364
+ - StopPipeline: Stops entire pipeline execution
365
+ - Other exceptions: Stops pipeline unless catch_step_errors=True
366
+
367
+ Raises:
368
+ Exception: Any unhandled step execution errors (unless catch_step_errors=True)
369
+
370
+ Note:
371
+ Always calls finish_pipeline() for cleanup and reporting, regardless
372
+ of success or failure.
373
+ """
374
+ try:
375
+ start_dt = datetime.now(UTC)
376
+ if self.send_reports:
377
+ await self.record_pipeline_start(start_dt)
378
+
379
+ while len(self.pending_steps) > 0:
380
+ jobstep, input = self.pending_steps.pop(0)
381
+ step = jobstep.step
382
+ if isinstance(step, GeneratorStep):
383
+ new_pending_steps = await self.handle_generator_step(jobstep, input)
384
+ elif isinstance(step, Step):
385
+ new_pending_steps = await self.handle_step(jobstep, input)
386
+ else:
387
+ raise RuntimeError(f"Step {step.id} doesn't implement either of Step or GeneratorStep.")
388
+ self.pending_steps = new_pending_steps + self.pending_steps
389
+ await self.finish_pipeline()
390
+ except StopPipeline as e:
391
+ await self.finish_pipeline(e)
392
+ except Exception as e:
393
+ await self.finish_pipeline(e)
394
+
395
+ async def finish_pipeline(self, exc: Exception | None = None):
396
+ """
397
+ Finalize pipeline execution with cleanup, reporting, and issue tracking.
398
+
399
+ Called automatically at the end of pipeline execution, regardless of success
400
+ or failure. Handles process reporting to RITA, issue reporting, and final
401
+ logging.
402
+
403
+ Args:
404
+ exc: Exception that caused pipeline termination (None for successful completion)
405
+
406
+ Process Report Behavior:
407
+ - Success: Status = 'stop', includes all logs and files
408
+ - StopPipeline: Status = 'stop', may skip report if upload_process_report_on_stoppipeline=False
409
+ - Error: Status = 'error', includes exception details and stack trace
410
+
411
+ Issue Reporting:
412
+ If issue_reporting_config is provided and issues were collected during
413
+ execution, uploads all issues to RITA for tracking.
414
+
415
+ Generated Reports Include:
416
+ - Complete execution logs
417
+ - Files added to context.included_files
418
+ - Pipeline metadata (config_id, trigger, etc.)
419
+ - Issue summaries (if any)
420
+ """
421
+ halted_early = False
422
+ halted_with_error = False
423
+ if exc is not None and not isinstance(exc, StopPipeline):
424
+ logger.info("Pipeline exited with an error")
425
+ logger.exception(exc)
426
+ halted_with_error = True
427
+ elif exc is not None and isinstance(exc, StopPipeline):
428
+ logger.success("The pipeline was halted early")
429
+ halted_early = True
430
+ else:
431
+ logger.success("Pipeline completed")
432
+
433
+ # No matter how it ended, generate a process report if requested.
434
+ if halted_early and self.upload_process_report_on_stoppipeline == False:
435
+ logger.info("Did not upload process report because pipeline was halted early.")
436
+ else:
437
+ if self.send_reports:
438
+ logger.info("Creating process report on RITA...")
439
+ await self.rita_client.create_process_report(CreateReportV2(
440
+ trigger=self.pipeline_name,
441
+ status=ProcessReportV2Status.error if halted_with_error else ProcessReportV2Status.stop,
442
+ config_id=self.pipeline_config_id,
443
+ # Logs are one list item per line, newlines already included. Join into one string.
444
+ log=UploadProcessReportFile(file_base_name=f"log", content="".join(self.context.logs)),
445
+ included_files=[
446
+ UploadProcessReportFile(file_base_name=name, content=content)
447
+ for name, content in self.context.included_files.items()
448
+ ]
449
+ ))
450
+ logger.info("Uploaded process report")
451
+ else:
452
+ logger.info("Not sending process report (send_reports=False)")
453
+
454
+ # If there were any issues reported, upload those to RITA.
455
+ if self.send_reports:
456
+ if len(self.context.issues) > 0:
457
+ logger.info("Recording issues on RITA...")
458
+ # Prepend the pipeline name to issue keys
459
+ for issue in self.context.issues:
460
+ issue.key = f"{self.pipeline_name}__{issue.key}"
461
+ await self.rita_client.record_many_issues(self.context.issues)
462
+ logger.info("Recorded issues")
463
+ else:
464
+ logger.info("No issues to record")
465
+ else:
466
+ logger.info("Not recording issues (send_reports=False)")
467
+
468
+ async def handle_step(self, jobstep: JobStep, input: Input):
469
+ id = jobstep.id
470
+ description = jobstep.step.describe()
471
+ logger.info(f"Running step {id}: {description}")
472
+ if jobstep.alt_input:
473
+ self.check_if_ancestor(jobstep.id, jobstep.alt_input)
474
+ alt_input = self.saved_outputs[jobstep.alt_input]
475
+ self.context.previous_output = input
476
+ output_coroutine = jobstep.step.execute(alt_input)
477
+ else:
478
+ self.context.previous_output = None
479
+ output_coroutine = jobstep.step.execute(input)
480
+ try:
481
+ output = await output_coroutine
482
+ self.maybe_save_output(jobstep, output)
483
+ new_pending = [(s, output) for s in self.job_steps if s.parent_id == jobstep.id]
484
+ return new_pending
485
+ except StopBranch:
486
+ logger.info(f"Branch execution stopped at step {id} due to StopBranch exception")
487
+ return []
488
+ except StopPipeline as e:
489
+ raise e
490
+
491
+ except NoPipelineData as npd:
492
+ raise npd
493
+
494
+ except NoPipelineSourceData as npsd:
495
+ raise npsd
496
+
497
+ except Exception as e:
498
+ if self.catch_step_errors:
499
+ # If we get an error in the step, we don't want to cancel the whole pipeline, just this "branch" of
500
+ # execution. (On a pipeline without generator steps, there isn't any difference, but on a pipeline with
501
+ # generator steps this will just move to the next invocation of the generator step.) Due to the
502
+ # architecture of the pipeline we can "cancel" the current branch by simply returning no next jobsteps
503
+ # to execute.
504
+ logger.exception(e)
505
+ logger.warning("Exception encountered; canceling further execution on this branch.")
506
+ return []
507
+ else:
508
+ raise e
509
+
510
+ async def handle_generator_step(self, jobstep: JobStep, input: Input | AsyncGenerator):
511
+ id = jobstep.id
512
+ description = jobstep.step.describe()
513
+ logger.info(f"Generating next output for step {id}: {description}")
514
+
515
+ if isinstance(input, AsyncGenerator):
516
+ # This is a paused generator. The input parameter is the currently paused generator
517
+ generator = input
518
+ else:
519
+ # This is not a paused generator. The input is a real input, and we need to construct the generator.
520
+ assert isinstance(jobstep.step, GeneratorStep)
521
+ if jobstep.alt_input:
522
+ self.check_if_ancestor(jobstep.id, jobstep.alt_input)
523
+ alt_input = self.saved_outputs[jobstep.alt_input]
524
+ self.context.previous_output = input
525
+ generator = jobstep.step.generator(alt_input)
526
+ else:
527
+ self.context.previous_output = None
528
+ generator = jobstep.step.generator(input)
529
+
530
+ # Run the generator once and get the output, or catch a StopIteration
531
+ try:
532
+ next_output = await anext(generator)
533
+ self.maybe_save_output(jobstep, next_output)
534
+ new_pending_steps = [(s, next_output) for s in self.job_steps if s.parent_id == jobstep.id]
535
+
536
+ # Put the currently executing step at the back of the new_pending_steps list. It will execute once all of
537
+ # the child steps of this job are done. We store the generator instead of the input so we can pick up where
538
+ # execution left off.
539
+ new_pending_steps += [(jobstep, generator)]
540
+ return new_pending_steps
541
+ except StopAsyncIteration:
542
+ logger.info(f"Generator for step {id} completed.")
543
+ return []
544
+
545
+ def maybe_save_output(self, jobstep: JobStep, data: Any):
546
+ id = jobstep.id
547
+ alt_input_consumers = [s for s in self.job_steps if s.alt_input == id]
548
+ if len(alt_input_consumers) > 0:
549
+ self.saved_outputs[id] = data
550
+
551
+ def check_if_ancestor(self, id: str, ancestor_id: str):
552
+ if ancestor_id not in {s.id for s in self.job_steps}:
553
+ raise RuntimeError(f"{ancestor_id} is not a step in this pipeline.")
554
+ jobstep = [s for s in self.job_steps if s.id == id][0]
555
+ parent = [s for s in self.job_steps if s.id == jobstep.parent_id][0]
556
+ while parent is not None:
557
+ if parent.id == ancestor_id:
558
+ return
559
+ else:
560
+ parent = [s for s in self.job_steps if s.id == parent.parent_id][0]
561
+ raise RuntimeError("A step tried to use input from a non-ancestor step. This is unsupported.")
562
+
563
+ async def record_pipeline_start(self, dt: datetime):
564
+ max_sync = MaxSync(
565
+ max_sync_date=dt,
566
+ context={
567
+ "pipeline_name": self.__class__.__name__,
568
+ }
569
+ )
570
+ try:
571
+ latest = await self.rita_client.get_config_max_sync(config_id=self.pipeline_config_id)
572
+ latest_sync_json = latest.json()
573
+ self.context.max_sync = MaxSync.model_validate(latest_sync_json) if latest_sync_json is not None else None
574
+ await self.rita_client.update_config_max_sync(
575
+ config_id=self.pipeline_config_id,
576
+ max_sync=max_sync,
577
+ )
578
+ logger.info(f"Pipeline {self.__class__.__name__} started at {dt}")
579
+ except Exception as e:
580
+ logger.exception(e)
581
+ logger.warning(f"Failed to record pipeline start for {self.__class__.__name__}")
582
+
583
+
584
class FileParser(Protocol):
    """Protocol for file parsing implementations with RITA integration.

    Structural (duck-typed) interface: any object exposing these two methods
    is accepted wherever a ``FileParser`` is expected. ``T`` is the type
    variable declared elsewhere in this module — presumably the raw file /
    request-data payload type; confirm against callers.
    """

    def get_records(self, rd: T) -> List[Dict]:
        """Parse file data into records based on configuration.

        Args:
            rd: File payload / request data to extract records from.

        Returns:
            One dict per parsed record.
        """
        ...

    def get_translated_records(self, rd: T) -> Tuple[List[Dict], List[Dict]]:
        """Parse and translate file records, returning (records, errors).

        Args:
            rd: File payload / request data to extract records from.

        Returns:
            A ``(records, errors)`` tuple: successfully translated record
            dicts, and dicts describing records that failed translation.
        """
        ...
594
+
595
+
596
@runtime_checkable
class ParserBase(Protocol):
    """Runtime-checkable structural interface for RITA-aware parsers.

    Concrete implementations (e.g. the ``Parser`` mixin in this module) can be
    verified with ``isinstance(obj, ParserBase)`` thanks to ``@runtime_checkable``
    (note: isinstance checks only method presence, not signatures).
    """

    def __init__(self, source_system: str | None = None, **kwargs):
        # Identifier of the external system the parsed data originates from.
        self.source_system = source_system

    async def load_mapper(self) -> RitaMapper:
        """Load and return the RITA mapper for this parser's source system."""
        ...

    async def parse(self, data: T, mapping_type: MappingMode | None = None) -> T:
        """Parse (and optionally map) the given data, returning the transformed result."""
        ...

    def get_issues(self) -> list[dict]:
        """Retrieve issues stored over the course of the parser run."""
        ...

    def record_issue(self, **kwargs):
        """Record an issue for later retrieval."""
        ...
616
+
617
+
618
class Parser(Generic[T]):
    """
    Mixin class providing RITA mapper functionality for parser implementations.

    This class provides common RITA integration functionality that can be inherited
    by various parser classes. It handles mapper loading, credential management,
    and issue recording.

    Attributes:
        source_system: Identifier for the source system
        mapping_provider: Optional mapping provider (can be used in testing).
    """

    def __init__(self,
                 source_system: str | None = None,
                 mapping_provider: Optional[AsyncMappingProvider] = None
                 ):
        """
        Initialize RITA parser mixin.

        Args:
            source_system: Source system identifier for RITA mapping
            mapping_provider: Provides mappings from RITA. Required if source_system is specified.
        """
        self.source_system = source_system
        self.mapping_provider = mapping_provider
        # Issue dicts accumulated via record_issue(), retrieved via get_issues().
        self._issue_parts: list[dict] = []
        # Arbitrary structured log data, serialized by get_logs().
        self.logs: dict = {}

    async def load_mapper(self) -> Optional[RitaMapper]:
        """
        Load and initialize RITA mapper for the configured source system.

        Uses the configured mapping provider to create a mapper instance and
        loads the mapping configuration asynchronously.

        Returns:
            Initialized RitaMapper instance with loaded mappings, or None when
            no source system is configured.

        Raises:
            MapperLoadError: If mapper initialization or loading fails
        """
        if not self.source_system:
            logger.warning("No source system configured for RITA mapper, skipping.")
            return None
        try:
            mapper = RitaMapper(
                provider=self.mapping_provider,
                source_system=self.source_system
            )
            await mapper.load_mappings_async()
            return mapper
        except Exception as e:
            msg = f"Failed to load mapper for source system '{self.source_system}': {e}"
            logger.error(msg)
            # Chain the cause so the original failure stays in the traceback.
            raise MapperLoadError(msg) from e

    async def parse(self, data: T, mapping_type: MappingMode | None = None) -> T:
        """Custom Parser implementation. Must be overridden by subclasses."""
        ...

    def get_issues(self) -> list[dict]:
        """Retrieve issues stored over the course of the parser run."""
        return self._issue_parts

    def get_logs(self) -> str:
        """Serialize accumulated logs to a JSON string via the project's custom encoder."""
        return json.dumps(self.logs, cls=CustomJSONEncoder)

    def record_issue(self, name: str,
                     category: IssueCategory,
                     problem_short: str,
                     problem_long: str,
                     key_suffix: Optional[str] = None) -> None:
        """Record an issue encountered during parsing for later retrieval by the caller."""
        self._issue_parts.append({
            "key_suffix": key_suffix,
            "name": name,
            "category": category,
            "problem_short": problem_short,
            "problem_long": problem_long,
        })
File without changes