dkist-processing-core 5.2.1__tar.gz → 6.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/CHANGELOG.rst +9 -0
  2. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/PKG-INFO +35 -42
  3. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/README.rst +32 -38
  4. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/config.py +24 -25
  5. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/failure_callback.py +2 -1
  6. dkist_processing_core-6.0.0/dkist_processing_core/task.py +258 -0
  7. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/conftest.py +1 -1
  8. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_task.py +3 -13
  9. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core.egg-info/PKG-INFO +35 -42
  10. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core.egg-info/requires.txt +2 -3
  11. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/conf.py +7 -1
  12. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/pyproject.toml +2 -3
  13. dkist_processing_core-5.2.1/dkist_processing_core/task.py +0 -250
  14. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/.gitignore +0 -0
  15. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/.pre-commit-config.yaml +0 -0
  16. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/.readthedocs.yml +0 -0
  17. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/.snyk +0 -0
  18. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/bitbucket-pipelines.yml +0 -0
  19. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/changelog/.gitempty +0 -0
  20. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/__init__.py +0 -0
  21. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/build_utils.py +0 -0
  22. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/node.py +0 -0
  23. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/resource_queue.py +0 -0
  24. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/__init__.py +0 -0
  25. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/invalid_workflow_cyclic/__init__.py +0 -0
  26. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/invalid_workflow_cyclic/workflow.py +0 -0
  27. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/__init__.py +0 -0
  28. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/invalid_workflow_for_docker_multi_category/workflow.py +0 -0
  29. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/task_example.py +0 -0
  30. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_build_utils.py +0 -0
  31. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_export.py +0 -0
  32. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_failure_callback.py +0 -0
  33. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_node.py +0 -0
  34. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/test_workflow.py +0 -0
  35. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/valid_workflow_package/__init__.py +0 -0
  36. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/valid_workflow_package/workflow.py +0 -0
  37. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/zero_node_workflow_package/__init__.py +0 -0
  38. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/tests/zero_node_workflow_package/workflow.py +0 -0
  39. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core/workflow.py +0 -0
  40. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core.egg-info/SOURCES.txt +0 -0
  41. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core.egg-info/dependency_links.txt +0 -0
  42. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/dkist_processing_core.egg-info/top_level.txt +0 -0
  43. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/Makefile +0 -0
  44. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/auto-proc-concept-model.png +0 -0
  45. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/auto_proc_brick.png +0 -0
  46. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/automated-processing-deployed.png +0 -0
  47. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/changelog.rst +0 -0
  48. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/index.rst +0 -0
  49. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/landing_page.rst +0 -0
  50. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/make.bat +0 -0
  51. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/docs/requirements.txt +0 -0
  52. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/licenses/LICENSE.rst +0 -0
  53. {dkist_processing_core-5.2.1 → dkist_processing_core-6.0.0}/setup.cfg +0 -0
@@ -1,3 +1,12 @@
1
+ v6.0.0 (2025-09-26)
2
+ ===================
3
+
4
+ Misc
5
+ ----
6
+
7
+ - Swap out Elastic APM for OpenTelemetry tracing and metrics. (`#56 <https://bitbucket.org/dkistdc/dkist-processing-core/pull-requests/56>`__)
8
+
9
+
1
10
  v5.2.1 (2025-09-08)
2
11
  ===================
3
12
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dkist-processing-core
3
- Version: 5.2.1
3
+ Version: 6.0.0
4
4
  Summary: Abstraction layer used by the DKIST science data processing pipelines with Apache Airflow
5
5
  Author-email: NSO / AURA <dkistdc@nso.edu>
6
6
  License: BSD-3-Clause
@@ -14,12 +14,11 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Requires-Python: >=3.12
15
15
  Description-Content-Type: text/x-rst
16
16
  Requires-Dist: apache-airflow[celery,postgres]==2.11.0
17
- Requires-Dist: elastic-apm<7.0.0
18
17
  Requires-Dist: requests>=2.23
19
- Requires-Dist: talus<2.0,>=1.1.0
18
+ Requires-Dist: talus<2.0,>=1.3.4
20
19
  Requires-Dist: pendulum
21
20
  Requires-Dist: nbformat>=5.9.2
22
- Requires-Dist: dkist-service-configuration<3.0,>=2.0.2
21
+ Requires-Dist: dkist-service-configuration<5.0,>=4.1.7
23
22
  Requires-Dist: pydantic>2.0
24
23
  Provides-Extra: test
25
24
  Requires-Dist: pytest; extra == "test"
@@ -128,53 +127,47 @@ Environment Variables
128
127
  ---------------------
129
128
 
130
129
  .. list-table::
131
- :widths: 10 70 10 10
130
+ :widths: 10 90
132
131
  :header-rows: 1
133
132
 
134
133
  * - Variable
135
- - Description
136
- - Type
137
- - Default
138
- * - BUILD_VERSION
139
- - Build/Export pipelines only. This is the value that will be appended to all artifacts and represents their unique version
140
- - STR
141
- - dev
134
+ - Field Info
135
+ * - LOGURU_LEVEL
136
+ - annotation=str required=False default='INFO' alias_priority=2 validation_alias='LOGURU_LEVEL' description='Log level for the application'
142
137
  * - MESH_CONFIG
143
- - Provides the dkistdc cloud mesh configuration. Specifically the location of the message broker
144
- - JSON
145
- - ``{}``
138
+ - annotation=dict[str, MeshService] required=False default_factory=dict alias_priority=2 validation_alias='MESH_CONFIG' description='Service mesh configuration' examples=[{'upstream_service_name': {'mesh_address': 'localhost', 'mesh_port': 6742}}]
139
+ * - RETRY_CONFIG
140
+ - annotation=RetryConfig required=False default_factory=RetryConfig description='Retry configuration for the service'
141
+ * - OTEL_SERVICE_NAME
142
+ - annotation=str required=False default='unknown-service-name' alias_priority=2 validation_alias='OTEL_SERVICE_NAME' description='Service name for OpenTelemetry'
143
+ * - DKIST_SERVICE_VERSION
144
+ - annotation=str required=False default='unknown-service-version' alias_priority=2 validation_alias='DKIST_SERVICE_VERSION' description='Service version for OpenTelemetry'
145
+ * - NOMAD_ALLOC_ID
146
+ - annotation=str required=False default='unknown-allocation-id' alias_priority=2 validation_alias='NOMAD_ALLOC_ID' description='Nomad allocation ID for OpenTelemetry'
147
+ * - OTEL_EXPORTER_OTLP_TRACES_INSECURE
148
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP traces'
149
+ * - OTEL_EXPORTER_OTLP_METRICS_INSECURE
150
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP metrics'
151
+ * - OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
152
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP traces endpoint. Overrides mesh configuration' examples=['localhost:4317']
153
+ * - OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
154
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP metrics endpoint. Overrides mesh configuration' examples=['localhost:4317']
155
+ * - OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
156
+ - annotation=list[str] required=False default_factory=list description='List of instrumentations to disable. https://opentelemetry.io/docs/zero-code/python/configuration/' examples=[['pika', 'requests']]
157
+ * - OTEL_PYTHON_FASTAPI_EXCLUDED_URLS
158
+ - annotation=str required=False default='health' description='Comma separated list of URLs to exclude from OpenTelemetry instrumentation in FastAPI.' examples=['client/.*/info,healthcheck']
159
+ * - SYSTEM_METRIC_INSTRUMENTATION_CONFIG
160
+ - annotation=Union[dict[str, bool], NoneType] required=False default=None description='Configuration for system metric instrumentation. https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/system_metrics/system_metrics.html' examples=[{'system.memory.usage': ['used', 'free', 'cached'], 'system.cpu.time': ['idle', 'user', 'system', 'irq'], 'system.network.io': ['transmit', 'receive'], 'process.runtime.memory': ['rss', 'vms'], 'process.runtime.cpu.time': ['user', 'system'], 'process.runtime.context_switches': ['involuntary', 'voluntary']}]
146
161
  * - ISB_USERNAME
147
- - Message broker user name
148
- - STR
149
- - guest
162
+ - annotation=str required=False default='guest' description='Username for the interservice-bus.'
150
163
  * - ISB_PASSWORD
151
- - Message broker password
152
- - STR
153
- - guest
164
+ - annotation=str required=False default='guest' description='Password for the interservice-bus.'
154
165
  * - ISB_EXCHANGE
155
- - Message Broker Exchange name for publishing messages
156
- - STR
157
- - master.direct.x
166
+ - annotation=str required=False default='master.direct.x' description='Exchange for the interservice-bus.'
158
167
  * - ISB_QUEUE_TYPE
159
- - Message Broker queue type for transporting messages
160
- - STR
161
- - classic
162
- * - ELASTIC_APM_SERVICE_NAME
163
- - Service Name used by Elastic Application Performance Monitoring
164
- - STR
165
- -
166
- * - ELASTIC_APM_OTHER_OPTIONS
167
- - Dictionary of configuration for the Elastic Application Performance Monitoring client
168
- - STR
169
- - ``{}``
170
- * - ELASTIC_APM_ENABLED
171
- - Flag to disable/enable Elastic Application Performance Monitoring client calls which are chatty if not connected to an APM server.
172
- - BOOL
173
- - FALSE
168
+ - annotation=str required=False default='classic' description='Queue type for the interservice-bus.' examples=['quorum', 'classic']
174
169
  * - BUILD_VERSION
175
- - Version of the pipeline. When built this makes its way into the workflow or dag name.
176
- - STR
177
- - dev
170
+ - annotation=str required=False default='dev' description='Fallback build version for workflow tasks.'
178
171
 
179
172
  Development
180
173
  -----------
@@ -86,53 +86,47 @@ Environment Variables
86
86
  ---------------------
87
87
 
88
88
  .. list-table::
89
- :widths: 10 70 10 10
89
+ :widths: 10 90
90
90
  :header-rows: 1
91
91
 
92
92
  * - Variable
93
- - Description
94
- - Type
95
- - Default
96
- * - BUILD_VERSION
97
- - Build/Export pipelines only. This is the value that will be appended to all artifacts and represents their unique version
98
- - STR
99
- - dev
93
+ - Field Info
94
+ * - LOGURU_LEVEL
95
+ - annotation=str required=False default='INFO' alias_priority=2 validation_alias='LOGURU_LEVEL' description='Log level for the application'
100
96
  * - MESH_CONFIG
101
- - Provides the dkistdc cloud mesh configuration. Specifically the location of the message broker
102
- - JSON
103
- - ``{}``
97
+ - annotation=dict[str, MeshService] required=False default_factory=dict alias_priority=2 validation_alias='MESH_CONFIG' description='Service mesh configuration' examples=[{'upstream_service_name': {'mesh_address': 'localhost', 'mesh_port': 6742}}]
98
+ * - RETRY_CONFIG
99
+ - annotation=RetryConfig required=False default_factory=RetryConfig description='Retry configuration for the service'
100
+ * - OTEL_SERVICE_NAME
101
+ - annotation=str required=False default='unknown-service-name' alias_priority=2 validation_alias='OTEL_SERVICE_NAME' description='Service name for OpenTelemetry'
102
+ * - DKIST_SERVICE_VERSION
103
+ - annotation=str required=False default='unknown-service-version' alias_priority=2 validation_alias='DKIST_SERVICE_VERSION' description='Service version for OpenTelemetry'
104
+ * - NOMAD_ALLOC_ID
105
+ - annotation=str required=False default='unknown-allocation-id' alias_priority=2 validation_alias='NOMAD_ALLOC_ID' description='Nomad allocation ID for OpenTelemetry'
106
+ * - OTEL_EXPORTER_OTLP_TRACES_INSECURE
107
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP traces'
108
+ * - OTEL_EXPORTER_OTLP_METRICS_INSECURE
109
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP metrics'
110
+ * - OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
111
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP traces endpoint. Overrides mesh configuration' examples=['localhost:4317']
112
+ * - OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
113
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP metrics endpoint. Overrides mesh configuration' examples=['localhost:4317']
114
+ * - OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
115
+ - annotation=list[str] required=False default_factory=list description='List of instrumentations to disable. https://opentelemetry.io/docs/zero-code/python/configuration/' examples=[['pika', 'requests']]
116
+ * - OTEL_PYTHON_FASTAPI_EXCLUDED_URLS
117
+ - annotation=str required=False default='health' description='Comma separated list of URLs to exclude from OpenTelemetry instrumentation in FastAPI.' examples=['client/.*/info,healthcheck']
118
+ * - SYSTEM_METRIC_INSTRUMENTATION_CONFIG
119
+ - annotation=Union[dict[str, bool], NoneType] required=False default=None description='Configuration for system metric instrumentation. https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/system_metrics/system_metrics.html' examples=[{'system.memory.usage': ['used', 'free', 'cached'], 'system.cpu.time': ['idle', 'user', 'system', 'irq'], 'system.network.io': ['transmit', 'receive'], 'process.runtime.memory': ['rss', 'vms'], 'process.runtime.cpu.time': ['user', 'system'], 'process.runtime.context_switches': ['involuntary', 'voluntary']}]
104
120
  * - ISB_USERNAME
105
- - Message broker user name
106
- - STR
107
- - guest
121
+ - annotation=str required=False default='guest' description='Username for the interservice-bus.'
108
122
  * - ISB_PASSWORD
109
- - Message broker password
110
- - STR
111
- - guest
123
+ - annotation=str required=False default='guest' description='Password for the interservice-bus.'
112
124
  * - ISB_EXCHANGE
113
- - Message Broker Exchange name for publishing messages
114
- - STR
115
- - master.direct.x
125
+ - annotation=str required=False default='master.direct.x' description='Exchange for the interservice-bus.'
116
126
  * - ISB_QUEUE_TYPE
117
- - Message Broker queue type for transporting messages
118
- - STR
119
- - classic
120
- * - ELASTIC_APM_SERVICE_NAME
121
- - Service Name used by Elastic Application Performance Monitoring
122
- - STR
123
- -
124
- * - ELASTIC_APM_OTHER_OPTIONS
125
- - Dictionary of configuration for the Elastic Application Performance Monitoring client
126
- - STR
127
- - ``{}``
128
- * - ELASTIC_APM_ENABLED
129
- - Flag to disable/enable Elastic Application Performance Monitoring client calls which are chatty if not connected to an APM server.
130
- - BOOL
131
- - FALSE
127
+ - annotation=str required=False default='classic' description='Queue type for the interservice-bus.' examples=['quorum', 'classic']
132
128
  * - BUILD_VERSION
133
- - Version of the pipeline. When built this makes its way into the workflow or dag name.
134
- - STR
135
- - dev
129
+ - annotation=str required=False default='dev' description='Fallback build version for workflow tasks.'
136
130
 
137
131
  Development
138
132
  -----------
@@ -1,24 +1,30 @@
1
1
  """Environment controlled configurations for dkist_processing_core."""
2
2
 
3
- from dkist_service_configuration import MeshServiceConfigurationBase
3
+ from dkist_service_configuration import InstrumentedMeshServiceConfigurationBase
4
4
  from dkist_service_configuration.settings import MeshService
5
+ from opentelemetry.sdk.resources import Resource
5
6
  from pydantic import Field
6
7
  from talus import ConnectionRetryerFactory
7
8
  from talus import Exchange
8
9
  from talus.models.connection_parameters import ConnectionParameterFactory
9
10
 
10
11
 
11
- class DKISTProcessingCoreConfiguration(MeshServiceConfigurationBase):
12
+ class DKISTProcessingCoreConfiguration(InstrumentedMeshServiceConfigurationBase):
12
13
  """Environment configurations for dkist_processing_core."""
13
14
 
14
- isb_username: str = Field(default="guest")
15
- isb_password: str = Field(default="guest")
16
- isb_exchange: str = Field(default="master.direct.x")
17
- isb_queue_type: str = Field(default="classic")
18
- elastic_apm_service_name: str = Field(default="dkist-processing-core")
19
- elastic_apm_other_options: dict = Field(default_factory=dict)
20
- elastic_apm_enabled: bool = False
21
- build_version: str = Field(default="dev")
15
+ isb_username: str = Field(default="guest", description="Username for the interservice-bus.")
16
+ isb_password: str = Field(default="guest", description="Password for the interservice-bus.")
17
+ isb_exchange: str = Field(
18
+ default="master.direct.x", description="Exchange for the interservice-bus."
19
+ )
20
+ isb_queue_type: str = Field(
21
+ default="classic",
22
+ description="Queue type for the interservice-bus.",
23
+ examples=["quorum", "classic"],
24
+ )
25
+ build_version: str = Field(
26
+ default="dev", description="Fallback build version for workflow tasks."
27
+ )
22
28
 
23
29
  @property
24
30
  def isb_mesh_service(self) -> MeshService:
@@ -36,7 +42,7 @@ class DKISTProcessingCoreConfiguration(MeshServiceConfigurationBase):
36
42
  rabbitmq_port=self.isb_mesh_service.port,
37
43
  rabbitmq_user=self.isb_username,
38
44
  rabbitmq_pass=self.isb_password,
39
- connection_name="dkist-processing-core-producer",
45
+ connection_name=f"{self.service_name}-producer",
40
46
  )
41
47
 
42
48
  @property
@@ -64,20 +70,13 @@ class DKISTProcessingCoreConfiguration(MeshServiceConfigurationBase):
64
70
  return Exchange(name=self.isb_exchange)
65
71
 
66
72
  @property
67
- def elastic_apm_server_url(self) -> str:
68
- """Return the URL for the Elastic APM server."""
69
- apm_server = self.service_mesh_detail(service_name="system-monitoring-log-apm")
70
- return f"http://{apm_server.host}:{apm_server.port}/"
71
-
72
- @property
73
- def apm_config(self) -> dict:
74
- """Return the configuration for the Elastic APM."""
75
- return {
76
- "SERVICE_NAME": self.elastic_apm_service_name,
77
- "SERVER_URL": self.elastic_apm_server_url,
78
- "ENVIRONMENT": "Workflows",
79
- **self.elastic_apm_other_options,
80
- }
73
+ def otel_resource(self) -> Resource:
74
+ """Open Telemetry resource attributes."""
75
+ old = super().otel_resource
76
+ updates = Resource(attributes={"service.name.alias": "dkist-processing"})
77
+ new = old.merge(updates)
78
+ return new
81
79
 
82
80
 
83
81
  core_configurations = DKISTProcessingCoreConfiguration()
82
+ core_configurations.auto_instrument()
@@ -3,6 +3,7 @@
3
3
  import logging
4
4
  from contextlib import contextmanager
5
5
  from typing import Callable
6
+ from typing import Generator
6
7
  from typing import Type
7
8
 
8
9
  from talus import Binding
@@ -35,7 +36,7 @@ class RecipeRunFailureMessage(PublishMessageBase):
35
36
 
36
37
 
37
38
  @contextmanager
38
- def recipe_run_failure_message_producer_factory() -> DurableProducer:
39
+ def recipe_run_failure_message_producer_factory() -> Generator[DurableProducer, None, None]:
39
40
  """Create message producer for recipe run failure messages."""
40
41
  # Configure the queue the messages should be routed to
41
42
  recipe_run_failure_queue = Queue(
@@ -0,0 +1,258 @@
1
+ """
2
+ Base class that is used to wrap the various DAG task methods.
3
+
4
+ It provides support for user-defined setup and cleanup, task monitoring using OpenTelemetry,
5
+ standardized logging, and exception handling.
6
+ """
7
+
8
+ import logging
9
+ from abc import ABC
10
+ from abc import abstractmethod
11
+ from contextlib import contextmanager
12
+ from typing import Generator
13
+ from typing import Sequence
14
+
15
+ from opentelemetry.context.context import Context
16
+ from opentelemetry.metrics import Counter
17
+ from opentelemetry.metrics import Meter
18
+ from opentelemetry.trace import Link
19
+ from opentelemetry.trace import Span
20
+ from opentelemetry.trace import SpanKind
21
+ from opentelemetry.trace import StatusCode
22
+ from opentelemetry.trace import Tracer
23
+ from opentelemetry.util.types import Attributes
24
+
25
+ from dkist_processing_core.config import core_configurations
26
+
27
+ __all__ = ["TaskBase"]
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ class TaskBase(ABC):
33
+ """
34
+ A Task is the interface between processing code and its execution. Processing code can follow this interface through subclassing and remain agnostic to the execution environment.
35
+
36
+ Each DAG task must implement its own subclass of this abstract wrapper class.
37
+
38
+ Intended instantiation is as a context manager
39
+
40
+ >>> class RealTask(TaskBase):
41
+ >>> def run(self):
42
+ >>> pass
43
+ >>>
44
+ >>> with RealTask(1, "a", "b") as task:
45
+ >>> task()
46
+
47
+ Task names in airflow are the same as the class name
48
+ Additional methods can be added but will only be called if they are referenced via run,
49
+ pre_run, post_run, or __exit__
50
+
51
+ overriding methods other than run, pre_run, post_run, and in special cases __exit__ is
52
+ discouraged as they are used internally to support the abstraction.
53
+ e.g. __init__ is called by the core api without user involvement so adding parameters will not
54
+ result in them being passed in as there is no client interface to __init__.
55
+
56
+ To use the tracing infrastructure in subclass code one would do the following:
57
+
58
+ >>> def foo(self):
59
+ >>> with self.telemetry_span("do detailed work"):
60
+ >>> pass # do work
61
+
62
+ Parameters
63
+ ----------
64
+ recipe_run_id : int
65
+ id of the recipe run used to identify the workflow run this task is part of
66
+ workflow_name : str
67
+ name of the workflow to which this instance of the task belongs
68
+ workflow_version : str
69
+ version of the workflow to which this instance of the task belongs
70
+
71
+ """
72
+
73
+ retries = 0
74
+ retry_delay_seconds = 60
75
+ tracer: Tracer = core_configurations.tracer
76
+ meter: Meter = core_configurations.meter
77
+
78
+ def __init__(
79
+ self,
80
+ recipe_run_id: int,
81
+ workflow_name: str,
82
+ workflow_version: str,
83
+ ):
84
+ """
85
+ Instantiate a Task.
86
+
87
+ The details of instantiation may vary based upon the export target but this signature is what is expected by the instantiation transformation (Node) code.
88
+ """
89
+ self.recipe_run_id = int(recipe_run_id)
90
+ self.workflow_name = workflow_name
91
+ self.workflow_version = workflow_version
92
+ self.task_name = self.__class__.__name__
93
+
94
+ self.base_telemetry_attributes = {
95
+ "recipe.run.id": self.recipe_run_id,
96
+ "workflow.name": self.workflow_name,
97
+ "workflow.version": self.workflow_version,
98
+ "task.name": self.task_name,
99
+ }
100
+
101
+ # meter instruments
102
+ self.task_execution_counter: Counter = self.meter.create_counter(
103
+ name=self.format_metric_name("tasks.executed"),
104
+ unit="1",
105
+ description="The number of tasks executed in the processing stack.",
106
+ )
107
+
108
+ logger.info(f"Task {self.task_name} initialized")
109
+
110
+ @contextmanager
111
+ def telemetry_span(
112
+ self,
113
+ name: str,
114
+ context: Context | None = None,
115
+ kind: SpanKind = SpanKind.INTERNAL,
116
+ attributes: Attributes = None,
117
+ links: Sequence[Link] | None = None,
118
+ start_time: int | None = None,
119
+ record_exception: bool = True,
120
+ set_status_on_exception: bool = True,
121
+ end_on_exit: bool = True,
122
+ ) -> Generator[Span, None, None]: # noqa: D405,D407
123
+ """
124
+ Context manager that creates a new span and sets it as the current span in this tracer's context.
125
+
126
+ Parameters
127
+ ----------
128
+ name
129
+ The name of the span to be created.
130
+
131
+ context
132
+ An optional Context containing the span's parent. Defaults to the global context.
133
+
134
+ kind
135
+ The span's kind (relationship to parent). Note that this is meaningful even if there is no parent.
136
+
137
+ attributes
138
+ The span's attributes.
139
+
140
+ links
141
+ Links span to other spans
142
+
143
+ start_time
144
+ Sets the start time of a span
145
+
146
+ record_exception
147
+ Whether to record any exceptions raised within the context as error event on the span.
148
+
149
+ set_status_on_exception
150
+ Only relevant if the returned span is used in a with/context manager. Defines whether the span status will
151
+ be automatically set to ERROR when an uncaught exception is raised in the span with block. The span status
152
+ won't be set by this mechanism if it was previously set manually.
153
+
154
+ end_on_exit
155
+ Whether to end the span automatically when leaving the context manager.
156
+
157
+ Yields
158
+ ------
159
+ The newly-created span.
160
+ """
161
+ with self.tracer.start_as_current_span(
162
+ name=name,
163
+ context=context,
164
+ kind=kind,
165
+ attributes=attributes,
166
+ links=links,
167
+ start_time=start_time,
168
+ record_exception=record_exception,
169
+ set_status_on_exception=set_status_on_exception,
170
+ end_on_exit=end_on_exit,
171
+ ) as span:
172
+ span.set_attributes(self.base_telemetry_attributes)
173
+ yield span
174
+ span.set_status(StatusCode.OK)
175
+
176
+ def format_metric_name(self, name: str) -> str:
177
+ """
178
+ Format the metric name to include the meter name and a namespace of 'processing' for dkist-processing-* meters. Words are separated by a dot.
179
+
180
+ For example, if the meter name is "dkist.meter" and the metric name is "tasks.executed",
181
+ the formatted name will be "dkist.meter.processing.tasks.executed".
182
+ """
183
+ return f"{self.meter.name}.processing.{name}"
184
+
185
+ def pre_run(self) -> None:
186
+ """Intended to be overridden and will execute prior to run() with Open Telemetry trace span capturing."""
187
+
188
+ @abstractmethod
189
+ def run(self) -> None:
190
+ """Abstract method that must be overridden to execute the desired DAG task with Open Telemetry trace span capturing."""
191
+
192
+ def post_run(self) -> None:
193
+ """Intended to be overridden and will execute after run() with Open Telemetry trace span capturing."""
194
+
195
+ def rollback(self) -> None:
196
+ """Rollback any changes to persistent stores performed by the task."""
197
+
198
+ def __call__(self) -> None:
199
+ """
200
+ DAG task wrapper. Execution is instrumented with Open Telemetry tracing if configured.
201
+
202
+ The standard execution sequence is:
203
+
204
+ 1 run
205
+
206
+ 2 record provenance
207
+
208
+ Returns
209
+ -------
210
+ None
211
+
212
+ """
213
+ verbose_task_name = f"{self.workflow_name}.{self.task_name}"
214
+ logger.info(f"{verbose_task_name} started")
215
+
216
+ self.task_execution_counter.add(amount=1, attributes=self.base_telemetry_attributes)
217
+
218
+ with self.telemetry_span(f"{verbose_task_name}"): # Root Span
219
+ with self.telemetry_span("Pre Run"):
220
+ self.pre_run()
221
+ with self.telemetry_span("Run"):
222
+ self.run()
223
+ with self.telemetry_span("Post Run"):
224
+ self.post_run()
225
+ logger.info(f"{verbose_task_name} complete")
226
+
227
+ def __enter__(self):
228
+ """
229
+ Override to execute setup tasks before task execution.
230
+
231
+ Only override this method with tasks that need to happen
232
+ regardless of tasks having an exception, ensure that no additional exception
233
+ will be raised, and always call super().__enter__
234
+ """
235
+ return self
236
+
237
+ def __exit__(self, exc_type, exc_val, exc_tb):
238
+ """
239
+ Override to execute teardown tasks after task execution regardless of task execution success.
240
+
241
+ Only override this method with tasks that need to happen
242
+ regardless of tasks having an exception, ensure that no additional exception
243
+ will be raised, and always call super().__exit__
244
+ """
245
+
246
+ def __repr__(self):
247
+ """Return the representation of the task."""
248
+ return (
249
+ f"{self.__class__.__name__}("
250
+ f"recipe_run_id={self.recipe_run_id}, "
251
+ f"workflow_name={self.workflow_name}, "
252
+ f"workflow_version={self.workflow_version}, "
253
+ f")"
254
+ )
255
+
256
+ def __str__(self):
257
+ """Return a string representation of the task."""
258
+ return repr(self)
@@ -27,7 +27,7 @@ def export_path() -> str:
27
27
 
28
28
  @pytest.fixture(scope="session")
29
29
  def task_subclass():
30
- """Sub class of the abstract task base class implementing methods that are expected to be subclassed with inspect-able metadata."""
30
+ """Subclass of the abstract task base class implementing methods that are expected to be subclassed with inspect-able metadata."""
31
31
  return Task
32
32
 
33
33
 
@@ -2,22 +2,12 @@
2
2
 
3
3
  import pytest
4
4
 
5
- from dkist_processing_core.config import core_configurations
6
5
  from dkist_processing_core.task import TaskBase
7
6
 
8
7
 
9
- @pytest.fixture(
10
- params=[pytest.param(True, id="apm_enabled"), pytest.param(False, id="apm_disabled")]
11
- )
12
- def apm_enabled(request, mocker):
13
- mocked_config = core_configurations.model_copy()
14
- mocked_config.elastic_apm_enabled = request.param
15
- mocker.patch("dkist_processing_core.task.core_configurations", mocked_config)
16
-
17
-
18
- def test_task_execution(task_subclass, apm_enabled):
8
+ def test_task_execution(task_subclass):
19
9
  """
20
- Given: Task subclass and parametrized APM configurations.
10
+ Given: Task subclass.
21
11
  When: calling the instance.
22
12
  Then: the run method is executed.
23
13
  """
@@ -30,7 +20,7 @@ def test_task_execution(task_subclass, apm_enabled):
30
20
 
31
21
  def test_task_run_failure(error_task_subclass):
32
22
  """
33
- Given: Task subclass and parametrized APM configurations.
23
+ Given: Task subclass.
34
24
  When: calling the instance.
35
25
  Then: the run method is executed.
36
26
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dkist-processing-core
3
- Version: 5.2.1
3
+ Version: 6.0.0
4
4
  Summary: Abstraction layer used by the DKIST science data processing pipelines with Apache Airflow
5
5
  Author-email: NSO / AURA <dkistdc@nso.edu>
6
6
  License: BSD-3-Clause
@@ -14,12 +14,11 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Requires-Python: >=3.12
15
15
  Description-Content-Type: text/x-rst
16
16
  Requires-Dist: apache-airflow[celery,postgres]==2.11.0
17
- Requires-Dist: elastic-apm<7.0.0
18
17
  Requires-Dist: requests>=2.23
19
- Requires-Dist: talus<2.0,>=1.1.0
18
+ Requires-Dist: talus<2.0,>=1.3.4
20
19
  Requires-Dist: pendulum
21
20
  Requires-Dist: nbformat>=5.9.2
22
- Requires-Dist: dkist-service-configuration<3.0,>=2.0.2
21
+ Requires-Dist: dkist-service-configuration<5.0,>=4.1.7
23
22
  Requires-Dist: pydantic>2.0
24
23
  Provides-Extra: test
25
24
  Requires-Dist: pytest; extra == "test"
@@ -128,53 +127,47 @@ Environment Variables
128
127
  ---------------------
129
128
 
130
129
  .. list-table::
131
- :widths: 10 70 10 10
130
+ :widths: 10 90
132
131
  :header-rows: 1
133
132
 
134
133
  * - Variable
135
- - Description
136
- - Type
137
- - Default
138
- * - BUILD_VERSION
139
- - Build/Export pipelines only. This is the value that will be appended to all artifacts and represents their unique version
140
- - STR
141
- - dev
134
+ - Field Info
135
+ * - LOGURU_LEVEL
136
+ - annotation=str required=False default='INFO' alias_priority=2 validation_alias='LOGURU_LEVEL' description='Log level for the application'
142
137
  * - MESH_CONFIG
143
- - Provides the dkistdc cloud mesh configuration. Specifically the location of the message broker
144
- - JSON
145
- - ``{}``
138
+ - annotation=dict[str, MeshService] required=False default_factory=dict alias_priority=2 validation_alias='MESH_CONFIG' description='Service mesh configuration' examples=[{'upstream_service_name': {'mesh_address': 'localhost', 'mesh_port': 6742}}]
139
+ * - RETRY_CONFIG
140
+ - annotation=RetryConfig required=False default_factory=RetryConfig description='Retry configuration for the service'
141
+ * - OTEL_SERVICE_NAME
142
+ - annotation=str required=False default='unknown-service-name' alias_priority=2 validation_alias='OTEL_SERVICE_NAME' description='Service name for OpenTelemetry'
143
+ * - DKIST_SERVICE_VERSION
144
+ - annotation=str required=False default='unknown-service-version' alias_priority=2 validation_alias='DKIST_SERVICE_VERSION' description='Service version for OpenTelemetry'
145
+ * - NOMAD_ALLOC_ID
146
+ - annotation=str required=False default='unknown-allocation-id' alias_priority=2 validation_alias='NOMAD_ALLOC_ID' description='Nomad allocation ID for OpenTelemetry'
147
+ * - OTEL_EXPORTER_OTLP_TRACES_INSECURE
148
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP traces'
149
+ * - OTEL_EXPORTER_OTLP_METRICS_INSECURE
150
+ - annotation=bool required=False default=True description='Use insecure connection for OTLP metrics'
151
+ * - OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
152
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP traces endpoint. Overrides mesh configuration' examples=['localhost:4317']
153
+ * - OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
154
+ - annotation=Union[str, NoneType] required=False default=None description='OTLP metrics endpoint. Overrides mesh configuration' examples=['localhost:4317']
155
+ * - OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
156
+ - annotation=list[str] required=False default_factory=list description='List of instrumentations to disable. https://opentelemetry.io/docs/zero-code/python/configuration/' examples=[['pika', 'requests']]
157
+ * - OTEL_PYTHON_FASTAPI_EXCLUDED_URLS
158
+ - annotation=str required=False default='health' description='Comma separated list of URLs to exclude from OpenTelemetry instrumentation in FastAPI.' examples=['client/.*/info,healthcheck']
159
+ * - SYSTEM_METRIC_INSTRUMENTATION_CONFIG
160
+ - annotation=Union[dict[str, bool], NoneType] required=False default=None description='Configuration for system metric instrumentation. https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/system_metrics/system_metrics.html' examples=[{'system.memory.usage': ['used', 'free', 'cached'], 'system.cpu.time': ['idle', 'user', 'system', 'irq'], 'system.network.io': ['transmit', 'receive'], 'process.runtime.memory': ['rss', 'vms'], 'process.runtime.cpu.time': ['user', 'system'], 'process.runtime.context_switches': ['involuntary', 'voluntary']}]
146
161
  * - ISB_USERNAME
147
- - Message broker user name
148
- - STR
149
- - guest
162
+ - annotation=str required=False default='guest' description='Username for the interservice-bus.'
150
163
  * - ISB_PASSWORD
151
- - Message broker password
152
- - STR
153
- - guest
164
+ - annotation=str required=False default='guest' description='Password for the interservice-bus.'
154
165
  * - ISB_EXCHANGE
155
- - Message Broker Exchange name for publishing messages
156
- - STR
157
- - master.direct.x
166
+ - annotation=str required=False default='master.direct.x' description='Exchange for the interservice-bus.'
158
167
  * - ISB_QUEUE_TYPE
159
- - Message Broker queue type for transporting messages
160
- - STR
161
- - classic
162
- * - ELASTIC_APM_SERVICE_NAME
163
- - Service Name used by Elastic Application Performance Monitoring
164
- - STR
165
- -
166
- * - ELASTIC_APM_OTHER_OPTIONS
167
- - Dictionary of configuration for the Elastic Application Performance Monitoring client
168
- - STR
169
- - ``{}``
170
- * - ELASTIC_APM_ENABLED
171
- - Flag to disable/enable Elastic Application Performance Monitoring client calls which are chatty if not connected to an APM server.
172
- - BOOL
173
- - FALSE
168
+ - annotation=str required=False default='classic' description='Queue type for the interservice-bus.' examples=['quorum', 'classic']
174
169
  * - BUILD_VERSION
175
- - Version of the pipeline. When built this makes its way into the workflow or dag name.
176
- - STR
177
- - dev
170
+ - annotation=str required=False default='dev' description='Fallback build version for workflow tasks.'
178
171
 
179
172
  Development
180
173
  -----------
@@ -1,10 +1,9 @@
1
1
  apache-airflow[celery,postgres]==2.11.0
2
- elastic-apm<7.0.0
3
2
  requests>=2.23
4
- talus<2.0,>=1.1.0
3
+ talus<2.0,>=1.3.4
5
4
  pendulum
6
5
  nbformat>=5.9.2
7
- dkist-service-configuration<3.0,>=2.0.2
6
+ dkist-service-configuration<5.0,>=4.1.7
8
7
  pydantic>2.0
9
8
 
10
9
  [docs]
@@ -33,9 +33,15 @@ autoapi_keep_files = True
33
33
 
34
34
  # -- Options for intersphinx extension -----------------------------------------
35
35
  intersphinx_mapping = {
36
+ # Official Python docs
36
37
  "python": (
37
38
  "https://docs.python.org/3/",
38
- (None, "http://www.astropy.org/astropy-data/intersphinx/python3.inv"),
39
+ "https://docs.python.org/3/objects.inv",
40
+ ),
41
+ # OpenTelemetry (Python)
42
+ "opentelemetry": (
43
+ "https://opentelemetry-python.readthedocs.io/en/stable/",
44
+ "https://opentelemetry-python.readthedocs.io/en/stable/objects.inv",
39
45
  ),
40
46
  }
41
47
  # Remaining sphinx settings are in dkist-sphinx-theme conf.py
@@ -22,12 +22,11 @@ classifiers = [
22
22
  ]
23
23
  dependencies = [
24
24
  "apache-airflow[postgres, celery] == 2.11.0",
25
- "elastic-apm < 7.0.0",
26
25
  "requests >= 2.23",
27
- "talus >= 1.1.0, <2.0",
26
+ "talus >= 1.3.4, <2.0",
28
27
  "pendulum",
29
28
  "nbformat >= 5.9.2",
30
- "dkist-service-configuration >=2.0.2, <3.0",
29
+ "dkist-service-configuration >=4.1.7, <5.0",
31
30
  "pydantic > 2.0",
32
31
  ]
33
32
 
@@ -1,250 +0,0 @@
1
- """
2
- Base class that is used to wrap the various DAG task methods.
3
-
4
- It provides support for user-defined setup and cleanup, task monitoring using Elastic APM,
5
- standardized logging and exception handling.
6
- """
7
-
8
- import logging
9
- from abc import ABC
10
- from abc import abstractmethod
11
- from contextlib import contextmanager
12
-
13
- import elasticapm
14
-
15
- from dkist_processing_core.config import core_configurations
16
-
17
- __all__ = ["TaskBase"]
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- class ApmTransaction:
23
- """
24
- Elastic APM transaction manager for a DAG Task.
25
-
26
- Without configuration, it disables itself.
27
- """
28
-
29
- @property
30
- def apm_service_name(self) -> str:
31
- """Format the service name for Elastic APM."""
32
- name = f"{self._workflow_name}-{self._workflow_version}"
33
- name = name.replace("_", "-")
34
- name = name.replace(".", "-")
35
- return name
36
-
37
- @property
38
- def apm_config(self) -> dict:
39
- """Override the Elastic APM configuration with the workflow specific service name."""
40
- core_config = core_configurations.apm_config
41
- core_config["SERVICE_NAME"] = self.apm_service_name
42
- return core_config
43
-
44
- def __init__(self, transaction_name: str, workflow_name: str, workflow_version: str) -> None:
45
- self._workflow_name = workflow_name
46
- self._workflow_version = workflow_version
47
- self.transaction_name = transaction_name
48
-
49
- if core_configurations.elastic_apm_enabled:
50
- self.client = elasticapm.Client(self.apm_config)
51
- self.instrument()
52
- self.client.begin_transaction(transaction_type="Task")
53
- logger.info(f"APM Configured: {self=} {self.apm_config=}")
54
- else:
55
- logger.warning(f"APM Not Configured")
56
-
57
- @contextmanager
58
- def capture_span(self, name: str, *args, **kwargs):
59
- if core_configurations.elastic_apm_enabled:
60
- try:
61
- with elasticapm.capture_span(name, *args, **kwargs):
62
- yield
63
- finally:
64
- pass
65
- else:
66
- try:
67
- yield
68
- finally:
69
- pass
70
-
71
- def close(self, exc_type=None):
72
- if core_configurations.elastic_apm_enabled:
73
- result = "Success"
74
- if exc_type is not None:
75
- result = "Error" # pragma: no cover
76
- self.client.capture_exception(handled=False) # pragma: no cover
77
- self.client.end_transaction(name=self.transaction_name, result=result)
78
- self.client.close()
79
-
80
- @staticmethod
81
- def instrument():
82
- """Vendored implementation of elasticapm.instrumentation.control.instrument changed to omit certain frameworks."""
83
- omit_frameworks = {
84
- "elasticapm.instrumentation.packages.redis.RedisInstrumentation",
85
- "elasticapm.instrumentation.packages.redis.RedisPipelineInstrumentation",
86
- "elasticapm.instrumentation.packages.redis.RedisConnectionInstrumentation",
87
- "elasticapm.instrumentation.packages.asyncio.aioredis.RedisConnectionPoolInstrumentation",
88
- "elasticapm.instrumentation.packages.asyncio.aioredis.RedisPipelineInstrumentation",
89
- "elasticapm.instrumentation.packages.asyncio.aioredis.RedisConnectionInstrumentation",
90
- }
91
-
92
- from elasticapm.instrumentation.control import _lock
93
- from elasticapm.instrumentation.register import _cls_register
94
- from elasticapm.instrumentation.register import _instrumentation_singletons
95
- from elasticapm.instrumentation.register import import_string
96
-
97
- # from elasticapm.instrumentation.control.instrument
98
- with _lock:
99
- # update to vendored code
100
- filtered_cls_register = _cls_register.difference(omit_frameworks)
101
- # from elasticapm.instrumentation.register.get_instrumentation_objects
102
- for cls_str in filtered_cls_register:
103
- if cls_str not in _instrumentation_singletons:
104
- cls = import_string(cls_str)
105
- _instrumentation_singletons[cls_str] = cls()
106
- obj = _instrumentation_singletons[cls_str]
107
- # from elasticapm.instrumentation.control.instrument
108
- obj.instrument()
109
-
110
- def __repr__(self):
111
- return f"{self.__class__.__name__}(transaction_name={self.transaction_name}, workflow_name={self._workflow_name}, workflow_version={self._workflow_version})"
112
-
113
-
114
- class TaskBase(ABC):
115
- """
116
- A Task is the interface between processing code and its execution. Processing code can follow this interface through subclassing and remain agnostic to the execution environment.
117
-
118
- Each DAG task must implement its own subclass of this abstract wrapper class.
119
-
120
- Intended instantiation is as a context manager
121
-
122
- >>> class RealTask(TaskBase):
123
- >>> def run(self):
124
- >>> pass
125
- >>>
126
- >>> with RealTask(1, "a", "b") as task:
127
- >>> task()
128
-
129
- Task names in airflow are the same as the class name
130
- Additional methods can be added but will only be called if they are referenced via run,
131
- pre_run, post_run, or __exit__
132
-
133
- overriding methods other than run, pre_run, post_run, and in special cases __exit__ is
134
- discouraged as they are used internally to support the abstraction.
135
- e.g. __init__ is called by the core api without user involvement so adding parameters will not
136
- result in them being passed in as there is no client interface to __init__.
137
-
138
- To use the apm infrastructure in subclass code one would do the following:
139
-
140
- >>> def foo(self):
141
- >>> with self.apm_step("do detailed work"):
142
- >>> pass # do work
143
-
144
- Parameters
145
- ----------
146
- recipe_run_id : int
147
- id of the recipe run used to identify the workflow run this task is part of
148
- workflow_name : str
149
- name of the workflow to which this instance of the task belongs
150
- workflow_version : str
151
- version of the workflow to which this instance of the task belongs
152
-
153
- """
154
-
155
- retries = 0
156
- retry_delay_seconds = 60
157
-
158
- def __init__(
159
- self,
160
- recipe_run_id: int,
161
- workflow_name: str,
162
- workflow_version: str,
163
- ):
164
- """
165
- Instantiate a Task.
166
-
167
- The details of instantiation may vary based upon the export target but this signature is what is expected by the instantiation transformation (Node) code.
168
- """
169
- self.recipe_run_id = int(recipe_run_id)
170
- self.workflow_name = workflow_name
171
- self.workflow_version = workflow_version
172
- self.task_name = self.__class__.__name__
173
- logger.info(f"Task {self.task_name} initialized")
174
- self.apm = ApmTransaction(
175
- transaction_name=self.task_name,
176
- workflow_name=self.workflow_name,
177
- workflow_version=self.workflow_version,
178
- )
179
- self.apm_step = self.apm.capture_span # abbreviated syntax for capture span context mgr
180
-
181
- def pre_run(self) -> None:
182
- """Intended to be overridden and will execute prior to run() with Elastic APM span capturing."""
183
-
184
- @abstractmethod
185
- def run(self) -> None:
186
- """Abstract method that must be overridden to execute the desired DAG task."""
187
-
188
- def post_run(self) -> None:
189
- """Intended to be overridden and will execute after run() with Elastic APM span capturing."""
190
-
191
- def rollback(self) -> None:
192
- """Rollback any changes to persistent stores performed by the task."""
193
-
194
- def __call__(self) -> None:
195
- """
196
- DAG task wrapper. Execution is instrumented with Application Performance Monitoring if configured.
197
-
198
- The standard execution sequence is:
199
-
200
- 1 run
201
-
202
- 2 record provenance
203
-
204
- Returns
205
- -------
206
- None
207
-
208
- """
209
- logger.info(f"Task {self.task_name} started")
210
- with self.apm_step("Pre Run", span_type="code.core", labels={"type": "core"}):
211
- self.pre_run()
212
- with self.apm_step("Run", span_type="code.core", labels={"type": "core"}):
213
- self.run()
214
- with self.apm_step("Post Run", span_type="code.core", labels={"type": "core"}):
215
- self.post_run()
216
- logger.info(f"Task {self.task_name} complete")
217
-
218
- def __enter__(self):
219
- """
220
- Override to execute setup tasks before task execution.
221
-
222
- Only override this method with tasks that need to happen
223
- regardless of tasks having an exception, ensure that no additional exception
224
- will be raised, and always call super().__enter__
225
- """
226
- return self
227
-
228
- def __exit__(self, exc_type, exc_val, exc_tb):
229
- """
230
- Override to execute teardown tasks after task execution regardless of task execution success.
231
-
232
- Only override this method with tasks that need to happen
233
- regardless of tasks having an exception, ensure that no additional exception
234
- will be raised, and always call super().__exit__
235
- """
236
- self.apm.close(exc_type=exc_type)
237
-
238
- def __repr__(self):
239
- """Return the representation of the task."""
240
- return (
241
- f"{self.__class__.__name__}("
242
- f"recipe_run_id={self.recipe_run_id}, "
243
- f"workflow_name={self.workflow_name}, "
244
- f"workflow_version={self.workflow_version}, "
245
- f")"
246
- )
247
-
248
- def __str__(self):
249
- """Return a string representation of the task."""
250
- return repr(self)