bizon 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bizon/alerting/__init__.py +0 -0
- bizon/alerting/alerts.py +23 -0
- bizon/alerting/models.py +28 -0
- bizon/alerting/slack/__init__.py +0 -0
- bizon/alerting/slack/config.py +5 -0
- bizon/alerting/slack/handler.py +39 -0
- bizon/cli/main.py +7 -3
- bizon/common/models.py +33 -7
- bizon/{destinations → connectors/destinations}/bigquery/config/bigquery.example.yml +3 -4
- bizon/connectors/destinations/bigquery/src/config.py +128 -0
- bizon/{destinations → connectors/destinations}/bigquery/src/destination.py +48 -25
- bizon/connectors/destinations/bigquery_streaming/src/config.py +57 -0
- bizon/connectors/destinations/bigquery_streaming/src/destination.py +377 -0
- bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +57 -0
- bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +446 -0
- bizon/{destinations/bigquery_streaming → connectors/destinations/bigquery_streaming_v2}/src/proto_utils.py +30 -36
- bizon/{destinations → connectors/destinations}/file/src/config.py +9 -3
- bizon/connectors/destinations/file/src/destination.py +56 -0
- bizon/{destinations → connectors/destinations}/logger/src/config.py +2 -1
- bizon/{destinations → connectors/destinations}/logger/src/destination.py +18 -3
- bizon/connectors/sources/cycle/config/cycle.example.yml +15 -0
- bizon/connectors/sources/cycle/src/source.py +133 -0
- bizon/{sources/periscope/tests/periscope_pipeline_dashboard.py → connectors/sources/cycle/tests/cycle_customers.py} +1 -1
- bizon/{sources → connectors/sources}/dummy/config/dummy.example.yml +2 -2
- bizon/{sources → connectors/sources}/dummy/src/fake_api.py +6 -1
- bizon/{sources → connectors/sources}/dummy/src/source.py +18 -5
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline.py +2 -2
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_bigquery_backend.py +2 -2
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_kafka.py +2 -2
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_rabbitmq.py +2 -2
- bizon/connectors/sources/dummy/tests/dummy_pipeline_unnest.py +29 -0
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_write_data_bigquery.py +2 -2
- bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_write_data_bigquery_through_kafka.py +2 -2
- bizon/{sources → connectors/sources}/gsheets/config/default_auth.example.yml +4 -2
- bizon/{sources → connectors/sources}/gsheets/config/service_account.example.yml +4 -2
- bizon/{sources → connectors/sources}/hubspot/config/api_key.example.yml +4 -2
- bizon/{sources → connectors/sources}/hubspot/config/oauth.example.yml +4 -2
- bizon/{sources → connectors/sources}/hubspot/src/hubspot_objects.py +1 -1
- bizon/{sources → connectors/sources}/kafka/config/kafka.example.yml +3 -5
- bizon/connectors/sources/kafka/config/kafka_debezium.example.yml +110 -0
- bizon/connectors/sources/kafka/src/callback.py +18 -0
- bizon/connectors/sources/kafka/src/config.py +69 -0
- bizon/connectors/sources/kafka/src/decode.py +93 -0
- bizon/connectors/sources/kafka/src/source.py +381 -0
- bizon/connectors/sources/kafka/tests/kafka_pipeline.py +7 -0
- bizon/connectors/sources/periscope/config/periscope_charts.example.yml +20 -0
- bizon/connectors/sources/periscope/config/periscope_dashboards.example.yml +20 -0
- bizon/{sources → connectors/sources}/periscope/src/source.py +137 -13
- bizon/connectors/sources/periscope/tests/periscope_pipeline_dashboard.py +9 -0
- bizon/connectors/sources/pokeapi/config/pokeapi_pokemon_to_json.example.yml +19 -0
- bizon/connectors/sources/pokeapi/config/pokeapi_pokemon_to_logger.example.yml +10 -0
- bizon/connectors/sources/pokeapi/src/source.py +79 -0
- bizon/{destinations → destination}/buffer.py +5 -0
- bizon/destination/config.py +83 -0
- bizon/{destinations → destination}/destination.py +103 -15
- bizon/engine/backend/adapters/sqlalchemy/backend.py +3 -1
- bizon/engine/engine.py +20 -1
- bizon/engine/pipeline/consumer.py +73 -5
- bizon/engine/pipeline/models.py +8 -3
- bizon/engine/pipeline/producer.py +18 -9
- bizon/engine/queue/adapters/kafka/consumer.py +2 -2
- bizon/engine/queue/adapters/kafka/queue.py +3 -2
- bizon/engine/queue/adapters/python_queue/consumer.py +40 -23
- bizon/engine/queue/adapters/python_queue/queue.py +19 -9
- bizon/engine/queue/adapters/rabbitmq/consumer.py +3 -6
- bizon/engine/queue/adapters/rabbitmq/queue.py +3 -2
- bizon/engine/queue/config.py +16 -0
- bizon/engine/queue/queue.py +17 -16
- bizon/engine/runner/adapters/process.py +15 -2
- bizon/engine/runner/adapters/streaming.py +121 -0
- bizon/engine/runner/adapters/thread.py +32 -9
- bizon/engine/runner/config.py +28 -0
- bizon/engine/runner/runner.py +113 -24
- bizon/monitoring/__init__.py +0 -0
- bizon/monitoring/config.py +39 -0
- bizon/monitoring/datadog/__init__.py +0 -0
- bizon/monitoring/datadog/monitor.py +153 -0
- bizon/monitoring/monitor.py +71 -0
- bizon/monitoring/noop/__init__.py +0 -0
- bizon/monitoring/noop/monitor.py +30 -0
- bizon/source/callback.py +24 -0
- bizon/source/config.py +3 -3
- bizon/source/cursor.py +1 -1
- bizon/source/discover.py +4 -3
- bizon/source/models.py +4 -2
- bizon/source/source.py +10 -2
- bizon/transform/config.py +8 -0
- bizon/transform/transform.py +48 -0
- {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/METADATA +23 -6
- bizon-0.1.2.dist-info/RECORD +123 -0
- {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/WHEEL +1 -1
- bizon/destinations/bigquery/src/config.py +0 -51
- bizon/destinations/bigquery_streaming/src/config.py +0 -43
- bizon/destinations/bigquery_streaming/src/destination.py +0 -154
- bizon/destinations/config.py +0 -47
- bizon/destinations/file/src/destination.py +0 -27
- bizon/sources/kafka/src/source.py +0 -357
- bizon/sources/kafka/tests/kafka_pipeline.py +0 -9
- bizon/sources/periscope/config/periscope_charts.example.yml +0 -26
- bizon/sources/periscope/config/periscope_dashboards.example.yml +0 -26
- bizon-0.1.0.dist-info/RECORD +0 -93
- /bizon/{sources → connectors/sources}/gsheets/src/source.py +0 -0
- /bizon/{sources → connectors/sources}/gsheets/tests/gsheets_pipeline.py +0 -0
- /bizon/{sources → connectors/sources}/hubspot/src/hubspot_base.py +0 -0
- /bizon/{sources → connectors/sources}/hubspot/src/models/hs_object.py +0 -0
- /bizon/{sources → connectors/sources}/hubspot/tests/hubspot_pipeline.py +0 -0
- /bizon/{sources → connectors/sources}/periscope/tests/periscope_pipeline_charts.py +0 -0
- /bizon/{destinations → destination}/models.py +0 -0
- {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/LICENSE +0 -0
- {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/entry_points.txt +0 -0
bizon/engine/runner/adapters/thread.py
CHANGED

@@ -1,10 +1,12 @@
 import concurrent.futures
 import time
-import
+from threading import Event
 
 from loguru import logger
 
 from bizon.common.models import BizonConfig
+from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.engine.runner.config import RunnerStatus
 from bizon.engine.runner.runner import AbstractRunner
 
 
@@ -25,7 +27,7 @@ class ThreadRunner(AbstractRunner):
 
         return extra_kwargs
 
-    def run(self) ->
+    def run(self) -> RunnerStatus:
         """Run the pipeline with dedicated threads for source and destination"""
 
         extra_kwargs = self.get_kwargs()
@@ -35,6 +37,10 @@ class ThreadRunner(AbstractRunner):
         result_producer = None
         result_consumer = None
 
+        # Start the producer and consumer events
+        producer_stop_event = Event()
+        consumer_stop_event = Event()
+
         extra_kwargs = self.get_kwargs()
 
         with concurrent.futures.ThreadPoolExecutor(
@@ -46,6 +52,7 @@ class ThreadRunner(AbstractRunner):
                 self.bizon_config,
                 self.config,
                 job.id,
+                producer_stop_event,
                 **extra_kwargs,
             )
             logger.info("Producer thread has started ...")
@@ -55,7 +62,9 @@ class ThreadRunner(AbstractRunner):
             future_consumer = executor.submit(
                 AbstractRunner.instanciate_and_run_consumer,
                 self.bizon_config,
+                self.config,
                 job.id,
+                consumer_stop_event,
                 **extra_kwargs,
             )
             logger.info("Consumer thread has started ...")
@@ -68,14 +77,28 @@ class ThreadRunner(AbstractRunner):
                     self._is_running = False
 
                 if not future_producer.running():
-                    result_producer = future_producer.result()
+                    result_producer: PipelineReturnStatus = future_producer.result()
                     logger.info(f"Producer thread stopped running with result: {result_producer}")
 
+                    if result_producer.SUCCESS:
+                        logger.info("Producer thread has finished successfully, will wait for consumer to finish ...")
+                    else:
+                        logger.error("Producer thread failed, stopping consumer ...")
+                        consumer_stop_event.set()
+
                 if not future_consumer.running():
-
-
-
-
-                    logger.
+                    result_consumer = future_consumer.result()
+                    logger.info(f"Consumer thread stopped running with result: {result_consumer}")
+
+                    if result_consumer == PipelineReturnStatus.SUCCESS:
+                        logger.info("Consumer thread has finished successfully")
+                    else:
+                        logger.error("Consumer thread failed, stopping producer ...")
+                        producer_stop_event.set()
+
+        runner_status = RunnerStatus(producer=future_producer.result(), consumer=future_consumer.result())
+
+        if not runner_status.is_success:
+            logger.error(runner_status.to_string())
 
-        return
+        return runner_status
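The new ThreadRunner coordinates shutdown through a pair of threading.Event objects: when one side of the pipeline fails, the runner sets the other side's stop event so it can exit cooperatively. Note that `if result_producer.SUCCESS:` appears to reference the enum member through the instance, which is always truthy in Python, so its else branch can never run; the consumer check below it uses the comparison form (`== PipelineReturnStatus.SUCCESS`) that is presumably intended. A minimal standalone sketch of the handshake, with invented names, not bizon code:

    import time
    from concurrent.futures import ThreadPoolExecutor
    from threading import Event

    def worker(stop_event: Event, fail: bool = False) -> str:
        # Poll the stop event between units of work, like producer/consumer do.
        for _ in range(5):
            if stop_event.is_set():
                return "stopped"  # peer failed, exit cooperatively
            if fail:
                return "error"
            time.sleep(0.05)
        return "success"

    producer_stop_event, consumer_stop_event = Event(), Event()
    with ThreadPoolExecutor(max_workers=2) as pool:
        future_producer = pool.submit(worker, producer_stop_event, True)
        future_consumer = pool.submit(worker, consumer_stop_event)
        if future_producer.result() != "success":
            consumer_stop_event.set()  # mirrors consumer_stop_event.set() above
    print(future_producer.result(), future_consumer.result())  # typically: error stopped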
bizon/engine/runner/config.py
CHANGED

@@ -3,10 +3,13 @@ from typing import Optional
 
 from pydantic import BaseModel, Field
 
+from bizon.engine.pipeline.models import PipelineReturnStatus
+
 
 class RunnerTypes(str, Enum):
     THREAD = "thread"
     PROCESS = "process"
+    STREAM = "stream"
 
 
 class LoggerLevel(str, Enum):
@@ -49,3 +52,28 @@ class RunnerConfig(BaseModel):
         description="Logging level",
         default=LoggerLevel.INFO,
     )
+
+
+class RunnerStatus(BaseModel):
+    producer: Optional[PipelineReturnStatus] = None
+    consumer: Optional[PipelineReturnStatus] = None
+    stream: Optional[PipelineReturnStatus] = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        if not ((self.producer is not None and self.consumer is not None) or self.stream is not None):
+            raise ValueError("Either both producer and consumer must be set, or stream must be set")
+
+    @property
+    def is_success(self):
+        if self.stream is not None:
+            return self.stream == PipelineReturnStatus.SUCCESS
+        return self.producer == PipelineReturnStatus.SUCCESS and self.consumer == PipelineReturnStatus.SUCCESS
+
+    def to_string(self):
+        if self.stream is not None:
+            return f"Pipeline finished with status {'Success' if self.is_success else 'Failure'} (Stream: {self.stream.value})"
+        return (
+            f"Pipeline finished with status {'Success' if self.is_success else 'Failure'} "
+            f"(Producer: {self.producer.value}, Consumer: {self.consumer.value})"
+        )
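RunnerStatus validates its shape at construction time: a thread or process runner must report both a producer and a consumer status, while the new stream runner reports a single stream status. A small sketch of that contract, assuming PipelineReturnStatus is a str-valued enum with a SUCCESS member as used above (stand-in definitions, not bizon code):

    from enum import Enum
    from typing import Optional

    from pydantic import BaseModel

    class PipelineReturnStatus(str, Enum):  # stand-in for bizon's enum
        SUCCESS = "success"
        ERROR = "error"

    class RunnerStatus(BaseModel):  # same logic as the class added above
        producer: Optional[PipelineReturnStatus] = None
        consumer: Optional[PipelineReturnStatus] = None
        stream: Optional[PipelineReturnStatus] = None

        def __init__(self, **data):
            super().__init__(**data)
            if not ((self.producer is not None and self.consumer is not None) or self.stream is not None):
                raise ValueError("Either both producer and consumer must be set, or stream must be set")

        @property
        def is_success(self) -> bool:
            if self.stream is not None:
                return self.stream == PipelineReturnStatus.SUCCESS
            return self.producer == PipelineReturnStatus.SUCCESS and self.consumer == PipelineReturnStatus.SUCCESS

    print(RunnerStatus(producer="success", consumer="error").is_success)  # False
    print(RunnerStatus(stream="success").is_success)                      # True
    RunnerStatus(producer="success")  # raises ValueError (consumer missing)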
bizon/engine/runner/runner.py
CHANGED

@@ -1,18 +1,28 @@
+import multiprocessing
+import multiprocessing.synchronize
 import os
 import sys
+import threading
 from abc import ABC, abstractmethod
+from typing import Union
 
 from loguru import logger
 
+from bizon.alerting.models import AlertMethod
 from bizon.cli.utils import parse_from_yaml
 from bizon.common.models import BizonConfig, SyncMetadata
-from bizon.
+from bizon.destination.destination import AbstractDestination, DestinationFactory
 from bizon.engine.backend.backend import AbstractBackend, BackendFactory
 from bizon.engine.backend.models import JobStatus, StreamJob
 from bizon.engine.pipeline.producer import Producer
 from bizon.engine.queue.queue import AbstractQueue, QueueFactory
+from bizon.engine.runner.config import RunnerStatus
+from bizon.monitoring.monitor import AbstractMonitor, MonitorFactory
+from bizon.source.callback import AbstractSourceCallback
+from bizon.source.config import SourceSyncModes
 from bizon.source.discover import get_source_instance_by_source_and_stream
 from bizon.source.source import AbstractSource
+from bizon.transform.transform import Transform
 
 
 class AbstractRunner(ABC):
@@ -21,15 +31,31 @@ class AbstractRunner(ABC):
         # Internal state
         self._is_running: bool = False
 
-
-        self.
-
+        self.config = config
+        self.bizon_config = BizonConfig.model_validate(obj=self.config)
+
+        # Set pipeline information as environment variables
+        os.environ["BIZON_SYNC_NAME"] = self.bizon_config.name
+        os.environ["BIZON_SOURCE_NAME"] = self.bizon_config.source.name
+        os.environ["BIZON_SOURCE_STREAM"] = self.bizon_config.source.stream
+        os.environ["BIZON_DESTINATION_NAME"] = self.bizon_config.destination.name
 
         # Set log level
         logger.info(f"Setting log level to {self.bizon_config.engine.runner.log_level.name}")
         logger.remove()
         logger.add(sys.stderr, level=self.bizon_config.engine.runner.log_level)
 
+        if self.bizon_config.alerting:
+            logger.info(f"Setting up alerting method {self.bizon_config.alerting.type}")
+            if self.bizon_config.alerting.type == AlertMethod.SLACK:
+                from bizon.alerting.slack.handler import SlackHandler
+
+                alert = SlackHandler(
+                    config=self.bizon_config.alerting.config,
+                    log_levels=self.bizon_config.alerting.log_levels,
+                )
+                alert.add_handlers()
+
     @property
     def is_running(self) -> bool:
         """Return True if the pipeline is running"""
@@ -45,17 +71,23 @@ class AbstractRunner(ABC):
     def get_source(bizon_config: BizonConfig, config: dict) -> AbstractSource:
         """Get an instance of the source based on the source config dict"""
 
-        logger.info(f"Creating client for {bizon_config.source.
+        logger.info(f"Creating client for {bizon_config.source.name} - {bizon_config.source.stream} ...")
 
         # Get the client class, validate the config and return the client
        return get_source_instance_by_source_and_stream(
-            source_name=bizon_config.source.
-            stream_name=bizon_config.source.
+            source_name=bizon_config.source.name,
+            stream_name=bizon_config.source.stream,
             source_config=config["source"],  # We pass the raw config to have flexibility for custom sources
         )
 
     @staticmethod
-    def get_destination(
+    def get_destination(
+        bizon_config: BizonConfig,
+        backend: AbstractBackend,
+        job_id: str,
+        source_callback: AbstractSourceCallback,
+        monitor: AbstractMonitor,
+    ) -> AbstractDestination:
         """Get an instance of the destination based on the destination config dict"""
 
         sync_metadata = SyncMetadata.from_bizon_config(job_id=job_id, config=bizon_config)
@@ -64,6 +96,8 @@ class AbstractRunner(ABC):
             sync_metadata=sync_metadata,
             config=bizon_config.destination,
             backend=backend,
+            source_callback=source_callback,
+            monitor=monitor,
         )
 
     @staticmethod
@@ -89,6 +123,16 @@ class AbstractRunner(ABC):
             **kwargs,
         )
 
+    @staticmethod
+    def get_transform(bizon_config: BizonConfig) -> Transform:
+        """Return the transform instance to apply to the source records"""
+        return Transform(transforms=bizon_config.transforms)
+
+    @staticmethod
+    def get_monitoring_client(sync_metadata: SyncMetadata, bizon_config: BizonConfig) -> AbstractMonitor:
+        """Return the monitoring client instance"""
+        return MonitorFactory.get_monitor(sync_metadata, bizon_config.monitoring)
+
     @staticmethod
     def get_or_create_job(
         bizon_config: BizonConfig,
@@ -101,15 +145,15 @@ class AbstractRunner(ABC):
         # Retrieve the last job for this stream
         job = backend.get_running_stream_job(
             name=bizon_config.name,
-            source_name=bizon_config.source.
-            stream_name=bizon_config.source.
+            source_name=bizon_config.source.name,
+            stream_name=bizon_config.source.stream,
             session=session,
         )
 
         if job:
             # If force_create and a job is already running, we cancel it and create a new one
             if force_create:
-                logger.info(
+                logger.info("Found an existing job, cancelling it...")
                 backend.update_stream_job_status(job_id=job.id, job_status=JobStatus.CANCELED)
                 logger.info(f"Job {job.id} canceled. Creating a new one...")
             # Otherwise we return the existing job
@@ -119,13 +163,16 @@ class AbstractRunner(ABC):
 
         # If no job is running, we create a new one:
         # Get the total number of records
-
+        if bizon_config.source.sync_mode == SourceSyncModes.STREAM:
+            total_records = None  # Not available for stream mode
+        else:
+            total_records = source.get_total_records_count()
 
         # Create a new job
         job = backend.create_stream_job(
             name=bizon_config.name,
-            source_name=bizon_config.source.
-            stream_name=bizon_config.source.
+            source_name=bizon_config.source.name,
+            stream_name=bizon_config.source.stream,
             sync_mode=bizon_config.source.sync_mode,
             total_records_to_fetch=total_records,
             session=session,
@@ -148,9 +195,7 @@ class AbstractRunner(ABC):
         source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)
 
         check_connection, connection_error = source.check_connection()
-        logger.info(
-            f"Connection to source {bizon_config.source.source_name} - {bizon_config.source.stream_name} successful"
-        )
+        logger.info(f"Connection to source {bizon_config.source.name} - {bizon_config.source.stream} successful")
 
         if not check_connection:
             logger.error(f"Error while connecting to source: {connection_error}")
@@ -170,12 +215,24 @@ class AbstractRunner(ABC):
         return job
 
     @staticmethod
-    def instanciate_and_run_producer(
+    def instanciate_and_run_producer(
+        bizon_config: BizonConfig,
+        config: dict,
+        job_id: str,
+        stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
+        **kwargs,
+    ):
 
+        # Get the source instance
         source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)
+
+        # Get the queue instance
         queue = AbstractRunner.get_queue(bizon_config=bizon_config, **kwargs)
+
+        # Get the backend instance
         backend = AbstractRunner.get_backend(bizon_config=bizon_config, **kwargs)
 
+        # Create the producer instance
         producer = AbstractRunner.get_producer(
             bizon_config=bizon_config,
             source=source,
@@ -183,22 +240,54 @@ class AbstractRunner(ABC):
             backend=backend,
         )
 
-
+        # Run the producer
+        status = producer.run(job_id, stop_event)
         return status
 
     @staticmethod
-    def instanciate_and_run_consumer(
+    def instanciate_and_run_consumer(
+        bizon_config: BizonConfig,
+        config: dict,
+        job_id: str,
+        stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
+        **kwargs,
+    ):
+        # Get the source callback instance
+        source_callback = AbstractRunner.get_source(
+            bizon_config=bizon_config, config=config
+        ).get_source_callback_instance()
 
+        sync_metadata = SyncMetadata.from_bizon_config(job_id=job_id, config=bizon_config)
+
+        # Get the queue instance
         queue = AbstractRunner.get_queue(bizon_config=bizon_config, **kwargs)
+
+        # Get the backend instance
         backend = AbstractRunner.get_backend(bizon_config=bizon_config, **kwargs)
-        destination = AbstractRunner.get_destination(bizon_config=bizon_config, backend=backend, job_id=job_id)
 
-
+        # Get the monitor instance
+        monitor = AbstractRunner.get_monitoring_client(sync_metadata=sync_metadata, bizon_config=bizon_config)
+
+        # Get the destination instance
+        destination = AbstractRunner.get_destination(
+            bizon_config=bizon_config, backend=backend, job_id=job_id, source_callback=source_callback, monitor=monitor
+        )
+
+        # Get the transform instance
+        transform = AbstractRunner.get_transform(bizon_config=bizon_config)
+
+        # Create the consumer instance
+        consumer = queue.get_consumer(
+            destination=destination,
+            transform=transform,
+            monitor=monitor,
+        )
 
-
+        # Run the consumer
+        status = consumer.run(stop_event)
         return status
 
     @abstractmethod
-    def run(self) ->
+    def run(self) -> RunnerStatus:
         """Run the pipeline with dedicated adapter for source and destination"""
         pass
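Both instanciate_and_run_producer and instanciate_and_run_consumer now accept a stop_event typed as Union[multiprocessing.synchronize.Event, threading.Event], so the same producer/consumer code can be driven by the thread, process, or stream runner. The two Event flavors share the is_set()/set()/wait() interface, which is all a polling loop needs. A quick sketch, not bizon code:

    import multiprocessing
    import threading
    import time

    def run_until_stopped(stop_event) -> str:
        # Works with either Event flavor: both expose is_set() and set().
        while not stop_event.is_set():
            time.sleep(0.01)
        return "stopped"

    for event in (threading.Event(), multiprocessing.Event()):
        worker = threading.Thread(target=lambda e=event: print(run_until_stopped(e)))
        worker.start()
        event.set()
        worker.join()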
bizon/monitoring/__init__.py
ADDED
File without changes

bizon/monitoring/config.py
ADDED

@@ -0,0 +1,39 @@
+from enum import Enum
+from typing import Dict, Optional
+
+from pydantic import BaseModel, Field
+
+
+class MonitorType(str, Enum):
+    DATADOG = "datadog"
+
+
+class BaseMonitoringConfig(BaseModel):
+    enable_tracing: bool = Field(default=False, description="Enable tracing for the monitor")
+
+
+class DatadogConfig(BaseMonitoringConfig):
+    datadog_agent_host: Optional[str] = None
+    datadog_host_env_var: Optional[str] = None
+    datadog_agent_port: int = 8125
+    tags: Optional[Dict[str, str]] = Field(default={}, description="Key-value pairs to add to the monitor as tags")
+
+    @property
+    def host_is_configured(self) -> bool:
+        return bool(self.datadog_agent_host or self.datadog_host_env_var)
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        if not self.host_is_configured:
+            raise ValueError("Either datadog_agent_host or datadog_host_env_var must be specified")
+
+    class Config:
+        extra = "forbid"
+
+
+class MonitoringConfig(BaseMonitoringConfig):
+    type: MonitorType
+    config: Optional[DatadogConfig] = None
+
+    class Config:
+        extra = "forbid"
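DatadogConfig rejects a configuration that provides neither a static agent host nor an environment variable to read it from, which surfaces misconfiguration at load time rather than when the first metric is emitted. A short sketch of that behavior, assuming bizon 0.1.2 is installed so the module above is importable (DD_AGENT_HOST is an arbitrary example variable name):

    from bizon.monitoring.config import DatadogConfig, MonitoringConfig

    try:
        DatadogConfig(datadog_agent_port=8125)  # no host, no env var
    except ValueError as e:
        print(e)  # Either datadog_agent_host or datadog_host_env_var must be specified

    cfg = MonitoringConfig(
        type="datadog",
        config=DatadogConfig(datadog_host_env_var="DD_AGENT_HOST"),  # example env var
    )
    print(cfg.config.host_is_configured)  # True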
bizon/monitoring/datadog/__init__.py
ADDED
File without changes

bizon/monitoring/datadog/monitor.py
ADDED

@@ -0,0 +1,153 @@
+import os
+from contextlib import contextmanager
+from typing import Dict, List, Union
+
+from datadog import initialize, statsd
+from loguru import logger
+
+from bizon.common.models import SyncMetadata
+from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.monitoring.config import MonitoringConfig
+from bizon.monitoring.monitor import AbstractMonitor
+from bizon.source.models import SourceRecord
+
+
+class DatadogMonitor(AbstractMonitor):
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        super().__init__(sync_metadata, monitoring_config)
+
+        # In Kubernetes, set the host dynamically
+        try:
+            datadog_host_from_env_var = os.getenv(monitoring_config.config.datadog_host_env_var)
+            if datadog_host_from_env_var:
+                initialize(
+                    statsd_host=datadog_host_from_env_var,
+                    statsd_port=monitoring_config.config.datadog_agent_port,
+                )
+            else:
+                initialize(
+                    statsd_host=monitoring_config.config.datadog_agent_host,
+                    statsd_port=monitoring_config.config.datadog_agent_port,
+                )
+        except Exception as e:
+            logger.info(f"Failed to initialize Datadog agent: {e}")
+
+        self.pipeline_monitor_status = "bizon_pipeline.status"
+        self.tags = [
+            f"pipeline_name:{self.sync_metadata.name}",
+            f"pipeline_stream:{self.sync_metadata.stream_name}",
+            f"pipeline_source:{self.sync_metadata.source_name}",
+            f"pipeline_destination:{self.sync_metadata.destination_name}",
+        ] + [f"{key}:{value}" for key, value in self.monitoring_config.config.tags.items()]
+
+        self.pipeline_active_pipelines = "bizon_pipeline.active_pipelines"
+        self.pipeline_records_synced = "bizon_pipeline.records_synced"
+        self.pipeline_large_records = "bizon_pipeline.large_records"
+
+    def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None:
+        """
+        Track the status of the pipeline.
+
+        Args:
+            status (str): The current status of the pipeline (e.g., 'running', 'failed', 'completed').
+        """
+
+        statsd.increment(
+            self.pipeline_monitor_status,
+            tags=self.tags
+            + [f"pipeline_status:{pipeline_status}"]
+            + [f"{key}:{value}" for key, value in extra_tags.items()],
+        )
+
+    def track_records_synced(
+        self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: List[Dict[str, str]] = []
+    ) -> Union[List[Dict[str, str]], None]:
+        """
+        Track the number of records synced in the pipeline.
+
+        Args:
+            num_records (int): Number of records synced in this batch
+        """
+        statsd.increment(
+            self.pipeline_records_synced,
+            value=num_records,
+            tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()],
+        )
+        if os.getenv("DD_DATA_STREAMS_ENABLED") == "true":
+            from ddtrace.data_streams import set_produce_checkpoint
+
+            destination_type = self.sync_metadata.destination_alias
+
+            for header in headers:
+                if "x-datadog-sampling-priority" in header:
+                    del header["x-datadog-sampling-priority"]
+                if "dd-pathway-ctx-base64" in header:
+                    del header["dd-pathway-ctx-base64"]
+                set_produce_checkpoint(destination_type, destination_id, header.setdefault)
+            return headers
+
+    def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+        statsd.increment(
+            self.pipeline_large_records,
+            value=num_records,
+            tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()],
+        )
+
+    def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict[str, str]], None]:
+        """
+        Track the number of records consumed from a Kafka topic.
+
+        Args:
+            kafka_topic (str): The Kafka topic name
+        """
+
+        if os.getenv("DD_DATA_STREAMS_ENABLED") == "true":
+            from ddtrace.data_streams import set_consume_checkpoint
+
+            headers_list = []
+            for record in records:
+                headers = record.data.get("headers", {})
+                set_consume_checkpoint("kafka", record.data["topic"], headers.get)
+                headers_list.append(headers)
+            return headers_list
+
+    @contextmanager
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        Create a trace span for monitoring using Datadog APM.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Yields:
+            A span object that can be used to add additional metadata
+        """
+        if not self.monitoring_config.config.enable_tracing:
+            yield None
+            return
+
+        try:
+            from ddtrace import tracer
+
+            # Combine tags
+            all_tags = self.tags.copy()
+            if extra_tags:
+                all_tags.extend([f"{key}:{value}" for key, value in extra_tags.items()])
+
+            # Create the span
+            with tracer.trace(operation_name, resource=resource) as span:
+                # Add tags to the span
+                for tag in all_tags:
+                    if ":" in tag:
+                        key, value = tag.split(":", 1)
+                        span.set_tag(key, value)
+                span.set_tag("_sampling_priority_v1", 1)
+                yield span
+        except ImportError:
+            logger.warning("ddtrace not available, skipping tracing")
+            yield None
+        except Exception as e:
+            logger.warning(f"Failed to create trace: {e}")
+            yield None
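DatadogMonitor.trace yields None instead of raising when tracing is disabled or ddtrace is unavailable, so call sites can use a single `with monitor.trace(...) as span:` shape everywhere and only annotate when a real span came back. A standalone sketch of that pattern, not bizon code:

    from contextlib import contextmanager

    @contextmanager
    def trace(operation_name: str, enabled: bool):
        if not enabled:
            yield None  # tracing off: caller still gets a usable context
            return
        print(f"start span {operation_name}")
        try:
            yield {"name": operation_name}  # stand-in for a ddtrace span
        finally:
            print(f"finish span {operation_name}")

    with trace("bigquery.load", enabled=False) as span:
        if span is not None:
            span["rows"] = 100  # only annotate when a real span exists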
bizon/monitoring/monitor.py
ADDED

@@ -0,0 +1,71 @@
+from abc import ABC, abstractmethod
+from typing import Dict, List, Union
+
+from bizon.common.models import SyncMetadata
+from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.monitoring.config import MonitoringConfig, MonitorType
+from bizon.source.models import SourceRecord
+
+
+class AbstractMonitor(ABC):
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        self.sync_metadata = sync_metadata
+        self.monitoring_config = monitoring_config
+
+    @abstractmethod
+    def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None:
+        """
+        Track the status of the pipeline.
+
+        Args:
+            status (str): The current status of the pipeline (e.g., 'running', 'failed', 'completed').
+        """
+        pass
+
+    def track_source_iteration(self, records: List[SourceRecord], headers: Dict[str, str] = {}) -> None:
+        """
+        Run a process that tracks the source iteration.
+        """
+        pass
+
+    def track_records_synced(
+        self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {}
+    ) -> None:
+        """
+        Track the number of records synced in the pipeline.
+        """
+        pass
+
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        Create a trace span for monitoring.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Returns:
+            A context manager that can be used with 'with' statement
+        """
+        pass
+
+    def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+        """
+        Track the number of large records synced in the destination system. This aims at helping to identify the source of the large records.
+        """
+        pass
+
+
+class MonitorFactory:
+    @staticmethod
+    def get_monitor(sync_metadata: SyncMetadata, monitoring_config: Union[MonitoringConfig, None]) -> AbstractMonitor:
+        if monitoring_config is None:
+            from bizon.monitoring.noop.monitor import NoOpMonitor
+
+            return NoOpMonitor(sync_metadata, monitoring_config)
+
+        if monitoring_config.type == MonitorType.DATADOG:
+            from bizon.monitoring.datadog.monitor import DatadogMonitor
+
+            return DatadogMonitor(sync_metadata, monitoring_config)
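MonitorFactory makes monitoring opt-in: a pipeline without a monitoring block gets the NoOpMonitor, so tracking calls are safe to sprinkle through the engine unconditionally. A sketch, assuming bizon 0.1.2 is installed (passing sync_metadata=None works here only because NoOpMonitor merely stores it; real callers pass a SyncMetadata, and the argument values below are arbitrary examples):

    from bizon.monitoring.monitor import MonitorFactory

    monitor = MonitorFactory.get_monitor(sync_metadata=None, monitoring_config=None)
    print(type(monitor).__name__)  # NoOpMonitor
    monitor.track_records_synced(num_records=500, destination_id="my_table")  # no-op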
bizon/monitoring/noop/__init__.py
ADDED
File without changes

bizon/monitoring/noop/monitor.py
ADDED

@@ -0,0 +1,30 @@
+from contextlib import contextmanager
+from typing import Dict
+
+from bizon.common.models import BizonConfig, SyncMetadata
+from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.monitoring.config import MonitoringConfig
+from bizon.monitoring.monitor import AbstractMonitor
+
+
+class NoOpMonitor(AbstractMonitor):
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        super().__init__(sync_metadata, monitoring_config)
+
+    def track_pipeline_status(self, pipeline_status: PipelineReturnStatus) -> None:
+        pass
+
+    @contextmanager
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        No-op trace implementation.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Yields:
+            None (no-op implementation)
+        """
+        yield None