bizon 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. bizon/alerting/alerts.py +0 -1
  2. bizon/common/models.py +184 -4
  3. bizon/connectors/destinations/bigquery/src/config.py +1 -1
  4. bizon/connectors/destinations/bigquery/src/destination.py +14 -9
  5. bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml +74 -0
  6. bizon/connectors/destinations/bigquery_streaming/src/config.py +6 -5
  7. bizon/connectors/destinations/bigquery_streaming/src/destination.py +13 -9
  8. bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml +79 -0
  9. bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +6 -1
  10. bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +232 -49
  11. bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py +1 -13
  12. bizon/connectors/destinations/file/config/file.example.yml +40 -0
  13. bizon/connectors/destinations/file/src/config.py +2 -1
  14. bizon/connectors/destinations/file/src/destination.py +3 -6
  15. bizon/connectors/destinations/logger/config/logger.example.yml +30 -0
  16. bizon/connectors/destinations/logger/src/config.py +1 -2
  17. bizon/connectors/destinations/logger/src/destination.py +4 -2
  18. bizon/connectors/sources/cycle/src/source.py +2 -6
  19. bizon/connectors/sources/dummy/src/source.py +0 -4
  20. bizon/connectors/sources/gsheets/src/source.py +2 -3
  21. bizon/connectors/sources/hubspot/src/hubspot_base.py +0 -1
  22. bizon/connectors/sources/hubspot/src/hubspot_objects.py +3 -4
  23. bizon/connectors/sources/hubspot/src/models/hs_object.py +0 -1
  24. bizon/connectors/sources/kafka/config/kafka.example.yml +1 -3
  25. bizon/connectors/sources/kafka/config/kafka_debezium.example.yml +1 -3
  26. bizon/connectors/sources/kafka/config/kafka_streams.example.yml +124 -0
  27. bizon/connectors/sources/kafka/src/config.py +10 -12
  28. bizon/connectors/sources/kafka/src/decode.py +65 -60
  29. bizon/connectors/sources/kafka/src/source.py +182 -61
  30. bizon/connectors/sources/kafka/tests/kafka_pipeline.py +1 -1
  31. bizon/connectors/sources/notion/config/api_key.example.yml +35 -0
  32. bizon/connectors/sources/notion/src/__init__.py +0 -0
  33. bizon/connectors/sources/notion/src/config.py +59 -0
  34. bizon/connectors/sources/notion/src/source.py +1159 -0
  35. bizon/connectors/sources/notion/tests/notion_pipeline.py +7 -0
  36. bizon/connectors/sources/notion/tests/test_notion.py +113 -0
  37. bizon/connectors/sources/periscope/src/source.py +0 -6
  38. bizon/connectors/sources/pokeapi/src/source.py +0 -1
  39. bizon/connectors/sources/sana_ai/config/sana.example.yml +25 -0
  40. bizon/connectors/sources/sana_ai/src/source.py +85 -0
  41. bizon/destination/buffer.py +0 -1
  42. bizon/destination/config.py +9 -1
  43. bizon/destination/destination.py +38 -9
  44. bizon/engine/backend/adapters/sqlalchemy/backend.py +2 -5
  45. bizon/engine/backend/adapters/sqlalchemy/config.py +0 -1
  46. bizon/engine/config.py +0 -1
  47. bizon/engine/engine.py +0 -1
  48. bizon/engine/pipeline/consumer.py +0 -1
  49. bizon/engine/pipeline/producer.py +1 -5
  50. bizon/engine/queue/adapters/kafka/config.py +1 -1
  51. bizon/engine/queue/adapters/kafka/queue.py +0 -1
  52. bizon/engine/queue/adapters/python_queue/consumer.py +0 -1
  53. bizon/engine/queue/adapters/python_queue/queue.py +0 -2
  54. bizon/engine/queue/adapters/rabbitmq/consumer.py +0 -1
  55. bizon/engine/queue/adapters/rabbitmq/queue.py +0 -1
  56. bizon/engine/queue/config.py +0 -2
  57. bizon/engine/runner/adapters/process.py +0 -2
  58. bizon/engine/runner/adapters/streaming.py +114 -42
  59. bizon/engine/runner/adapters/thread.py +0 -2
  60. bizon/engine/runner/config.py +0 -1
  61. bizon/engine/runner/runner.py +14 -9
  62. bizon/monitoring/config.py +12 -2
  63. bizon/monitoring/datadog/monitor.py +100 -14
  64. bizon/monitoring/monitor.py +41 -12
  65. bizon/monitoring/noop/monitor.py +22 -3
  66. bizon/source/auth/authenticators/abstract_oauth.py +11 -3
  67. bizon/source/auth/authenticators/abstract_token.py +2 -1
  68. bizon/source/auth/authenticators/basic.py +1 -1
  69. bizon/source/auth/authenticators/cookies.py +2 -1
  70. bizon/source/auth/authenticators/oauth.py +8 -3
  71. bizon/source/config.py +0 -2
  72. bizon/source/cursor.py +8 -16
  73. bizon/source/discover.py +3 -6
  74. bizon/source/models.py +0 -1
  75. bizon/source/session.py +0 -1
  76. bizon/source/source.py +18 -3
  77. bizon/transform/config.py +0 -2
  78. bizon/transform/transform.py +0 -3
  79. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info}/METADATA +62 -41
  80. bizon-0.2.0.dist-info/RECORD +136 -0
  81. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info}/WHEEL +1 -1
  82. bizon-0.2.0.dist-info/entry_points.txt +2 -0
  83. bizon-0.1.1.dist-info/RECORD +0 -123
  84. bizon-0.1.1.dist-info/entry_points.txt +0 -3
  85. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info/licenses}/LICENSE +0 -0
bizon/engine/runner/adapters/streaming.py

@@ -8,12 +8,14 @@ import simplejson as json
 from loguru import logger
 from pytz import UTC
 
-from bizon.common.models import BizonConfig
+from bizon.common.models import BizonConfig, SyncMetadata
+from bizon.connectors.destinations.bigquery.src.config import BigQueryRecordSchemaConfig
 from bizon.destination.models import transform_to_df_destination_records
 from bizon.engine.pipeline.models import PipelineReturnStatus
 from bizon.engine.runner.config import RunnerStatus
 from bizon.engine.runner.runner import AbstractRunner
 from bizon.source.models import SourceRecord, source_record_schema
+from bizon.source.source import AbstractSource
 
 
 class StreamingRunner(AbstractRunner):
@@ -36,68 +38,138 @@ class StreamingRunner(AbstractRunner):
     def convert_to_destination_records(df_source_records: pl.DataFrame, extracted_at: datetime) -> pl.DataFrame:
         return transform_to_df_destination_records(df_source_records=df_source_records, extracted_at=extracted_at)
 
+    def _apply_streams_config(self, source: AbstractSource = None) -> None:
+        """Apply streams configuration to source and destination.
+
+        This method is completely source-agnostic. Each source connector is responsible
+        for handling streams config appropriately via set_streams_config().
+
+        When a top-level 'streams' configuration is present, this method:
+        1. Calls source.set_streams_config() to let the source enrich its own config
+        2. Builds destination record_schemas from streams config
+        3. Injects record_schemas into destination config for backward compatibility
+
+        The source is responsible for modifying self.config (which points to bizon_config.source)
+        so that subsequent source instantiations see the enriched config.
+        """
+        if not self.bizon_config.streams:
+            return
+
+        logger.info(f"Applying streams configuration: {len(self.bizon_config.streams)} streams defined")
+
+        # Let the source enrich its own config from streams
+        # Note: source modifies self.config, which is a reference to bizon_config.source
+        # This ensures init_job (which creates a new source) sees the enriched config
+        if source and hasattr(source, "set_streams_config") and callable(source.set_streams_config):
+            source.set_streams_config(self.bizon_config.streams)
+
+        # Build record_schemas list for destination from streams config
+        record_schemas = []
+        for stream in self.bizon_config.streams:
+            if stream.destination.record_schema:
+                record_schema_config = BigQueryRecordSchemaConfig(
+                    destination_id=stream.destination.table_id,
+                    record_schema=stream.destination.record_schema,
+                    clustering_keys=stream.destination.clustering_keys,
+                )
+                record_schemas.append(record_schema_config)
+                logger.info(
+                    f"Stream '{stream.name}': "
+                    f"{getattr(stream.source, 'topic', getattr(stream.source, 'name', 'N/A'))} "
+                    f"-> {stream.destination.table_id}"
+                )
+
+        # Inject into destination config
+        if record_schemas and hasattr(self.bizon_config.destination.config, "record_schemas"):
+            logger.info(f"Injecting {len(record_schemas)} record schemas into destination config")
+            self.bizon_config.destination.config.record_schemas = record_schemas
+
     def run(self) -> RunnerStatus:
+        # Create a temporary source to enrich bizon_config.source from streams
+        # The source's set_streams_config() modifies self.config (= bizon_config.source)
+        # This ensures subsequent source instantiations see the enriched config
+        temp_source = self.get_source(bizon_config=self.bizon_config, config=self.config)
+        self._apply_streams_config(temp_source)
+
+        # Now initialize job (check_connection will use enriched source config)
         job = self.init_job(bizon_config=self.bizon_config, config=self.config)
         backend = self.get_backend(bizon_config=self.bizon_config)
         source = self.get_source(bizon_config=self.bizon_config, config=self.config)
+
+        sync_metadata = SyncMetadata.from_bizon_config(job_id=job.id, config=self.bizon_config)
+        monitor = self.get_monitoring_client(sync_metadata=sync_metadata, bizon_config=self.bizon_config)
+
         destination = self.get_destination(
             bizon_config=self.bizon_config,
             backend=backend,
             job_id=job.id,
             source_callback=None,
+            monitor=monitor,
         )
+
         transform = self.get_transform(bizon_config=self.bizon_config)
-        monitor = self.get_monitoring_client(bizon_config=self.bizon_config)
+
         destination.buffer.buffer_size = 0  # force buffer to be flushed immediately
         iteration = 0
 
         while True:
-
             if source.config.max_iterations and iteration > source.config.max_iterations:
                 logger.info(f"Max iterations {source.config.max_iterations} reached, terminating stream ...")
                 break
 
-            source_iteration = source.get()
+            with monitor.trace(operation_name="bizon.stream.iteration"):
+                source_iteration = source.get()
+
+                destination_id_indexed_records = {}
+
+                if len(source_iteration.records) == 0:
+                    logger.info("No new records found, stopping iteration")
+                    time.sleep(2)
+                    monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS)
+                    iteration += 1
+                    continue
+
+                for record in source_iteration.records:
+                    if destination_id_indexed_records.get(record.destination_id):
+                        destination_id_indexed_records[record.destination_id].append(record)
+                    else:
+                        destination_id_indexed_records[record.destination_id] = [record]
+
+                for destination_id, records in destination_id_indexed_records.items():
+                    df_source_records = StreamingRunner.convert_source_records(records)
+
+                    dsm_headers = monitor.track_source_iteration(records=records)
+
+                    # Apply transformation
+                    df_source_records = transform.apply_transforms(df_source_records=df_source_records)
+
+                    df_destination_records = StreamingRunner.convert_to_destination_records(
+                        df_source_records, datetime.now(tz=UTC)
+                    )
+                    # Override destination_id
+                    destination.destination_id = destination_id
+                    destination.write_or_buffer_records(
+                        df_destination_records=df_destination_records,
+                        iteration=iteration,
+                        pagination=None,
+                    )
+                    monitor.track_records_synced(
+                        num_records=len(df_destination_records),
+                        destination_id=destination_id,
+                        extra_tags={"destination_id": destination_id},
+                        headers=dsm_headers,
+                    )
+
+                if os.getenv("ENVIRONMENT") == "production":
+                    try:
+                        source.commit()
+                    except Exception as e:
+                        logger.error(f"Error committing source: {e}")
+                        monitor.track_pipeline_status(PipelineReturnStatus.ERROR)
+                        return RunnerStatus(stream=PipelineReturnStatus.ERROR)
 
-            destination_id_indexed_records = {}
-
-            if len(source_iteration.records) == 0:
-                logger.info("No new records found, stopping iteration")
-                time.sleep(2)
-                monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS)
                 iteration += 1
-                continue
-
-            for record in source_iteration.records:
-                if destination_id_indexed_records.get(record.destination_id):
-                    destination_id_indexed_records[record.destination_id].append(record)
-                else:
-                    destination_id_indexed_records[record.destination_id] = [record]
-
-            for destination_id, records in destination_id_indexed_records.items():
-                df_source_records = StreamingRunner.convert_source_records(records)
-
-                # Apply transformation
-                df_source_records = transform.apply_transforms(df_source_records=df_source_records)
 
-                df_destination_records = StreamingRunner.convert_to_destination_records(
-                    df_source_records, datetime.now(tz=UTC)
-                )
-                # Override destination_id
-                destination.destination_id = destination_id
-                destination.write_or_buffer_records(
-                    df_destination_records=df_destination_records,
-                    iteration=iteration,
-                    pagination=None,
-                )
-                monitor.track_records_synced(
-                    num_records=len(df_destination_records),
-                    extra_tags={"destination_id": destination_id},
-                )
-            if os.getenv("ENVIRONMENT") == "production":
-                source.commit()
-
-            iteration += 1
+                monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS)
 
-        monitor.track_pipeline_status(PipelineReturnStatus.SUCCESS)
         return RunnerStatus(stream=PipelineReturnStatus.SUCCESS)  # return when max iterations is reached
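
For readers wiring up the new top-level `streams` block: the runner only requires that a source expose a `set_streams_config()` method and mutate its own (shared) config in place. A rough standalone sketch of that contract follows; `KafkaLikeSource`, `SourceConfig`, and the stream namespaces are invented stand-ins, not bizon classes — only the attribute names mirror what the runner reads.

# Hypothetical sketch of the set_streams_config() contract the StreamingRunner
# relies on. KafkaLikeSource and SourceConfig are stand-ins, not bizon classes.
from dataclasses import dataclass, field
from types import SimpleNamespace
from typing import List


@dataclass
class SourceConfig:
    topics: List[str] = field(default_factory=list)


class KafkaLikeSource:
    def __init__(self, config: SourceConfig):
        # The runner relies on this being a shared reference to bizon_config.source
        self.config = config

    def set_streams_config(self, streams) -> None:
        # Enrich the shared config in place so any source created later
        # (e.g. by the connection check in init_job) sees the same topics.
        for stream in streams:
            if stream.source.topic not in self.config.topics:
                self.config.topics.append(stream.source.topic)


config = SourceConfig()
streams = [
    SimpleNamespace(
        name="users",
        source=SimpleNamespace(topic="app.users"),
        destination=SimpleNamespace(table_id="raw.users", record_schema=None, clustering_keys=None),
    )
]
KafkaLikeSource(config).set_streams_config(streams)
print(config.topics)  # ['app.users'] -- later instantiations reuse the enriched config
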
bizon/engine/runner/adapters/thread.py

@@ -16,7 +16,6 @@ class ThreadRunner(AbstractRunner):
 
     # TODO: refacto this
     def get_kwargs(self):
-
         extra_kwargs = {}
 
         if self.bizon_config.engine.queue.type == "python_queue":
@@ -46,7 +45,6 @@ class ThreadRunner(AbstractRunner):
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.bizon_config.engine.runner.config.max_workers
         ) as executor:
-
             future_producer = executor.submit(
                 AbstractRunner.instanciate_and_run_producer,
                 self.bizon_config,

bizon/engine/runner/config.py

@@ -37,7 +37,6 @@ class RunnerFuturesConfig(BaseModel):
 
 
 class RunnerConfig(BaseModel):
-
     type: RunnerTypes = Field(
         description="Runner to use for the pipeline",
         default=RunnerTypes.THREAD,

bizon/engine/runner/runner.py

@@ -27,7 +27,6 @@ from bizon.transform.transform import Transform
 
 class AbstractRunner(ABC):
     def __init__(self, config: dict):
-
         # Internal state
         self._is_running: bool = False
 
@@ -82,7 +81,11 @@
 
     @staticmethod
     def get_destination(
-        bizon_config: BizonConfig, backend: AbstractBackend, job_id: str, source_callback: AbstractSourceCallback
+        bizon_config: BizonConfig,
+        backend: AbstractBackend,
+        job_id: str,
+        source_callback: AbstractSourceCallback,
+        monitor: AbstractMonitor,
     ) -> AbstractDestination:
         """Get an instance of the destination based on the destination config dict"""
 
@@ -93,6 +96,7 @@
             config=bizon_config.destination,
             backend=backend,
             source_callback=source_callback,
+            monitor=monitor,
         )
 
     @staticmethod
@@ -124,9 +128,9 @@
         return Transform(transforms=bizon_config.transforms)
 
     @staticmethod
-    def get_monitoring_client(bizon_config: BizonConfig) -> AbstractMonitor:
+    def get_monitoring_client(sync_metadata: SyncMetadata, bizon_config: BizonConfig) -> AbstractMonitor:
        """Return the monitoring client instance"""
-        return MonitorFactory.get_monitor(bizon_config)
+        return MonitorFactory.get_monitor(sync_metadata, bizon_config.monitoring)
 
     @staticmethod
     def get_or_create_job(
@@ -217,7 +221,6 @@
         stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
         **kwargs,
     ):
-
         # Get the source instance
         source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)
 
@@ -252,23 +255,25 @@
             bizon_config=bizon_config, config=config
         ).get_source_callback_instance()
 
+        sync_metadata = SyncMetadata.from_bizon_config(job_id=job_id, config=bizon_config)
+
         # Get the queue instance
         queue = AbstractRunner.get_queue(bizon_config=bizon_config, **kwargs)
 
         # Get the backend instance
         backend = AbstractRunner.get_backend(bizon_config=bizon_config, **kwargs)
 
+        # Get the monitor instance
+        monitor = AbstractRunner.get_monitoring_client(sync_metadata=sync_metadata, bizon_config=bizon_config)
+
         # Get the destination instance
         destination = AbstractRunner.get_destination(
-            bizon_config=bizon_config, backend=backend, job_id=job_id, source_callback=source_callback
+            bizon_config=bizon_config, backend=backend, job_id=job_id, source_callback=source_callback, monitor=monitor
        )
 
         # Get the transform instance
         transform = AbstractRunner.get_transform(bizon_config=bizon_config)
 
-        # Get the monitor instance
-        monitor = AbstractRunner.get_monitoring_client(bizon_config=bizon_config)
-
         # Create the consumer instance
         consumer = queue.get_consumer(
             destination=destination,

bizon/monitoring/config.py

@@ -8,7 +8,11 @@ class MonitorType(str, Enum):
     DATADOG = "datadog"
 
 
-class DatadogConfig(BaseModel):
+class BaseMonitoringConfig(BaseModel):
+    enable_tracing: bool = Field(default=False, description="Enable tracing for the monitor")
+
+
+class DatadogConfig(BaseMonitoringConfig):
     datadog_agent_host: Optional[str] = None
     datadog_host_env_var: Optional[str] = None
     datadog_agent_port: int = 8125
@@ -23,7 +27,13 @@ class DatadogConfig(BaseModel):
         if not self.host_is_configured:
             raise ValueError("Either datadog_agent_host or datadog_host_env_var must be specified")
 
+    class Config:
+        extra = "forbid"
 
-class MonitoringConfig(BaseModel):
+
+class MonitoringConfig(BaseMonitoringConfig):
     type: MonitorType
     config: Optional[DatadogConfig] = None
+
+    class Config:
+        extra = "forbid"
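
Both config classes now set `extra = "forbid"`, so typos in the monitoring section fail validation instead of being silently ignored, and `enable_tracing` (default `False`) is inherited from `BaseMonitoringConfig`. A standalone pydantic sketch of that behaviour, using stand-in models rather than bizon imports:

# Standalone sketch (stand-in models, not bizon imports) of what
# extra = "forbid" plus the inherited enable_tracing flag means in practice.
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class BaseMonitoringConfig(BaseModel):
    enable_tracing: bool = Field(default=False, description="Enable tracing for the monitor")


class DatadogConfig(BaseMonitoringConfig):
    datadog_agent_host: Optional[str] = None
    datadog_host_env_var: Optional[str] = None
    datadog_agent_port: int = 8125

    class Config:
        extra = "forbid"


ok = DatadogConfig(datadog_agent_host="localhost", enable_tracing=True)
print(ok.enable_tracing, ok.datadog_agent_port)  # True 8125

try:
    DatadogConfig(datadog_agent_host="localhost", datadgo_agent_host="typo")
except ValidationError:
    print("unknown keys are now rejected instead of being silently ignored")
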
bizon/monitoring/datadog/monitor.py

@@ -1,44 +1,48 @@
 import os
-from typing import Dict
+from contextlib import contextmanager
+from typing import Dict, List, Union
 
 from datadog import initialize, statsd
 from loguru import logger
 
-from bizon.common.models import BizonConfig
+from bizon.common.models import SyncMetadata
 from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.monitoring.config import MonitoringConfig
 from bizon.monitoring.monitor import AbstractMonitor
+from bizon.source.models import SourceRecord
 
 
 class DatadogMonitor(AbstractMonitor):
-    def __init__(self, pipeline_config: BizonConfig):
-        super().__init__(pipeline_config)
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        super().__init__(sync_metadata, monitoring_config)
 
         # In Kubernetes, set the host dynamically
         try:
-            datadog_host_from_env_var = os.getenv(pipeline_config.monitoring.config.datadog_host_env_var)
+            datadog_host_from_env_var = os.getenv(monitoring_config.config.datadog_host_env_var)
             if datadog_host_from_env_var:
                 initialize(
                     statsd_host=datadog_host_from_env_var,
-                    statsd_port=pipeline_config.monitoring.config.datadog_agent_port,
+                    statsd_port=monitoring_config.config.datadog_agent_port,
                 )
             else:
                 initialize(
-                    statsd_host=pipeline_config.monitoring.config.datadog_agent_host,
-                    statsd_port=pipeline_config.monitoring.config.datadog_agent_port,
+                    statsd_host=monitoring_config.config.datadog_agent_host,
+                    statsd_port=monitoring_config.config.datadog_agent_port,
                 )
         except Exception as e:
             logger.info(f"Failed to initialize Datadog agent: {e}")
 
         self.pipeline_monitor_status = "bizon_pipeline.status"
         self.tags = [
-            f"pipeline_name:{self.pipeline_config.name}",
-            f"pipeline_stream:{self.pipeline_config.source.stream}",
-            f"pipeline_source:{self.pipeline_config.source.name}",
-            f"pipeline_destination:{self.pipeline_config.destination.name}",
-        ] + [f"{key}:{value}" for key, value in self.pipeline_config.monitoring.config.tags.items()]
+            f"pipeline_name:{self.sync_metadata.name}",
+            f"pipeline_stream:{self.sync_metadata.stream_name}",
+            f"pipeline_source:{self.sync_metadata.source_name}",
+            f"pipeline_destination:{self.sync_metadata.destination_name}",
+        ] + [f"{key}:{value}" for key, value in self.monitoring_config.config.tags.items()]
 
         self.pipeline_active_pipelines = "bizon_pipeline.active_pipelines"
         self.pipeline_records_synced = "bizon_pipeline.records_synced"
+        self.pipeline_large_records = "bizon_pipeline.large_records"
 
     def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None:
         """
@@ -55,7 +59,9 @@ class DatadogMonitor(AbstractMonitor):
             + [f"{key}:{value}" for key, value in extra_tags.items()],
         )
 
-    def track_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+    def track_records_synced(
+        self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: List[Dict[str, str]] = []
+    ) -> Union[List[Dict[str, str]], None]:
         """
         Track the number of records synced in the pipeline.
 
@@ -67,3 +73,83 @@ class DatadogMonitor(AbstractMonitor):
             value=num_records,
             tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()],
         )
+        if os.getenv("DD_DATA_STREAMS_ENABLED") == "true":
+            from ddtrace.data_streams import set_produce_checkpoint
+
+            destination_type = self.sync_metadata.destination_alias
+
+            for header in headers:
+                if "x-datadog-sampling-priority" in header:
+                    del header["x-datadog-sampling-priority"]
+                if "dd-pathway-ctx-base64" in header:
+                    del header["dd-pathway-ctx-base64"]
+                set_produce_checkpoint(destination_type, destination_id, header.setdefault)
+            return headers
+
+    def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+        statsd.increment(
+            self.pipeline_large_records,
+            value=num_records,
+            tags=self.tags + [f"{key}:{value}" for key, value in extra_tags.items()],
+        )
+
+    def track_source_iteration(self, records: List[SourceRecord]) -> Union[List[Dict[str, str]], None]:
+        """
+        Track the number of records consumed from a Kafka topic.
+
+        Args:
+            kafka_topic (str): The Kafka topic name
+        """
+
+        if os.getenv("DD_DATA_STREAMS_ENABLED") == "true":
+            from ddtrace.data_streams import set_consume_checkpoint
+
+            headers_list = []
+            for record in records:
+                headers = record.data.get("headers", {})
+                set_consume_checkpoint("kafka", record.data["topic"], headers.get)
+                headers_list.append(headers)
+            return headers_list
+
+    @contextmanager
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        Create a trace span for monitoring using Datadog APM.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Yields:
+            A span object that can be used to add additional metadata
+        """
+        if not self.monitoring_config.config.enable_tracing:
+            yield None
+            return
+
+        try:
+            from ddtrace import tracer
+        except ImportError:
+            logger.warning("ddtrace not available, skipping tracing")
+            yield None
+            return
+
+        try:
+            # Combine tags
+            all_tags = self.tags.copy()
+            if extra_tags:
+                all_tags.extend([f"{key}:{value}" for key, value in extra_tags.items()])
+
+            # Create the span
+            with tracer.trace(operation_name, resource=resource) as span:
+                # Add tags to the span
+                for tag in all_tags:
+                    if ":" in tag:
+                        key, value = tag.split(":", 1)
+                        span.set_tag(key, value)
+                span.set_tag("_sampling_priority_v1", 1)
+                yield span
+        except Exception as e:
+            logger.warning(f"Failed to create trace: {e}")
+            yield None
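
Because `trace()` is exposed as a generator-based context manager on every monitor, callers can always write `with monitor.trace(...) as span:` and simply receive `None` when tracing is disabled or `ddtrace` is absent. A minimal stand-in (not the bizon implementation) showing that pattern:

# Stand-in illustration (not the bizon implementation) of the contextmanager
# shape shared by DatadogMonitor.trace() and NoOpMonitor.trace().
from contextlib import contextmanager
from typing import Dict, Optional


class TracingSketch:
    def __init__(self, enable_tracing: bool):
        self.enable_tracing = enable_tracing

    @contextmanager
    def trace(self, operation_name: str, resource: str = None, extra_tags: Optional[Dict[str, str]] = None):
        if not self.enable_tracing:
            yield None  # callers still get a valid context manager
            return
        span = {"name": operation_name, "resource": resource, "tags": dict(extra_tags or {})}
        try:
            yield span
        finally:
            span["finished"] = True  # a real backend would close/flush the span here


monitor = TracingSketch(enable_tracing=False)
with monitor.trace("bizon.stream.iteration") as span:
    assert span is None  # tracing disabled: the block still runs, nothing is recorded
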
bizon/monitoring/monitor.py

@@ -1,15 +1,16 @@
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List, Union
 
-from bizon.common.models import BizonConfig
+from bizon.common.models import SyncMetadata
 from bizon.engine.pipeline.models import PipelineReturnStatus
-from bizon.monitoring.config import MonitorType
+from bizon.monitoring.config import MonitoringConfig, MonitorType
+from bizon.source.models import SourceRecord
 
 
 class AbstractMonitor(ABC):
-    def __init__(self, pipeline_config: BizonConfig):
-        self.pipeline_config = pipeline_config
-        # Initialize the monitor
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        self.sync_metadata = sync_metadata
+        self.monitoring_config = monitoring_config
 
     @abstractmethod
     def track_pipeline_status(self, pipeline_status: PipelineReturnStatus, extra_tags: Dict[str, str] = {}) -> None:
@@ -21,22 +22,50 @@ class AbstractMonitor(ABC):
         """
         pass
 
-    def track_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+    def track_source_iteration(self, records: List[SourceRecord], headers: Dict[str, str] = {}) -> None:
+        """
+        Run a process that tracks the source iteration.
+        """
+        pass
+
+    def track_records_synced(
+        self, num_records: int, destination_id: str, extra_tags: Dict[str, str] = {}, headers: Dict[str, str] = {}
+    ) -> None:
         """
         Track the number of records synced in the pipeline.
         """
         pass
 
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        Create a trace span for monitoring.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Returns:
+            A context manager that can be used with 'with' statement
+        """
+        pass
+
+    def track_large_records_synced(self, num_records: int, extra_tags: Dict[str, str] = {}) -> None:
+        """
+        Track the number of large records synced in the destination system. This aims at helping to identify the source of the large records.
+        """
+        pass
+
 
 class MonitorFactory:
     @staticmethod
-    def get_monitor(pipeline_config: BizonConfig) -> AbstractMonitor:
-        if pipeline_config.monitoring is None:
+    def get_monitor(sync_metadata: SyncMetadata, monitoring_config: Union[MonitoringConfig, None]) -> AbstractMonitor:
+        if monitoring_config is None:
             from bizon.monitoring.noop.monitor import NoOpMonitor
 
-            return NoOpMonitor(pipeline_config)
+            return NoOpMonitor(sync_metadata, monitoring_config)
 
-        if pipeline_config.monitoring.type == MonitorType.DATADOG:
+        if monitoring_config.type == MonitorType.DATADOG:
             from bizon.monitoring.datadog.monitor import DatadogMonitor
 
-            return DatadogMonitor(pipeline_config)
+            return DatadogMonitor(sync_metadata, monitoring_config)

bizon/monitoring/noop/monitor.py

@@ -1,11 +1,30 @@
-from bizon.common.models import BizonConfig
+from contextlib import contextmanager
+from typing import Dict
+
+from bizon.common.models import SyncMetadata
 from bizon.engine.pipeline.models import PipelineReturnStatus
+from bizon.monitoring.config import MonitoringConfig
 from bizon.monitoring.monitor import AbstractMonitor
 
 
 class NoOpMonitor(AbstractMonitor):
-    def __init__(self, pipeline_config: BizonConfig):
-        super().__init__(pipeline_config)
+    def __init__(self, sync_metadata: SyncMetadata, monitoring_config: MonitoringConfig):
+        super().__init__(sync_metadata, monitoring_config)
 
     def track_pipeline_status(self, pipeline_status: PipelineReturnStatus) -> None:
         pass
+
+    @contextmanager
+    def trace(self, operation_name: str, resource: str = None, extra_tags: Dict[str, str] = None):
+        """
+        No-op trace implementation.
+
+        Args:
+            operation_name (str): The name of the operation being traced
+            resource (str): The resource being operated on (e.g., topic name, table name)
+            extra_tags (Dict[str, str]): Additional tags for the trace
+
+        Yields:
+            None (no-op implementation)
+        """
+        yield None

bizon/source/auth/authenticators/abstract_oauth.py

@@ -1,7 +1,9 @@
 from abc import abstractmethod
-from typing import Any, List, Mapping, MutableMapping, Tuple, Union
+from collections.abc import Mapping, MutableMapping
+from typing import Any, List, Tuple, Union
 
 import backoff
+import dpath
 import pendulum
 import requests
 from loguru import logger
@@ -52,8 +54,8 @@ class AbstractOauth2Authenticator(AuthBase):
             "refresh_token": self.get_refresh_token(),
         }
 
-        if self.get_scopes:
-            payload["scopes"] = self.get_scopes()
+        if self.get_scopes():
+            payload["scope"] = ",".join(self.get_scopes())
 
         if self.get_refresh_request_body():
             for key, val in self.get_refresh_request_body().items():
@@ -92,6 +94,8 @@
         :return: a tuple of (access_token, token_lifespan_in_seconds)
         """
         response_json = self._get_refresh_access_token_response()
+        if self.get_response_field_path():
+            response_json = dpath.get(response_json, self.get_response_field_path())
         return response_json[self.get_access_token_name()], int(response_json[self.get_expires_in_name()])
 
     @abstractmethod
@@ -138,6 +142,10 @@
     def get_grant_type(self) -> str:
         """Returns grant_type specified for requesting access_token"""
 
+    @abstractmethod
+    def get_response_field_path(self) -> str:
+        """Returns the path to the response field"""
+
     @property
     @abstractmethod
     def access_token(self) -> str:
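
The new `get_response_field_path()` hook lets an authenticator point at a nested object inside the token refresh response; `dpath.get` then narrows the payload before `access_token`/`expires_in` are read, and scopes are now sent as a comma-joined `scope` field. A small illustration of the dpath step; the response shape and the "data/token" path here are made up:

# Illustration only: a nested refresh response narrowed with dpath before the
# token fields are read. The shape and the "data/token" path are invented.
import dpath

response_json = {"data": {"token": {"access_token": "abc123", "expires_in": 3600}}}

# What a connector's get_response_field_path() might return
token_payload = dpath.get(response_json, "data/token")

print(token_payload["access_token"], int(token_payload["expires_in"]))  # abc123 3600
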
bizon/source/auth/authenticators/abstract_token.py

@@ -1,5 +1,6 @@
 from abc import abstractmethod
-from typing import Any, Mapping
+from collections.abc import Mapping
+from typing import Any
 
 from requests.auth import AuthBase
 

bizon/source/auth/authenticators/basic.py

@@ -27,7 +27,7 @@ class BasicHttpAuthenticator(AbstractHeaderAuthenticator):
         return f"{self._auth_method} {self._token}"
 
     def __init__(self, params: BasicHttpAuthParams):
-        auth_string = f"{params.username}:{params.password}".encode("utf8")
+        auth_string = f"{params.username}:{params.password}".encode()
         b64_encoded = base64.b64encode(auth_string).decode("utf8")
         self._auth_header = params.auth_header
         self._auth_method = params.auth_method

bizon/source/auth/authenticators/cookies.py

@@ -1,4 +1,5 @@
-from typing import Any, Mapping, Optional
+from collections.abc import Mapping
+from typing import Any, Optional
 
 from pydantic import BaseModel, Field
 from requests import PreparedRequest