bizon 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- bizon/alerting/alerts.py +0 -1
- bizon/common/models.py +182 -4
- bizon/connectors/destinations/bigquery/src/config.py +0 -1
- bizon/connectors/destinations/bigquery/src/destination.py +11 -8
- bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml +74 -0
- bizon/connectors/destinations/bigquery_streaming/src/destination.py +4 -5
- bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml +79 -0
- bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +4 -6
- bizon/connectors/destinations/file/config/file.example.yml +40 -0
- bizon/connectors/destinations/file/src/config.py +1 -1
- bizon/connectors/destinations/file/src/destination.py +0 -5
- bizon/connectors/destinations/logger/config/logger.example.yml +30 -0
- bizon/connectors/destinations/logger/src/config.py +0 -2
- bizon/connectors/destinations/logger/src/destination.py +1 -2
- bizon/connectors/sources/cycle/src/source.py +2 -6
- bizon/connectors/sources/dummy/src/source.py +0 -4
- bizon/connectors/sources/gsheets/src/source.py +2 -3
- bizon/connectors/sources/hubspot/src/hubspot_base.py +0 -1
- bizon/connectors/sources/hubspot/src/hubspot_objects.py +3 -4
- bizon/connectors/sources/hubspot/src/models/hs_object.py +0 -1
- bizon/connectors/sources/kafka/config/kafka_streams.example.yml +124 -0
- bizon/connectors/sources/kafka/src/config.py +10 -6
- bizon/connectors/sources/kafka/src/decode.py +2 -2
- bizon/connectors/sources/kafka/src/source.py +147 -46
- bizon/connectors/sources/notion/config/api_key.example.yml +35 -0
- bizon/connectors/sources/notion/src/__init__.py +0 -0
- bizon/connectors/sources/notion/src/config.py +59 -0
- bizon/connectors/sources/notion/src/source.py +1159 -0
- bizon/connectors/sources/notion/tests/notion_pipeline.py +7 -0
- bizon/connectors/sources/notion/tests/test_notion.py +113 -0
- bizon/connectors/sources/periscope/src/source.py +0 -6
- bizon/connectors/sources/pokeapi/src/source.py +0 -1
- bizon/connectors/sources/sana_ai/config/sana.example.yml +25 -0
- bizon/connectors/sources/sana_ai/src/source.py +85 -0
- bizon/destination/buffer.py +0 -1
- bizon/destination/config.py +0 -1
- bizon/destination/destination.py +1 -4
- bizon/engine/backend/adapters/sqlalchemy/backend.py +2 -5
- bizon/engine/backend/adapters/sqlalchemy/config.py +0 -1
- bizon/engine/config.py +0 -1
- bizon/engine/engine.py +0 -1
- bizon/engine/pipeline/consumer.py +0 -1
- bizon/engine/pipeline/producer.py +1 -5
- bizon/engine/queue/adapters/kafka/config.py +1 -1
- bizon/engine/queue/adapters/kafka/queue.py +0 -1
- bizon/engine/queue/adapters/python_queue/consumer.py +0 -1
- bizon/engine/queue/adapters/python_queue/queue.py +0 -2
- bizon/engine/queue/adapters/rabbitmq/consumer.py +0 -1
- bizon/engine/queue/adapters/rabbitmq/queue.py +0 -1
- bizon/engine/queue/config.py +0 -2
- bizon/engine/runner/adapters/process.py +0 -2
- bizon/engine/runner/adapters/streaming.py +55 -1
- bizon/engine/runner/adapters/thread.py +0 -2
- bizon/engine/runner/config.py +0 -1
- bizon/engine/runner/runner.py +0 -2
- bizon/monitoring/datadog/monitor.py +5 -3
- bizon/monitoring/noop/monitor.py +1 -1
- bizon/source/auth/authenticators/abstract_oauth.py +11 -3
- bizon/source/auth/authenticators/abstract_token.py +2 -1
- bizon/source/auth/authenticators/basic.py +1 -1
- bizon/source/auth/authenticators/cookies.py +2 -1
- bizon/source/auth/authenticators/oauth.py +8 -3
- bizon/source/config.py +0 -2
- bizon/source/cursor.py +8 -16
- bizon/source/discover.py +3 -6
- bizon/source/models.py +0 -1
- bizon/source/session.py +0 -1
- bizon/source/source.py +17 -2
- bizon/transform/config.py +0 -2
- bizon/transform/transform.py +0 -3
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/METADATA +62 -42
- bizon-0.2.0.dist-info/RECORD +136 -0
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/WHEEL +1 -1
- bizon-0.2.0.dist-info/entry_points.txt +2 -0
- bizon-0.1.2.dist-info/RECORD +0 -123
- bizon-0.1.2.dist-info/entry_points.txt +0 -3
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info/licenses}/LICENSE +0 -0
bizon/connectors/sources/notion/tests/test_notion.py
ADDED
@@ -0,0 +1,113 @@
+"""
+Quick test script for Notion source intermediate functions.
+
+Usage:
+    # Set your token
+    export NOTION_TOKEN="your_notion_integration_token"
+
+    # Run interactively
+    python -i test_notion.py
+
+    # Then test functions:
+    >>> page = source.get_page("page-id-here")
+    >>> blocks = source.fetch_blocks_recursively("page-id-here")
+    >>> for b in blocks[:5]:
+    ...     print(source._block_to_markdown(b))
+"""
+
+import os
+
+from bizon.connectors.sources.notion.src.config import NotionSourceConfig, NotionStreams
+from bizon.connectors.sources.notion.src.source import NotionSource
+from bizon.source.auth.authenticators.token import TokenAuthParams
+from bizon.source.auth.config import AuthConfig, AuthType
+
+
+def create_notion_source(
+    token: str = None,
+    page_ids: list = None,
+    database_ids: list = None,
+    stream: NotionStreams = NotionStreams.BLOCKS,
+) -> NotionSource:
+    """Create a NotionSource instance for testing."""
+    token = token or os.environ.get("NOTION_TOKEN")
+    if not token:
+        raise ValueError("Provide token or set NOTION_TOKEN environment variable")
+
+    config = NotionSourceConfig(
+        name="notion",
+        stream=stream,
+        page_ids=page_ids or [],
+        database_ids=database_ids or [],
+        authentication=AuthConfig(
+            type=AuthType.BEARER,
+            params=TokenAuthParams(token=token),
+        ),
+        init_pipeline=False,
+        max_recursion_depth=30,
+    )
+    return NotionSource(config)
+
+
+# ==================== HELPER FUNCTIONS ====================
+
+
+def get_block(source: NotionSource, block_id: str) -> dict:
+    """Fetch a single block by ID."""
+    response = source.session.get(f"https://api.notion.com/v1/blocks/{block_id}")
+    response.raise_for_status()
+    return response.json()
+
+
+def get_page_markdown(source: NotionSource, page_id: str) -> str:
+    """Fetch all blocks from a page and return combined markdown."""
+    blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+    lines = []
+    for block in blocks:
+        md = source._block_to_markdown(block)
+        if md:
+            # Add indentation based on depth
+            indent = " " * block.get("depth", 0)
+            lines.append(f"{indent}{md}")
+    return "\n".join(lines)
+
+
+def inspect_blocks(source: NotionSource, page_id: str, max_blocks: int = 10):
+    """Fetch and print block details for inspection."""
+    blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+    print(f"Found {len(blocks)} blocks")
+    for i, block in enumerate(blocks[:max_blocks]):
+        print(f"\n--- Block {i} ({block.get('type')}) ---")
+        print(f"ID: {block.get('id')}")
+        print(f"Depth: {block.get('depth')}, Order: {block.get('page_order')}")
+        print(f"Markdown: {source._block_to_markdown(block)}")
+
+
+def list_pages_in_database(source: NotionSource, database_id: str) -> list:
+    """List all page IDs in a database."""
+    return source.get_pages_from_database(database_id, apply_filter=False)
+
+
+# ==================== MAIN ====================
+
+if __name__ == "__main__":
+    # Create source if token is available
+    token = os.environ.get("NOTION_TOKEN")
+    if token:
+        source = create_notion_source(token=token)
+        print("NotionSource created and available as 'source'")
+        print("\nAvailable functions:")
+        print("  source.get_page(page_id)")
+        print("  source.get_database(database_id)")
+        print("  source.get_block_children(block_id)")
+        print("  source.fetch_blocks_recursively(page_id)")
+        print("  source._block_to_markdown(block)")
+        print("  source.search()")
+        print("\nHelper functions:")
+        print("  get_block(source, block_id)")
+        print("  get_page_markdown(source, page_id)")
+        print("  inspect_blocks(source, page_id)")
+        print("  list_pages_in_database(source, database_id)")
+    else:
+        print("Set NOTION_TOKEN env var or call:")
+        print("    source = create_notion_source(token='your_token')")
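The helpers in this new test script compose into small utilities. As an illustrative sketch only (the return shape of get_pages_from_database is not visible in this diff, so the page["id"] access below is an assumption), a whole database could be dumped to one markdown file per page:

```python
# Hypothetical composition of the test-script helpers above; assumes the
# helpers are in scope (e.g. after `python -i test_notion.py`) and that
# list_pages_in_database returns dicts carrying an "id" key (an assumption,
# since this diff does not show get_pages_from_database).
from pathlib import Path

source = create_notion_source()  # reads NOTION_TOKEN from the environment
for page in list_pages_in_database(source, "database-id-here"):
    page_id = page["id"]
    Path(f"{page_id}.md").write_text(get_page_markdown(source, page_id))
```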
bizon/connectors/sources/periscope/src/source.py
CHANGED
@@ -41,7 +41,6 @@ class PeriscopeSourceConfig(SourceConfig):
 
 
 class PeriscopeSource(AbstractSource):
-
     def __init__(self, config: PeriscopeSourceConfig):
         super().__init__(config)
         self.config: PeriscopeSourceConfig = config
@@ -127,7 +126,6 @@ class PeriscopeSource(AbstractSource):
         return self.transform_response_to_source_iteration(records_json)
 
     def get_dashboards_metadata(self, pagination: dict = None) -> SourceIteration:
-
         params = {
             "client_site_id": self.config.client_site_id,
             "filters": [{"name": "typeFilter", "input": "Dashboard"}],
@@ -186,7 +184,6 @@ class PeriscopeSource(AbstractSource):
         dashboard_charts: List[dict] = []
 
         for iter_count in range(MAXIMUM_ITERATION):
-
            # Break the loop if no more charts are available
            if iter_count > 0 and len(iter_charts) == 0:
                break
@@ -217,10 +214,8 @@ class PeriscopeSource(AbstractSource):
        iter_textboxes = response.json().get("TextBox")
 
        for chart in iter_charts:
-            # Only fetch charts connected to gorgias-growth-production
            if str(chart.get("database_id")) == str(self.config.database_id):
                if chart.get("id") not in charts_list:
-
                    charts_list.add(chart.get("id"))
 
                    chart["raw_text"] = None
@@ -250,7 +245,6 @@ class PeriscopeSource(AbstractSource):
        return dashboard_charts
 
    def get_charts(self, pagination: dict = None) -> SourceIteration:
-
        BATCH_SIZE = 10
 
        if not pagination:
bizon/connectors/sources/sana_ai/config/sana.example.yml
ADDED
@@ -0,0 +1,25 @@
+name: sana to file
+
+source:
+  name: sana_ai
+  stream: insight_report
+  domain: my_domain
+  query: 'SELECT "user", "user_type", "user_role", "user_origin", "user_registration_step", "user_creation_date", "user_disabled_date", "user_completion_date", "user_status", "user_last_active_date", "user_attribute_evangelist" FROM "analytics"."users" ORDER BY "user" ASC'
+  authentication:
+    type: oauth
+    params:
+      token_refresh_endpoint: https://my_domain.sana.ai/api/token
+      client_id: <client_id>
+      client_secret: <client_secret>
+      grant_type: client_credentials
+      access_token_name: accessToken
+      expires_in_name: expiresIn
+      response_field_path: data
+      scopes:
+        - read
+        - write
+
+destination:
+  name: file
+  config:
+    destination_id: sana_ai_user_status
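Configs like this example are consumed by the engine's RunnerFactory (see the bizon/engine/engine.py hunk further down). A minimal sketch of wiring the two together, using only the factory API visible in this diff:

```python
# Minimal sketch of running the example config above; relies only on the
# RunnerFactory API shown later in this diff (bizon/engine/engine.py).
import yaml

from bizon.engine.engine import RunnerFactory

with open("sana.example.yml") as f:
    config = yaml.safe_load(f)

# create_from_config_dict resolves env variables in the config and builds
# the configured runner; run() then executes the pipeline.
runner = RunnerFactory.create_from_config_dict(config)
runner.run()
```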
bizon/connectors/sources/sana_ai/src/source.py
ADDED
@@ -0,0 +1,85 @@
+import csv
+import io
+import time
+from typing import Any, List, Tuple
+
+from loguru import logger
+from pydantic import Field
+from requests.auth import AuthBase
+
+from bizon.source.auth.builder import AuthBuilder
+from bizon.source.auth.config import AuthType
+from bizon.source.config import SourceConfig
+from bizon.source.models import SourceIteration, SourceRecord
+from bizon.source.source import AbstractSource
+
+
+class SanaSourceConfig(SourceConfig):
+    query: str = Field(..., description="Query to get the data from the Sana Insight API")
+    domain: str = Field(..., description="Domain of the Sana instance")
+
+
+class SanaSource(AbstractSource):
+    def __init__(self, config: SanaSourceConfig):
+        super().__init__(config)
+        self.config: SanaSourceConfig = config
+        self.base_url = f"https://{config.domain}.sana.ai/api/v1"
+
+    def get_authenticator(self) -> AuthBase:
+        if self.config.authentication.type.value == AuthType.OAUTH:
+            return AuthBuilder.oauth2(params=self.config.authentication.params)
+
+    @staticmethod
+    def streams() -> List[str]:
+        return ["insight_report"]
+
+    @staticmethod
+    def get_config_class() -> SourceConfig:
+        return SanaSourceConfig
+
+    def check_connection(self) -> Tuple[bool | Any | None]:
+        return True, None
+
+    def get_total_records_count(self) -> int | None:
+        return None
+
+    def create_insight_report_job(self, query: str) -> str:
+        """Create an insight report for the given query"""
+        response = self.session.post(f"{self.base_url}/reports/query", json={"query": query, "format": "csv"})
+        return response.json()["data"]["jobId"]
+
+    def get_insight_report_job(self, job_id: str) -> dict:
+        """Get an insight report job for the given job id"""
+        response = self.session.get(f"{self.base_url}/reports/jobs/{job_id}")
+        return response.json()
+
+    def get_insight_report(self, pagination: dict) -> SourceIteration:
+        """Return all insight report for the given query"""
+
+        job_id = self.create_insight_report_job(self.config.query)
+        logger.info(f"Created insight report job {job_id} for query {self.config.query}")
+
+        response = self.get_insight_report_job(job_id)
+        status = response["data"]["status"]
+        while status != "successful":
+            time.sleep(3)
+            response = self.get_insight_report_job(job_id)
+            status = response["data"]["status"]
+            logger.info(f"Insight report job {job_id} is {status}")
+
+        link = response["data"]["link"]["url"]
+        logger.info(f"Link for insight report job {job_id} is {link}")
+
+        csv_response = self.session.get(link)
+        csv_content = csv_response.content.decode("utf-8")
+
+        reader = csv.DictReader(io.StringIO(csv_content))
+        data = [SourceRecord(id=str(i), data=row) for i, row in enumerate(reader)]
+
+        return SourceIteration(records=data, next_pagination={})
+
+    def get(self, pagination: dict = None) -> SourceIteration:
+        if self.config.stream == "insight_report":
+            return self.get_insight_report(pagination)
+
+        raise NotImplementedError(f"Stream {self.config.stream} not implemented for Sana")
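The connector above wraps a simple job-based export API. The same lifecycle can be sketched standalone with plain requests; the endpoint paths and payloads mirror SanaSource, while authentication (the OAuth bearer token from the example config) is elided:

```python
# Standalone sketch of the Sana insight-report job lifecycle implemented by
# SanaSource above; auth headers are elided (assumed OAuth bearer token).
import time

import requests

BASE_URL = "https://my_domain.sana.ai/api/v1"  # domain as in sana.example.yml
session = requests.Session()

# 1. Create a CSV export job for the query
job_id = session.post(
    f"{BASE_URL}/reports/query",
    json={"query": 'SELECT "user" FROM "analytics"."users"', "format": "csv"},
).json()["data"]["jobId"]

# 2. Poll until the job reports success (SanaSource polls every 3 seconds,
# with no failure or timeout handling)
while True:
    data = session.get(f"{BASE_URL}/reports/jobs/{job_id}").json()["data"]
    if data["status"] == "successful":
        break
    time.sleep(3)

# 3. Download the CSV from the returned link
csv_content = session.get(data["link"]["url"]).content.decode("utf-8")
```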
bizon/destination/buffer.py
CHANGED
@@ -9,7 +9,6 @@ from .models import destination_record_schema
 
 
 class DestinationBuffer:
-
     def __init__(self, buffer_size: int, buffer_flush_timeout: int) -> None:
         self.buffer_size = buffer_size * 1024 * 1024  # Convert to bytes
         self.buffer_flush_timeout = buffer_flush_timeout
bizon/destination/config.py
CHANGED
bizon/destination/destination.py
CHANGED
@@ -44,7 +44,6 @@ class DestinationIteration(BaseModel):
 
 
 class AbstractDestination(ABC):
-
     def __init__(
         self,
         sync_metadata: SyncMetadata,
@@ -144,7 +143,6 @@ class AbstractDestination(ABC):
 
         # Last iteration, write all records to destination
         if last_iteration:
-
             if self.buffer.df_destination_records.height == 0 and self.buffer.is_empty:
                 logger.info("No records to write to destination, already written, buffer is empty.")
                 return DestinationBufferStatus.RECORDS_WRITTEN
@@ -289,7 +287,6 @@ class DestinationFactory:
         source_callback: AbstractSourceCallback,
         monitor: AbstractMonitor,
     ) -> AbstractDestination:
-
         if config.name == DestinationTypes.LOGGER:
             from bizon.connectors.destinations.logger.src.destination import (
                 LoggerDestination,
@@ -355,4 +352,4 @@ class DestinationFactory:
             monitor=monitor,
         )
 
-        raise ValueError(f"Destination {config.name} not found")
+        raise ValueError(f"Destination {config.name} with params {config} not found")
bizon/engine/backend/adapters/sqlalchemy/backend.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional, Union
 from loguru import logger
 from pytz import UTC
 from sqlalchemy import Result, Select, create_engine, func, inspect, select, update
-from sqlalchemy.engine import Engine
+from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session, scoped_session, sessionmaker
 
 from bizon.engine.backend.backend import AbstractBackend
@@ -26,7 +26,6 @@ from .config import BigQueryConfigDetails, PostgresConfigDetails, SQLiteConfigDetails
 
 
 class SQLAlchemyBackend(AbstractBackend):
-
     def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes, **kwargs):
         super().__init__(config, type)
 
@@ -81,7 +80,6 @@ class SQLAlchemyBackend(AbstractBackend):
         )
 
     def _get_engine(self) -> Engine:
-
         if self.type == BackendTypes.BIGQUERY:
             return self._get_engine_bigquery()
 
@@ -96,7 +94,7 @@
         # ONLY FOR UNIT TESTS: SQLite in memory
         if self.type == BackendTypes.SQLITE_IN_MEMORY:
             return create_engine(
-
+                "sqlite:///:memory:",
                 echo=self.config.echoEngine,
                 connect_args={"check_same_thread": False},
             )
@@ -388,7 +386,6 @@
         pagination: Optional[dict] = None,
         session: Session | None = None,
     ) -> DestinationCursor:
-
         destination_cursor = DestinationCursor(
             name=name,
             source_name=source_name,
bizon/engine/config.py
CHANGED
bizon/engine/engine.py
CHANGED
@@ -21,7 +21,6 @@ def replace_env_variables_in_config(config: dict) -> dict:
 class RunnerFactory:
     @staticmethod
     def create_from_config_dict(config: dict) -> AbstractRunner:
-
         # Replace env variables in config
         config = replace_env_variables_in_config(config=config)
 
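The body of replace_env_variables_in_config is not part of this diff. A plausible sketch of such a helper (the ${VAR} placeholder syntax is an assumption) recursively substitutes environment values into the parsed config:

```python
# Plausible sketch of an env-substitution helper like
# replace_env_variables_in_config; the ${VAR} syntax is an assumption,
# since this diff only shows the function's signature.
import os
import re


def replace_env_variables_in_config(config: dict) -> dict:
    def substitute(value):
        if isinstance(value, str):
            # Replace ${VAR} with its environment value; keep the literal if unset
            return re.sub(r"\$\{(\w+)\}", lambda m: os.environ.get(m.group(1), m.group(0)), value)
        if isinstance(value, dict):
            return {key: substitute(val) for key, val in value.items()}
        if isinstance(value, list):
            return [substitute(item) for item in value]
        return value

    return substitute(config)
```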
bizon/engine/pipeline/consumer.py
CHANGED
@@ -36,7 +36,6 @@ class AbstractQueueConsumer(ABC):
         pass
 
     def process_queue_message(self, queue_message: QueueMessage) -> PipelineReturnStatus:
-
         # Apply the transformation
         try:
             df_source_records = self.transform.apply_transforms(df_source_records=queue_message.df_source_records)
bizon/engine/pipeline/producer.py
CHANGED
@@ -105,7 +105,6 @@ class Producer:
     def run(
         self, job_id: int, stop_event: Union[multiprocessing.synchronize.Event, threading.Event]
     ) -> PipelineReturnStatus:
-
         return_value: PipelineReturnStatus = PipelineReturnStatus.SUCCESS
 
         # Init queue
@@ -132,7 +131,6 @@ class Producer:
             return PipelineReturnStatus.BACKEND_ERROR
 
         while not cursor.is_finished:
-
             if stop_event.is_set():
                 logger.info("Stop event is set, terminating producer ...")
                 return PipelineReturnStatus.KILLED_BY_RUNNER
@@ -226,9 +224,7 @@ class Producer:
             items_in_queue = f"{self.queue.get_size()} items in queue." if self.queue.get_size() else ""
 
             logger.info(
-                (
-                    f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
-                )
+                f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
             )
 
         logger.info("Terminating destination ...")
bizon/engine/queue/adapters/python_queue/consumer.py
CHANGED
@@ -35,7 +35,6 @@ class PythonQueueConsumer(AbstractQueueConsumer):
         self.monitor.track_pipeline_status(PipelineReturnStatus.RUNNING)
 
     def run(self, stop_event: Union[threading.Event, multiprocessing.synchronize.Event]) -> PipelineReturnStatus:
-
         while True:
             # Handle kill signal from the runner
             if stop_event.is_set():
bizon/engine/queue/adapters/python_queue/queue.py
CHANGED
@@ -9,7 +9,6 @@ from bizon.destination.destination import AbstractDestination
 from bizon.engine.queue.config import QUEUE_TERMINATION, QueueMessage
 from bizon.engine.queue.queue import AbstractQueue, AbstractQueueConsumer
 from bizon.monitoring.monitor import AbstractMonitor
-from bizon.source.callback import AbstractSourceCallback
 from bizon.source.models import SourceIteration
 from bizon.transform.transform import Transform
 
@@ -18,7 +17,6 @@ from .consumer import PythonQueueConsumer
 
 
 class PythonQueue(AbstractQueue):
-
     def __init__(self, config: PythonQueueConfigDetails, **kwargs) -> None:
         super().__init__(config)
         self.config: PythonQueueConfigDetails = config
bizon/engine/queue/adapters/rabbitmq/consumer.py
CHANGED
@@ -24,7 +24,6 @@ class RabbitMQConsumer(AbstractQueueConsumer):
         channel.queue_declare(queue=self.config.queue.queue_name)
 
         for method_frame, properties, body in channel.consume(self.config.queue.queue_name):
-
             queue_message = QueueMessage.model_validate_json(body)
             if queue_message.signal == QUEUE_TERMINATION:
                 logger.info("Received termination signal, waiting for destination to close gracefully ...")
bizon/engine/queue/config.py
CHANGED
@@ -27,7 +27,6 @@ class QueueTypes(str, Enum):
 
 
 class AbastractQueueConfigDetails(BaseModel, ABC):
-
     # Forbid extra keys in the model
     model_config = ConfigDict(extra="forbid")
 
@@ -38,7 +37,6 @@ class AbastractQueueConfigDetails(BaseModel, ABC):
 
 
 class AbstractQueueConfig(BaseModel, ABC):
-
     # Forbid extra keys in the model
     model_config = ConfigDict(extra="forbid")
 
bizon/engine/runner/adapters/process.py
CHANGED
@@ -8,7 +8,6 @@ from bizon.engine.runner.runner import AbstractRunner
 
 
 class ProcessRunner(AbstractRunner):
-
     def __init__(self, config: dict):
         super().__init__(config)
 
@@ -36,7 +35,6 @@ class ProcessRunner(AbstractRunner):
         with concurrent.futures.ProcessPoolExecutor(
             max_workers=self.bizon_config.engine.runner.config.max_workers
         ) as executor:
-
             future_producer = executor.submit(
                 AbstractRunner.instanciate_and_run_producer,
                 self.bizon_config,
bizon/engine/runner/adapters/streaming.py
CHANGED
@@ -9,11 +9,13 @@ from loguru import logger
 from pytz import UTC
 
 from bizon.common.models import BizonConfig, SyncMetadata
+from bizon.connectors.destinations.bigquery.src.config import BigQueryRecordSchemaConfig
 from bizon.destination.models import transform_to_df_destination_records
 from bizon.engine.pipeline.models import PipelineReturnStatus
 from bizon.engine.runner.config import RunnerStatus
 from bizon.engine.runner.runner import AbstractRunner
 from bizon.source.models import SourceRecord, source_record_schema
+from bizon.source.source import AbstractSource
 
 
 class StreamingRunner(AbstractRunner):
@@ -36,7 +38,60 @@ class StreamingRunner(AbstractRunner):
     def convert_to_destination_records(df_source_records: pl.DataFrame, extracted_at: datetime) -> pl.DataFrame:
         return transform_to_df_destination_records(df_source_records=df_source_records, extracted_at=extracted_at)
 
+    def _apply_streams_config(self, source: AbstractSource = None) -> None:
+        """Apply streams configuration to source and destination.
+
+        This method is completely source-agnostic. Each source connector is responsible
+        for handling streams config appropriately via set_streams_config().
+
+        When a top-level 'streams' configuration is present, this method:
+        1. Calls source.set_streams_config() to let the source enrich its own config
+        2. Builds destination record_schemas from streams config
+        3. Injects record_schemas into destination config for backward compatibility
+
+        The source is responsible for modifying self.config (which points to bizon_config.source)
+        so that subsequent source instantiations see the enriched config.
+        """
+        if not self.bizon_config.streams:
+            return
+
+        logger.info(f"Applying streams configuration: {len(self.bizon_config.streams)} streams defined")
+
+        # Let the source enrich its own config from streams
+        # Note: source modifies self.config, which is a reference to bizon_config.source
+        # This ensures init_job (which creates a new source) sees the enriched config
+        if source and hasattr(source, "set_streams_config") and callable(source.set_streams_config):
+            source.set_streams_config(self.bizon_config.streams)
+
+        # Build record_schemas list for destination from streams config
+        record_schemas = []
+        for stream in self.bizon_config.streams:
+            if stream.destination.record_schema:
+                record_schema_config = BigQueryRecordSchemaConfig(
+                    destination_id=stream.destination.table_id,
+                    record_schema=stream.destination.record_schema,
+                    clustering_keys=stream.destination.clustering_keys,
+                )
+                record_schemas.append(record_schema_config)
+                logger.info(
+                    f"Stream '{stream.name}': "
+                    f"{getattr(stream.source, 'topic', getattr(stream.source, 'name', 'N/A'))} "
+                    f"-> {stream.destination.table_id}"
+                )
+
+        # Inject into destination config
+        if record_schemas and hasattr(self.bizon_config.destination.config, "record_schemas"):
+            logger.info(f"Injecting {len(record_schemas)} record schemas into destination config")
+            self.bizon_config.destination.config.record_schemas = record_schemas
+
     def run(self) -> RunnerStatus:
+        # Create a temporary source to enrich bizon_config.source from streams
+        # The source's set_streams_config() modifies self.config (= bizon_config.source)
+        # This ensures subsequent source instantiations see the enriched config
+        temp_source = self.get_source(bizon_config=self.bizon_config, config=self.config)
+        self._apply_streams_config(temp_source)
+
+        # Now initialize job (check_connection will use enriched source config)
         job = self.init_job(bizon_config=self.bizon_config, config=self.config)
         backend = self.get_backend(bizon_config=self.bizon_config)
         source = self.get_source(bizon_config=self.bizon_config, config=self.config)
@@ -58,7 +113,6 @@ class StreamingRunner(AbstractRunner):
         iteration = 0
 
         while True:
-
             if source.config.max_iterations and iteration > source.config.max_iterations:
                 logger.info(f"Max iterations {source.config.max_iterations} reached, terminating stream ...")
                 break
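Note that _apply_streams_config duck-types the source: it only checks for a callable set_streams_config attribute. A hypothetical connector-side counterpart honoring the documented contract (mutate self.config in place so later instantiations see the enrichment) could look like this; the topics/topic field names are illustrative, not taken from this diff:

```python
# Hypothetical connector-side counterpart of _apply_streams_config above.
# Only the callable set_streams_config attribute is required by the runner;
# the topics/topic field names below are illustrative assumptions.
from typing import List


class MyStreamingSource:  # stands in for an AbstractSource subclass
    def __init__(self, config):
        # self.config references bizon_config.source, so in-place mutation is
        # visible to sources instantiated later (e.g. by init_job)
        self.config = config

    def set_streams_config(self, streams: List) -> None:
        # Enrich the shared source config from the top-level streams block
        self.config.topics = [
            getattr(stream.source, "topic", getattr(stream.source, "name", None))
            for stream in streams
        ]
```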
bizon/engine/runner/adapters/thread.py
CHANGED
@@ -16,7 +16,6 @@ class ThreadRunner(AbstractRunner):
 
     # TODO: refacto this
     def get_kwargs(self):
-
         extra_kwargs = {}
 
         if self.bizon_config.engine.queue.type == "python_queue":
@@ -46,7 +45,6 @@ class ThreadRunner(AbstractRunner):
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.bizon_config.engine.runner.config.max_workers
         ) as executor:
-
             future_producer = executor.submit(
                 AbstractRunner.instanciate_and_run_producer,
                 self.bizon_config,
bizon/engine/runner/config.py
CHANGED
bizon/engine/runner/runner.py
CHANGED
@@ -27,7 +27,6 @@ from bizon.transform.transform import Transform
 
 class AbstractRunner(ABC):
     def __init__(self, config: dict):
-
         # Internal state
         self._is_running: bool = False
 
@@ -222,7 +221,6 @@
         stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
         **kwargs,
     ):
-
         # Get the source instance
         source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)
 