bizon 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. bizon/alerting/alerts.py +0 -1
  2. bizon/common/models.py +182 -4
  3. bizon/connectors/destinations/bigquery/src/config.py +0 -1
  4. bizon/connectors/destinations/bigquery/src/destination.py +11 -8
  5. bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml +74 -0
  6. bizon/connectors/destinations/bigquery_streaming/src/destination.py +4 -5
  7. bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml +79 -0
  8. bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +4 -6
  9. bizon/connectors/destinations/file/config/file.example.yml +40 -0
  10. bizon/connectors/destinations/file/src/config.py +1 -1
  11. bizon/connectors/destinations/file/src/destination.py +0 -5
  12. bizon/connectors/destinations/logger/config/logger.example.yml +30 -0
  13. bizon/connectors/destinations/logger/src/config.py +0 -2
  14. bizon/connectors/destinations/logger/src/destination.py +1 -2
  15. bizon/connectors/sources/cycle/src/source.py +2 -6
  16. bizon/connectors/sources/dummy/src/source.py +0 -4
  17. bizon/connectors/sources/gsheets/src/source.py +2 -3
  18. bizon/connectors/sources/hubspot/src/hubspot_base.py +0 -1
  19. bizon/connectors/sources/hubspot/src/hubspot_objects.py +3 -4
  20. bizon/connectors/sources/hubspot/src/models/hs_object.py +0 -1
  21. bizon/connectors/sources/kafka/config/kafka_streams.example.yml +124 -0
  22. bizon/connectors/sources/kafka/src/config.py +10 -6
  23. bizon/connectors/sources/kafka/src/decode.py +2 -2
  24. bizon/connectors/sources/kafka/src/source.py +147 -46
  25. bizon/connectors/sources/notion/config/api_key.example.yml +35 -0
  26. bizon/connectors/sources/notion/src/__init__.py +0 -0
  27. bizon/connectors/sources/notion/src/config.py +59 -0
  28. bizon/connectors/sources/notion/src/source.py +1159 -0
  29. bizon/connectors/sources/notion/tests/notion_pipeline.py +7 -0
  30. bizon/connectors/sources/notion/tests/test_notion.py +113 -0
  31. bizon/connectors/sources/periscope/src/source.py +0 -6
  32. bizon/connectors/sources/pokeapi/src/source.py +0 -1
  33. bizon/connectors/sources/sana_ai/config/sana.example.yml +25 -0
  34. bizon/connectors/sources/sana_ai/src/source.py +85 -0
  35. bizon/destination/buffer.py +0 -1
  36. bizon/destination/config.py +0 -1
  37. bizon/destination/destination.py +1 -4
  38. bizon/engine/backend/adapters/sqlalchemy/backend.py +2 -5
  39. bizon/engine/backend/adapters/sqlalchemy/config.py +0 -1
  40. bizon/engine/config.py +0 -1
  41. bizon/engine/engine.py +0 -1
  42. bizon/engine/pipeline/consumer.py +0 -1
  43. bizon/engine/pipeline/producer.py +1 -5
  44. bizon/engine/queue/adapters/kafka/config.py +1 -1
  45. bizon/engine/queue/adapters/kafka/queue.py +0 -1
  46. bizon/engine/queue/adapters/python_queue/consumer.py +0 -1
  47. bizon/engine/queue/adapters/python_queue/queue.py +0 -2
  48. bizon/engine/queue/adapters/rabbitmq/consumer.py +0 -1
  49. bizon/engine/queue/adapters/rabbitmq/queue.py +0 -1
  50. bizon/engine/queue/config.py +0 -2
  51. bizon/engine/runner/adapters/process.py +0 -2
  52. bizon/engine/runner/adapters/streaming.py +55 -1
  53. bizon/engine/runner/adapters/thread.py +0 -2
  54. bizon/engine/runner/config.py +0 -1
  55. bizon/engine/runner/runner.py +0 -2
  56. bizon/monitoring/datadog/monitor.py +5 -3
  57. bizon/monitoring/noop/monitor.py +1 -1
  58. bizon/source/auth/authenticators/abstract_oauth.py +11 -3
  59. bizon/source/auth/authenticators/abstract_token.py +2 -1
  60. bizon/source/auth/authenticators/basic.py +1 -1
  61. bizon/source/auth/authenticators/cookies.py +2 -1
  62. bizon/source/auth/authenticators/oauth.py +8 -3
  63. bizon/source/config.py +0 -2
  64. bizon/source/cursor.py +8 -16
  65. bizon/source/discover.py +3 -6
  66. bizon/source/models.py +0 -1
  67. bizon/source/session.py +0 -1
  68. bizon/source/source.py +17 -2
  69. bizon/transform/config.py +0 -2
  70. bizon/transform/transform.py +0 -3
  71. {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/METADATA +62 -42
  72. bizon-0.2.0.dist-info/RECORD +136 -0
  73. {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/WHEEL +1 -1
  74. bizon-0.2.0.dist-info/entry_points.txt +2 -0
  75. bizon-0.1.2.dist-info/RECORD +0 -123
  76. bizon-0.1.2.dist-info/entry_points.txt +0 -3
  77. {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,7 @@
+ import os
+
+ from bizon.engine.engine import RunnerFactory
+
+ if __name__ == "__main__":
+     runner = RunnerFactory.create_from_yaml(filepath=os.path.abspath("test-pipeline-notion.yml"))
+     runner.run()
@@ -0,0 +1,113 @@
+ """
+ Quick test script for Notion source intermediate functions.
+
+ Usage:
+     # Set your token
+     export NOTION_TOKEN="your_notion_integration_token"
+
+     # Run interactively
+     python -i test_notion.py
+
+     # Then test functions:
+     >>> page = source.get_page("page-id-here")
+     >>> blocks = source.fetch_blocks_recursively("page-id-here")
+     >>> for b in blocks[:5]:
+     ...     print(source._block_to_markdown(b))
+ """
+
+ import os
+
+ from bizon.connectors.sources.notion.src.config import NotionSourceConfig, NotionStreams
+ from bizon.connectors.sources.notion.src.source import NotionSource
+ from bizon.source.auth.authenticators.token import TokenAuthParams
+ from bizon.source.auth.config import AuthConfig, AuthType
+
+
+ def create_notion_source(
+     token: str = None,
+     page_ids: list = None,
+     database_ids: list = None,
+     stream: NotionStreams = NotionStreams.BLOCKS,
+ ) -> NotionSource:
+     """Create a NotionSource instance for testing."""
+     token = token or os.environ.get("NOTION_TOKEN")
+     if not token:
+         raise ValueError("Provide token or set NOTION_TOKEN environment variable")
+
+     config = NotionSourceConfig(
+         name="notion",
+         stream=stream,
+         page_ids=page_ids or [],
+         database_ids=database_ids or [],
+         authentication=AuthConfig(
+             type=AuthType.BEARER,
+             params=TokenAuthParams(token=token),
+         ),
+         init_pipeline=False,
+         max_recursion_depth=30,
+     )
+     return NotionSource(config)
+
+
+ # ==================== HELPER FUNCTIONS ====================
+
+
+ def get_block(source: NotionSource, block_id: str) -> dict:
+     """Fetch a single block by ID."""
+     response = source.session.get(f"https://api.notion.com/v1/blocks/{block_id}")
+     response.raise_for_status()
+     return response.json()
+
+
+ def get_page_markdown(source: NotionSource, page_id: str) -> str:
+     """Fetch all blocks from a page and return combined markdown."""
+     blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+     lines = []
+     for block in blocks:
+         md = source._block_to_markdown(block)
+         if md:
+             # Add indentation based on depth
+             indent = " " * block.get("depth", 0)
+             lines.append(f"{indent}{md}")
+     return "\n".join(lines)
+
+
+ def inspect_blocks(source: NotionSource, page_id: str, max_blocks: int = 10):
+     """Fetch and print block details for inspection."""
+     blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+     print(f"Found {len(blocks)} blocks")
+     for i, block in enumerate(blocks[:max_blocks]):
+         print(f"\n--- Block {i} ({block.get('type')}) ---")
+         print(f"ID: {block.get('id')}")
+         print(f"Depth: {block.get('depth')}, Order: {block.get('page_order')}")
+         print(f"Markdown: {source._block_to_markdown(block)}")
+
+
+ def list_pages_in_database(source: NotionSource, database_id: str) -> list:
+     """List all page IDs in a database."""
+     return source.get_pages_from_database(database_id, apply_filter=False)
+
+
+ # ==================== MAIN ====================
+
+ if __name__ == "__main__":
+     # Create source if token is available
+     token = os.environ.get("NOTION_TOKEN")
+     if token:
+         source = create_notion_source(token=token)
+         print("NotionSource created and available as 'source'")
+         print("\nAvailable functions:")
+         print(" source.get_page(page_id)")
+         print(" source.get_database(database_id)")
+         print(" source.get_block_children(block_id)")
+         print(" source.fetch_blocks_recursively(page_id)")
+         print(" source._block_to_markdown(block)")
+         print(" source.search()")
+         print("\nHelper functions:")
+         print(" get_block(source, block_id)")
+         print(" get_page_markdown(source, page_id)")
+         print(" inspect_blocks(source, page_id)")
+         print(" list_pages_in_database(source, database_id)")
+     else:
+         print("Set NOTION_TOKEN env var or call:")
+         print(" source = create_notion_source(token='your_token')")
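The helpers in this test script can also be driven non-interactively. A two-line sketch using the functions defined above, assuming NOTION_TOKEN is exported and the page ID placeholder is replaced with a real one:

# Build a source from the NOTION_TOKEN env var and dump one page as markdown ("page-id-here" is a placeholder).
source = create_notion_source()
print(get_page_markdown(source, "page-id-here"))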
@@ -41,7 +41,6 @@ class PeriscopeSourceConfig(SourceConfig):
 
 
class PeriscopeSource(AbstractSource):
-
    def __init__(self, config: PeriscopeSourceConfig):
        super().__init__(config)
        self.config: PeriscopeSourceConfig = config
@@ -127,7 +126,6 @@ class PeriscopeSource(AbstractSource):
        return self.transform_response_to_source_iteration(records_json)
 
    def get_dashboards_metadata(self, pagination: dict = None) -> SourceIteration:
-
        params = {
            "client_site_id": self.config.client_site_id,
            "filters": [{"name": "typeFilter", "input": "Dashboard"}],
@@ -186,7 +184,6 @@ class PeriscopeSource(AbstractSource):
        dashboard_charts: List[dict] = []
 
        for iter_count in range(MAXIMUM_ITERATION):
-
            # Break the loop if no more charts are available
            if iter_count > 0 and len(iter_charts) == 0:
                break
@@ -217,10 +214,8 @@
            iter_textboxes = response.json().get("TextBox")
 
            for chart in iter_charts:
-                # Only fetch charts connected to gorgias-growth-production
                if str(chart.get("database_id")) == str(self.config.database_id):
                    if chart.get("id") not in charts_list:
-
                        charts_list.add(chart.get("id"))
 
                        chart["raw_text"] = None
@@ -250,7 +245,6 @@
        return dashboard_charts
 
    def get_charts(self, pagination: dict = None) -> SourceIteration:
-
        BATCH_SIZE = 10
 
        if not pagination:
@@ -23,7 +23,6 @@ class PokeAPISourceConfig(SourceConfig):
 
 
class PeriscopeSource(AbstractSource):
-
    def __init__(self, config: PokeAPISourceConfig):
        super().__init__(config)
        self.config: PokeAPISourceConfig = config
@@ -0,0 +1,25 @@
+ name: sana to file
+
+ source:
+   name: sana_ai
+   stream: insight_report
+   domain: my_domain
+   query: 'SELECT "user", "user_type", "user_role", "user_origin", "user_registration_step", "user_creation_date", "user_disabled_date", "user_completion_date", "user_status", "user_last_active_date", "user_attribute_evangelist" FROM "analytics"."users" ORDER BY "user" ASC'
+   authentication:
+     type: oauth
+     params:
+       token_refresh_endpoint: https://my_domain.sana.ai/api/token
+       client_id: <client_id>
+       client_secret: <client_secret>
+       grant_type: client_credentials
+       access_token_name: accessToken
+       expires_in_name: expiresIn
+       response_field_path: data
+       scopes:
+         - read
+         - write
+
+ destination:
+   name: file
+   config:
+     destination_id: sana_ai_user_status
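The example above runs like any other bizon pipeline config: load the YAML with RunnerFactory and call run(), exactly as the Notion test pipeline added in this release does. A minimal sketch, assuming the config is saved locally as sana.example.yml (the path is illustrative) with real credentials in place of <client_id> and <client_secret>:

import os

from bizon.engine.engine import RunnerFactory

# Load the Sana example config and run the pipeline end to end.
if __name__ == "__main__":
    runner = RunnerFactory.create_from_yaml(filepath=os.path.abspath("sana.example.yml"))
    runner.run()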
@@ -0,0 +1,85 @@
+ import csv
+ import io
+ import time
+ from typing import Any, List, Tuple
+
+ from loguru import logger
+ from pydantic import Field
+ from requests.auth import AuthBase
+
+ from bizon.source.auth.builder import AuthBuilder
+ from bizon.source.auth.config import AuthType
+ from bizon.source.config import SourceConfig
+ from bizon.source.models import SourceIteration, SourceRecord
+ from bizon.source.source import AbstractSource
+
+
+ class SanaSourceConfig(SourceConfig):
+     query: str = Field(..., description="Query to get the data from the Sana Insight API")
+     domain: str = Field(..., description="Domain of the Sana instance")
+
+
+ class SanaSource(AbstractSource):
+     def __init__(self, config: SanaSourceConfig):
+         super().__init__(config)
+         self.config: SanaSourceConfig = config
+         self.base_url = f"https://{config.domain}.sana.ai/api/v1"
+
+     def get_authenticator(self) -> AuthBase:
+         if self.config.authentication.type.value == AuthType.OAUTH:
+             return AuthBuilder.oauth2(params=self.config.authentication.params)
+
+     @staticmethod
+     def streams() -> List[str]:
+         return ["insight_report"]
+
+     @staticmethod
+     def get_config_class() -> SourceConfig:
+         return SanaSourceConfig
+
+     def check_connection(self) -> Tuple[bool | Any | None]:
+         return True, None
+
+     def get_total_records_count(self) -> int | None:
+         return None
+
+     def create_insight_report_job(self, query: str) -> str:
+         """Create an insight report for the given query"""
+         response = self.session.post(f"{self.base_url}/reports/query", json={"query": query, "format": "csv"})
+         return response.json()["data"]["jobId"]
+
+     def get_insight_report_job(self, job_id: str) -> dict:
+         """Get an insight report job for the given job id"""
+         response = self.session.get(f"{self.base_url}/reports/jobs/{job_id}")
+         return response.json()
+
+     def get_insight_report(self, pagination: dict) -> SourceIteration:
+         """Return all insight report for the given query"""
+
+         job_id = self.create_insight_report_job(self.config.query)
+         logger.info(f"Created insight report job {job_id} for query {self.config.query}")
+
+         response = self.get_insight_report_job(job_id)
+         status = response["data"]["status"]
+         while status != "successful":
+             time.sleep(3)
+             response = self.get_insight_report_job(job_id)
+             status = response["data"]["status"]
+             logger.info(f"Insight report job {job_id} is {status}")
+
+         link = response["data"]["link"]["url"]
+         logger.info(f"Link for insight report job {job_id} is {link}")
+
+         csv_response = self.session.get(link)
+         csv_content = csv_response.content.decode("utf-8")
+
+         reader = csv.DictReader(io.StringIO(csv_content))
+         data = [SourceRecord(id=str(i), data=row) for i, row in enumerate(reader)]
+
+         return SourceIteration(records=data, next_pagination={})
+
+     def get(self, pagination: dict = None) -> SourceIteration:
+         if self.config.stream == "insight_report":
+             return self.get_insight_report(pagination)
+
+         raise NotImplementedError(f"Stream {self.config.stream} not implemented for Sana")
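For orientation, get_insight_report above wraps a job-based export flow: create a report job, poll it until its status is "successful", then download and parse the CSV it links to. A standalone sketch of that flow using plain requests, assuming base_url, token and query are supplied by the caller (the helper name and bearer-token handling are illustrative; the endpoints and response fields are taken from the connector code above):

import csv
import io
import time

import requests


def fetch_insight_report(base_url: str, token: str, query: str) -> list[dict]:
    """Create a report job, poll until it succeeds, then download and parse the CSV."""
    headers = {"Authorization": f"Bearer {token}"}

    # 1. Create the report job (same endpoint and payload as SanaSource.create_insight_report_job)
    job = requests.post(f"{base_url}/reports/query", json={"query": query, "format": "csv"}, headers=headers)
    job_id = job.json()["data"]["jobId"]

    # 2. Poll the job until its status is "successful" (the connector sleeps 3 seconds between polls)
    while True:
        report = requests.get(f"{base_url}/reports/jobs/{job_id}", headers=headers).json()
        if report["data"]["status"] == "successful":
            break
        time.sleep(3)

    # 3. Download the CSV from the returned link and parse each row into a dict
    csv_content = requests.get(report["data"]["link"]["url"], headers=headers).content.decode("utf-8")
    return list(csv.DictReader(io.StringIO(csv_content)))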
@@ -9,7 +9,6 @@ from .models import destination_record_schema
 
 
class DestinationBuffer:
-
    def __init__(self, buffer_size: int, buffer_flush_timeout: int) -> None:
        self.buffer_size = buffer_size * 1024 * 1024  # Convert to bytes
        self.buffer_flush_timeout = buffer_flush_timeout
@@ -28,7 +28,6 @@ class RecordSchemaConfig(BaseModel):
 
 
class AbstractDestinationDetailsConfig(BaseModel):
-
    # Forbid extra keys in the model
    model_config = ConfigDict(extra="forbid")
 
@@ -44,7 +44,6 @@ class DestinationIteration(BaseModel):
 
 
class AbstractDestination(ABC):
-
    def __init__(
        self,
        sync_metadata: SyncMetadata,
@@ -144,7 +143,6 @@
 
        # Last iteration, write all records to destination
        if last_iteration:
-
            if self.buffer.df_destination_records.height == 0 and self.buffer.is_empty:
                logger.info("No records to write to destination, already written, buffer is empty.")
                return DestinationBufferStatus.RECORDS_WRITTEN
@@ -289,7 +287,6 @@
        source_callback: AbstractSourceCallback,
        monitor: AbstractMonitor,
    ) -> AbstractDestination:
-
        if config.name == DestinationTypes.LOGGER:
            from bizon.connectors.destinations.logger.src.destination import (
                LoggerDestination,
@@ -355,4 +352,4 @@
            monitor=monitor,
        )
 
-        raise ValueError(f"Destination {config.name}" f"with params {config} not found")
+        raise ValueError(f"Destination {config.name}with params {config} not found")
@@ -5,7 +5,7 @@ from typing import Optional, Union
from loguru import logger
from pytz import UTC
from sqlalchemy import Result, Select, create_engine, func, inspect, select, update
- from sqlalchemy.engine import Engine, create_engine
+ from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, scoped_session, sessionmaker
 
from bizon.engine.backend.backend import AbstractBackend
@@ -26,7 +26,6 @@ from .config import BigQueryConfigDetails, PostgresConfigDetails, SQLiteConfigDe
 
 
class SQLAlchemyBackend(AbstractBackend):
-
    def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes, **kwargs):
        super().__init__(config, type)
 
@@ -81,7 +80,6 @@
        )
 
    def _get_engine(self) -> Engine:
-
        if self.type == BackendTypes.BIGQUERY:
            return self._get_engine_bigquery()
 
@@ -96,7 +94,7 @@
        # ONLY FOR UNIT TESTS: SQLite in memory
        if self.type == BackendTypes.SQLITE_IN_MEMORY:
            return create_engine(
-                f"sqlite:///:memory:",
+                "sqlite:///:memory:",
                echo=self.config.echoEngine,
                connect_args={"check_same_thread": False},
            )
@@ -388,7 +386,6 @@
        pagination: Optional[dict] = None,
        session: Session | None = None,
    ) -> DestinationCursor:
-
        destination_cursor = DestinationCursor(
            name=name,
            source_name=source_name,
@@ -55,7 +55,6 @@ class SQLiteInMemoryConfig(AbstractBackendConfig):
 
## BIGQUERY ##
class BigQueryConfigDetails(SQLAlchemyConfigDetails):
-
    database: str = Field(
        description="GCP Project name",
        default=...,
bizon/engine/config.py CHANGED
@@ -23,7 +23,6 @@ from .runner.config import RunnerConfig, RunnerFuturesConfig, RunnerTypes
 
 
class EngineConfig(BaseModel):
-
    # Forbid extra keys in the model
    model_config = ConfigDict(extra="forbid")
 
bizon/engine/engine.py CHANGED
@@ -21,7 +21,6 @@ def replace_env_variables_in_config(config: dict) -> dict:
class RunnerFactory:
    @staticmethod
    def create_from_config_dict(config: dict) -> AbstractRunner:
-
        # Replace env variables in config
        config = replace_env_variables_in_config(config=config)
 
@@ -36,7 +36,6 @@ class AbstractQueueConsumer(ABC):
        pass
 
    def process_queue_message(self, queue_message: QueueMessage) -> PipelineReturnStatus:
-
        # Apply the transformation
        try:
            df_source_records = self.transform.apply_transforms(df_source_records=queue_message.df_source_records)
@@ -105,7 +105,6 @@ class Producer:
    def run(
        self, job_id: int, stop_event: Union[multiprocessing.synchronize.Event, threading.Event]
    ) -> PipelineReturnStatus:
-
        return_value: PipelineReturnStatus = PipelineReturnStatus.SUCCESS
 
        # Init queue
@@ -132,7 +131,6 @@
            return PipelineReturnStatus.BACKEND_ERROR
 
        while not cursor.is_finished:
-
            if stop_event.is_set():
                logger.info("Stop event is set, terminating producer ...")
                return PipelineReturnStatus.KILLED_BY_RUNNER
@@ -226,9 +224,7 @@
            items_in_queue = f"{self.queue.get_size()} items in queue." if self.queue.get_size() else ""
 
            logger.info(
-                (
-                    f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
-                )
+                f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
            )
 
        logger.info("Terminating destination ...")
@@ -1,4 +1,4 @@
- from typing import List, Literal
+ from typing import Literal
 
from pydantic import BaseModel, Field
 
@@ -13,7 +13,6 @@ from .consumer import KafkaConsumer_
 
 
class KafkaQueue(AbstractQueue):
-
    def __init__(self, config: KafkaConfigDetails) -> None:
        super().__init__(config)
        self.config: KafkaConfigDetails = config
@@ -35,7 +35,6 @@ class PythonQueueConsumer(AbstractQueueConsumer):
        self.monitor.track_pipeline_status(PipelineReturnStatus.RUNNING)
 
    def run(self, stop_event: Union[threading.Event, multiprocessing.synchronize.Event]) -> PipelineReturnStatus:
-
        while True:
            # Handle kill signal from the runner
            if stop_event.is_set():
@@ -9,7 +9,6 @@ from bizon.destination.destination import AbstractDestination
from bizon.engine.queue.config import QUEUE_TERMINATION, QueueMessage
from bizon.engine.queue.queue import AbstractQueue, AbstractQueueConsumer
from bizon.monitoring.monitor import AbstractMonitor
- from bizon.source.callback import AbstractSourceCallback
from bizon.source.models import SourceIteration
from bizon.transform.transform import Transform
 
@@ -18,7 +17,6 @@ from .consumer import PythonQueueConsumer
 
 
class PythonQueue(AbstractQueue):
-
    def __init__(self, config: PythonQueueConfigDetails, **kwargs) -> None:
        super().__init__(config)
        self.config: PythonQueueConfigDetails = config
@@ -24,7 +24,6 @@ class RabbitMQConsumer(AbstractQueueConsumer):
        channel.queue_declare(queue=self.config.queue.queue_name)
 
        for method_frame, properties, body in channel.consume(self.config.queue.queue_name):
-
            queue_message = QueueMessage.model_validate_json(body)
            if queue_message.signal == QUEUE_TERMINATION:
                logger.info("Received termination signal, waiting for destination to close gracefully ...")
@@ -13,7 +13,6 @@ from .consumer import RabbitMQConsumer
 
 
class RabbitMQ(AbstractQueue):
-
    def __init__(self, config: RabbitMQConfigDetails) -> None:
        super().__init__(config)
        self.config: RabbitMQConfigDetails = config
@@ -27,7 +27,6 @@ class QueueTypes(str, Enum):
 
 
class AbastractQueueConfigDetails(BaseModel, ABC):
-
    # Forbid extra keys in the model
    model_config = ConfigDict(extra="forbid")
 
@@ -38,7 +37,6 @@ AbastractQueueConfigDetails(BaseModel):
 
 
class AbstractQueueConfig(BaseModel, ABC):
-
    # Forbid extra keys in the model
    model_config = ConfigDict(extra="forbid")
 
@@ -8,7 +8,6 @@ from bizon.engine.runner.runner import AbstractRunner
 
 
class ProcessRunner(AbstractRunner):
-
    def __init__(self, config: dict):
        super().__init__(config)
 
@@ -36,7 +35,6 @@ class ProcessRunner(AbstractRunner):
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=self.bizon_config.engine.runner.config.max_workers
        ) as executor:
-
            future_producer = executor.submit(
                AbstractRunner.instanciate_and_run_producer,
                self.bizon_config,
@@ -9,11 +9,13 @@ from loguru import logger
from pytz import UTC
 
from bizon.common.models import BizonConfig, SyncMetadata
+ from bizon.connectors.destinations.bigquery.src.config import BigQueryRecordSchemaConfig
from bizon.destination.models import transform_to_df_destination_records
from bizon.engine.pipeline.models import PipelineReturnStatus
from bizon.engine.runner.config import RunnerStatus
from bizon.engine.runner.runner import AbstractRunner
from bizon.source.models import SourceRecord, source_record_schema
+ from bizon.source.source import AbstractSource
 
 
class StreamingRunner(AbstractRunner):
@@ -36,7 +38,60 @@ class StreamingRunner(AbstractRunner):
    def convert_to_destination_records(df_source_records: pl.DataFrame, extracted_at: datetime) -> pl.DataFrame:
        return transform_to_df_destination_records(df_source_records=df_source_records, extracted_at=extracted_at)
 
+     def _apply_streams_config(self, source: AbstractSource = None) -> None:
+         """Apply streams configuration to source and destination.
+
+         This method is completely source-agnostic. Each source connector is responsible
+         for handling streams config appropriately via set_streams_config().
+
+         When a top-level 'streams' configuration is present, this method:
+         1. Calls source.set_streams_config() to let the source enrich its own config
+         2. Builds destination record_schemas from streams config
+         3. Injects record_schemas into destination config for backward compatibility
+
+         The source is responsible for modifying self.config (which points to bizon_config.source)
+         so that subsequent source instantiations see the enriched config.
+         """
+         if not self.bizon_config.streams:
+             return
+
+         logger.info(f"Applying streams configuration: {len(self.bizon_config.streams)} streams defined")
+
+         # Let the source enrich its own config from streams
+         # Note: source modifies self.config, which is a reference to bizon_config.source
+         # This ensures init_job (which creates a new source) sees the enriched config
+         if source and hasattr(source, "set_streams_config") and callable(source.set_streams_config):
+             source.set_streams_config(self.bizon_config.streams)
+
+         # Build record_schemas list for destination from streams config
+         record_schemas = []
+         for stream in self.bizon_config.streams:
+             if stream.destination.record_schema:
+                 record_schema_config = BigQueryRecordSchemaConfig(
+                     destination_id=stream.destination.table_id,
+                     record_schema=stream.destination.record_schema,
+                     clustering_keys=stream.destination.clustering_keys,
+                 )
+                 record_schemas.append(record_schema_config)
+                 logger.info(
+                     f"Stream '{stream.name}': "
+                     f"{getattr(stream.source, 'topic', getattr(stream.source, 'name', 'N/A'))} "
+                     f"-> {stream.destination.table_id}"
+                 )
+
+         # Inject into destination config
+         if record_schemas and hasattr(self.bizon_config.destination.config, "record_schemas"):
+             logger.info(f"Injecting {len(record_schemas)} record schemas into destination config")
+             self.bizon_config.destination.config.record_schemas = record_schemas
+
    def run(self) -> RunnerStatus:
+         # Create a temporary source to enrich bizon_config.source from streams
+         # The source's set_streams_config() modifies self.config (= bizon_config.source)
+         # This ensures subsequent source instantiations see the enriched config
+         temp_source = self.get_source(bizon_config=self.bizon_config, config=self.config)
+         self._apply_streams_config(temp_source)
+
+         # Now initialize job (check_connection will use enriched source config)
        job = self.init_job(bizon_config=self.bizon_config, config=self.config)
        backend = self.get_backend(bizon_config=self.bizon_config)
        source = self.get_source(bizon_config=self.bizon_config, config=self.config)
@@ -58,7 +113,6 @@ class StreamingRunner(AbstractRunner):
        iteration = 0
 
        while True:
-
            if source.config.max_iterations and iteration > source.config.max_iterations:
                logger.info(f"Max iterations {source.config.max_iterations} reached, terminating stream ...")
                break
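The new _apply_streams_config hook only calls set_streams_config when the source actually defines it, so connectors without streams support are untouched. The source-side counterpart is not shown in this diff; the snippet below is a hypothetical sketch of what such a hook could look like, inferred from the runner docstring above (class and field names are illustrative, not package code):

from typing import List

from bizon.source.config import SourceConfig
from bizon.source.source import AbstractSource


class MyStreamingSourceConfig(SourceConfig):
    # Hypothetical field the hook fills in from the top-level streams block
    topics: List[str] = []


class MyStreamingSource(AbstractSource):
    def set_streams_config(self, streams) -> None:
        """Enrich this source's config from the top-level `streams` section.

        The runner expects the hook to mutate self.config in place: it is the same
        object as bizon_config.source, so sources created later (for example in
        init_job) see the enriched configuration.
        """
        # stream.source.topic mirrors the attribute the runner logs above
        self.config.topics = [stream.source.topic for stream in streams]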
@@ -16,7 +16,6 @@ class ThreadRunner(AbstractRunner):
 
    # TODO: refacto this
    def get_kwargs(self):
-
        extra_kwargs = {}
 
        if self.bizon_config.engine.queue.type == "python_queue":
@@ -46,7 +45,6 @@ class ThreadRunner(AbstractRunner):
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.bizon_config.engine.runner.config.max_workers
        ) as executor:
-
            future_producer = executor.submit(
                AbstractRunner.instanciate_and_run_producer,
                self.bizon_config,
@@ -37,7 +37,6 @@ class RunnerFuturesConfig(BaseModel):
 
 
class RunnerConfig(BaseModel):
-
    type: RunnerTypes = Field(
        description="Runner to use for the pipeline",
        default=RunnerTypes.THREAD,
@@ -27,7 +27,6 @@ from bizon.transform.transform import Transform
 
class AbstractRunner(ABC):
    def __init__(self, config: dict):
-
        # Internal state
        self._is_running: bool = False
 
@@ -222,7 +221,6 @@
        stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
        **kwargs,
    ):
-
        # Get the source instance
        source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)