bizon-0.1.1-py3-none-any.whl → bizon-0.2.0-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (85)
  1. bizon/alerting/alerts.py +0 -1
  2. bizon/common/models.py +184 -4
  3. bizon/connectors/destinations/bigquery/src/config.py +1 -1
  4. bizon/connectors/destinations/bigquery/src/destination.py +14 -9
  5. bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml +74 -0
  6. bizon/connectors/destinations/bigquery_streaming/src/config.py +6 -5
  7. bizon/connectors/destinations/bigquery_streaming/src/destination.py +13 -9
  8. bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml +79 -0
  9. bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +6 -1
  10. bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +232 -49
  11. bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py +1 -13
  12. bizon/connectors/destinations/file/config/file.example.yml +40 -0
  13. bizon/connectors/destinations/file/src/config.py +2 -1
  14. bizon/connectors/destinations/file/src/destination.py +3 -6
  15. bizon/connectors/destinations/logger/config/logger.example.yml +30 -0
  16. bizon/connectors/destinations/logger/src/config.py +1 -2
  17. bizon/connectors/destinations/logger/src/destination.py +4 -2
  18. bizon/connectors/sources/cycle/src/source.py +2 -6
  19. bizon/connectors/sources/dummy/src/source.py +0 -4
  20. bizon/connectors/sources/gsheets/src/source.py +2 -3
  21. bizon/connectors/sources/hubspot/src/hubspot_base.py +0 -1
  22. bizon/connectors/sources/hubspot/src/hubspot_objects.py +3 -4
  23. bizon/connectors/sources/hubspot/src/models/hs_object.py +0 -1
  24. bizon/connectors/sources/kafka/config/kafka.example.yml +1 -3
  25. bizon/connectors/sources/kafka/config/kafka_debezium.example.yml +1 -3
  26. bizon/connectors/sources/kafka/config/kafka_streams.example.yml +124 -0
  27. bizon/connectors/sources/kafka/src/config.py +10 -12
  28. bizon/connectors/sources/kafka/src/decode.py +65 -60
  29. bizon/connectors/sources/kafka/src/source.py +182 -61
  30. bizon/connectors/sources/kafka/tests/kafka_pipeline.py +1 -1
  31. bizon/connectors/sources/notion/config/api_key.example.yml +35 -0
  32. bizon/connectors/sources/notion/src/__init__.py +0 -0
  33. bizon/connectors/sources/notion/src/config.py +59 -0
  34. bizon/connectors/sources/notion/src/source.py +1159 -0
  35. bizon/connectors/sources/notion/tests/notion_pipeline.py +7 -0
  36. bizon/connectors/sources/notion/tests/test_notion.py +113 -0
  37. bizon/connectors/sources/periscope/src/source.py +0 -6
  38. bizon/connectors/sources/pokeapi/src/source.py +0 -1
  39. bizon/connectors/sources/sana_ai/config/sana.example.yml +25 -0
  40. bizon/connectors/sources/sana_ai/src/source.py +85 -0
  41. bizon/destination/buffer.py +0 -1
  42. bizon/destination/config.py +9 -1
  43. bizon/destination/destination.py +38 -9
  44. bizon/engine/backend/adapters/sqlalchemy/backend.py +2 -5
  45. bizon/engine/backend/adapters/sqlalchemy/config.py +0 -1
  46. bizon/engine/config.py +0 -1
  47. bizon/engine/engine.py +0 -1
  48. bizon/engine/pipeline/consumer.py +0 -1
  49. bizon/engine/pipeline/producer.py +1 -5
  50. bizon/engine/queue/adapters/kafka/config.py +1 -1
  51. bizon/engine/queue/adapters/kafka/queue.py +0 -1
  52. bizon/engine/queue/adapters/python_queue/consumer.py +0 -1
  53. bizon/engine/queue/adapters/python_queue/queue.py +0 -2
  54. bizon/engine/queue/adapters/rabbitmq/consumer.py +0 -1
  55. bizon/engine/queue/adapters/rabbitmq/queue.py +0 -1
  56. bizon/engine/queue/config.py +0 -2
  57. bizon/engine/runner/adapters/process.py +0 -2
  58. bizon/engine/runner/adapters/streaming.py +114 -42
  59. bizon/engine/runner/adapters/thread.py +0 -2
  60. bizon/engine/runner/config.py +0 -1
  61. bizon/engine/runner/runner.py +14 -9
  62. bizon/monitoring/config.py +12 -2
  63. bizon/monitoring/datadog/monitor.py +100 -14
  64. bizon/monitoring/monitor.py +41 -12
  65. bizon/monitoring/noop/monitor.py +22 -3
  66. bizon/source/auth/authenticators/abstract_oauth.py +11 -3
  67. bizon/source/auth/authenticators/abstract_token.py +2 -1
  68. bizon/source/auth/authenticators/basic.py +1 -1
  69. bizon/source/auth/authenticators/cookies.py +2 -1
  70. bizon/source/auth/authenticators/oauth.py +8 -3
  71. bizon/source/config.py +0 -2
  72. bizon/source/cursor.py +8 -16
  73. bizon/source/discover.py +3 -6
  74. bizon/source/models.py +0 -1
  75. bizon/source/session.py +0 -1
  76. bizon/source/source.py +18 -3
  77. bizon/transform/config.py +0 -2
  78. bizon/transform/transform.py +0 -3
  79. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info}/METADATA +62 -41
  80. bizon-0.2.0.dist-info/RECORD +136 -0
  81. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info}/WHEEL +1 -1
  82. bizon-0.2.0.dist-info/entry_points.txt +2 -0
  83. bizon-0.1.1.dist-info/RECORD +0 -123
  84. bizon-0.1.1.dist-info/entry_points.txt +0 -3
  85. {bizon-0.1.1.dist-info → bizon-0.2.0.dist-info/licenses}/LICENSE +0 -0
bizon/connectors/sources/notion/tests/notion_pipeline.py ADDED
@@ -0,0 +1,7 @@
+ import os
+
+ from bizon.engine.engine import RunnerFactory
+
+ if __name__ == "__main__":
+     runner = RunnerFactory.create_from_yaml(filepath=os.path.abspath("test-pipeline-notion.yml"))
+     runner.run()
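For comparison, RunnerFactory also exposes a dict-based entry point, create_from_config_dict (its definition appears under bizon/engine/engine.py later in this diff). A minimal sketch, assuming PyYAML is installed and that test-pipeline-notion.yml, which is not part of this diff, contains a valid pipeline definition:

    import yaml

    from bizon.engine.engine import RunnerFactory

    # Load the pipeline definition ourselves, then hand the dict to the
    # factory; create_from_config_dict also substitutes env variables
    # (see replace_env_variables_in_config in engine.py below).
    with open("test-pipeline-notion.yml") as f:
        config = yaml.safe_load(f)

    runner = RunnerFactory.create_from_config_dict(config)
    runner.run()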
bizon/connectors/sources/notion/tests/test_notion.py ADDED
@@ -0,0 +1,113 @@
+ """
+ Quick test script for Notion source intermediate functions.
+
+ Usage:
+     # Set your token
+     export NOTION_TOKEN="your_notion_integration_token"
+
+     # Run interactively
+     python -i test_notion.py
+
+     # Then test functions:
+     >>> page = source.get_page("page-id-here")
+     >>> blocks = source.fetch_blocks_recursively("page-id-here")
+     >>> for b in blocks[:5]:
+     ...     print(source._block_to_markdown(b))
+ """
+
+ import os
+
+ from bizon.connectors.sources.notion.src.config import NotionSourceConfig, NotionStreams
+ from bizon.connectors.sources.notion.src.source import NotionSource
+ from bizon.source.auth.authenticators.token import TokenAuthParams
+ from bizon.source.auth.config import AuthConfig, AuthType
+
+
+ def create_notion_source(
+     token: str = None,
+     page_ids: list = None,
+     database_ids: list = None,
+     stream: NotionStreams = NotionStreams.BLOCKS,
+ ) -> NotionSource:
+     """Create a NotionSource instance for testing."""
+     token = token or os.environ.get("NOTION_TOKEN")
+     if not token:
+         raise ValueError("Provide token or set NOTION_TOKEN environment variable")
+
+     config = NotionSourceConfig(
+         name="notion",
+         stream=stream,
+         page_ids=page_ids or [],
+         database_ids=database_ids or [],
+         authentication=AuthConfig(
+             type=AuthType.BEARER,
+             params=TokenAuthParams(token=token),
+         ),
+         init_pipeline=False,
+         max_recursion_depth=30,
+     )
+     return NotionSource(config)
+
+
+ # ==================== HELPER FUNCTIONS ====================
+
+
+ def get_block(source: NotionSource, block_id: str) -> dict:
+     """Fetch a single block by ID."""
+     response = source.session.get(f"https://api.notion.com/v1/blocks/{block_id}")
+     response.raise_for_status()
+     return response.json()
+
+
+ def get_page_markdown(source: NotionSource, page_id: str) -> str:
+     """Fetch all blocks from a page and return combined markdown."""
+     blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+     lines = []
+     for block in blocks:
+         md = source._block_to_markdown(block)
+         if md:
+             # Add indentation based on depth
+             indent = " " * block.get("depth", 0)
+             lines.append(f"{indent}{md}")
+     return "\n".join(lines)
+
+
+ def inspect_blocks(source: NotionSource, page_id: str, max_blocks: int = 10):
+     """Fetch and print block details for inspection."""
+     blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+     print(f"Found {len(blocks)} blocks")
+     for i, block in enumerate(blocks[:max_blocks]):
+         print(f"\n--- Block {i} ({block.get('type')}) ---")
+         print(f"ID: {block.get('id')}")
+         print(f"Depth: {block.get('depth')}, Order: {block.get('page_order')}")
+         print(f"Markdown: {source._block_to_markdown(block)}")
+
+
+ def list_pages_in_database(source: NotionSource, database_id: str) -> list:
+     """List all page IDs in a database."""
+     return source.get_pages_from_database(database_id, apply_filter=False)
+
+
+ # ==================== MAIN ====================
+
+ if __name__ == "__main__":
+     # Create source if token is available
+     token = os.environ.get("NOTION_TOKEN")
+     if token:
+         source = create_notion_source(token=token)
+         print("NotionSource created and available as 'source'")
+         print("\nAvailable functions:")
+         print(" source.get_page(page_id)")
+         print(" source.get_database(database_id)")
+         print(" source.get_block_children(block_id)")
+         print(" source.fetch_blocks_recursively(page_id)")
+         print(" source._block_to_markdown(block)")
+         print(" source.search()")
+         print("\nHelper functions:")
+         print(" get_block(source, block_id)")
+         print(" get_page_markdown(source, page_id)")
+         print(" inspect_blocks(source, page_id)")
+         print(" list_pages_in_database(source, database_id)")
+     else:
+         print("Set NOTION_TOKEN env var or call:")
+         print(" source = create_notion_source(token='your_token')")
bizon/connectors/sources/periscope/src/source.py CHANGED
@@ -41,7 +41,6 @@ class PeriscopeSourceConfig(SourceConfig):
 
 
  class PeriscopeSource(AbstractSource):
-
      def __init__(self, config: PeriscopeSourceConfig):
          super().__init__(config)
          self.config: PeriscopeSourceConfig = config
@@ -127,7 +126,6 @@ class PeriscopeSource(AbstractSource):
          return self.transform_response_to_source_iteration(records_json)
 
      def get_dashboards_metadata(self, pagination: dict = None) -> SourceIteration:
-
          params = {
              "client_site_id": self.config.client_site_id,
              "filters": [{"name": "typeFilter", "input": "Dashboard"}],
@@ -186,7 +184,6 @@ class PeriscopeSource(AbstractSource):
          dashboard_charts: List[dict] = []
 
          for iter_count in range(MAXIMUM_ITERATION):
-
              # Break the loop if no more charts are available
              if iter_count > 0 and len(iter_charts) == 0:
                  break
@@ -217,10 +214,8 @@
              iter_textboxes = response.json().get("TextBox")
 
              for chart in iter_charts:
-                 # Only fetch charts connected to gorgias-growth-production
                  if str(chart.get("database_id")) == str(self.config.database_id):
                      if chart.get("id") not in charts_list:
-
                          charts_list.add(chart.get("id"))
 
                          chart["raw_text"] = None
@@ -250,7 +245,6 @@
          return dashboard_charts
 
      def get_charts(self, pagination: dict = None) -> SourceIteration:
-
          BATCH_SIZE = 10
 
          if not pagination:
bizon/connectors/sources/pokeapi/src/source.py CHANGED
@@ -23,7 +23,6 @@ class PokeAPISourceConfig(SourceConfig):
 
 
  class PeriscopeSource(AbstractSource):
-
      def __init__(self, config: PokeAPISourceConfig):
          super().__init__(config)
          self.config: PokeAPISourceConfig = config
bizon/connectors/sources/sana_ai/config/sana.example.yml ADDED
@@ -0,0 +1,25 @@
+ name: sana to file
+
+ source:
+   name: sana_ai
+   stream: insight_report
+   domain: my_domain
+   query: 'SELECT "user", "user_type", "user_role", "user_origin", "user_registration_step", "user_creation_date", "user_disabled_date", "user_completion_date", "user_status", "user_last_active_date", "user_attribute_evangelist" FROM "analytics"."users" ORDER BY "user" ASC'
+   authentication:
+     type: oauth
+     params:
+       token_refresh_endpoint: https://my_domain.sana.ai/api/token
+       client_id: <client_id>
+       client_secret: <client_secret>
+       grant_type: client_credentials
+       access_token_name: accessToken
+       expires_in_name: expiresIn
+       response_field_path: data
+       scopes:
+         - read
+         - write
+
+ destination:
+   name: file
+   config:
+     destination_id: sana_ai_user_status
bizon/connectors/sources/sana_ai/src/source.py ADDED
@@ -0,0 +1,85 @@
+ import csv
+ import io
+ import time
+ from typing import Any, List, Tuple
+
+ from loguru import logger
+ from pydantic import Field
+ from requests.auth import AuthBase
+
+ from bizon.source.auth.builder import AuthBuilder
+ from bizon.source.auth.config import AuthType
+ from bizon.source.config import SourceConfig
+ from bizon.source.models import SourceIteration, SourceRecord
+ from bizon.source.source import AbstractSource
+
+
+ class SanaSourceConfig(SourceConfig):
+     query: str = Field(..., description="Query to get the data from the Sana Insight API")
+     domain: str = Field(..., description="Domain of the Sana instance")
+
+
+ class SanaSource(AbstractSource):
+     def __init__(self, config: SanaSourceConfig):
+         super().__init__(config)
+         self.config: SanaSourceConfig = config
+         self.base_url = f"https://{config.domain}.sana.ai/api/v1"
+
+     def get_authenticator(self) -> AuthBase:
+         if self.config.authentication.type.value == AuthType.OAUTH:
+             return AuthBuilder.oauth2(params=self.config.authentication.params)
+
+     @staticmethod
+     def streams() -> List[str]:
+         return ["insight_report"]
+
+     @staticmethod
+     def get_config_class() -> SourceConfig:
+         return SanaSourceConfig
+
+     def check_connection(self) -> Tuple[bool | Any | None]:
+         return True, None
+
+     def get_total_records_count(self) -> int | None:
+         return None
+
+     def create_insight_report_job(self, query: str) -> str:
+         """Create an insight report for the given query"""
+         response = self.session.post(f"{self.base_url}/reports/query", json={"query": query, "format": "csv"})
+         return response.json()["data"]["jobId"]
+
+     def get_insight_report_job(self, job_id: str) -> dict:
+         """Get an insight report job for the given job id"""
+         response = self.session.get(f"{self.base_url}/reports/jobs/{job_id}")
+         return response.json()
+
+     def get_insight_report(self, pagination: dict) -> SourceIteration:
+         """Return all insight report for the given query"""
+
+         job_id = self.create_insight_report_job(self.config.query)
+         logger.info(f"Created insight report job {job_id} for query {self.config.query}")
+
+         response = self.get_insight_report_job(job_id)
+         status = response["data"]["status"]
+         while status != "successful":
+             time.sleep(3)
+             response = self.get_insight_report_job(job_id)
+             status = response["data"]["status"]
+             logger.info(f"Insight report job {job_id} is {status}")
+
+         link = response["data"]["link"]["url"]
+         logger.info(f"Link for insight report job {job_id} is {link}")
+
+         csv_response = self.session.get(link)
+         csv_content = csv_response.content.decode("utf-8")
+
+         reader = csv.DictReader(io.StringIO(csv_content))
+         data = [SourceRecord(id=str(i), data=row) for i, row in enumerate(reader)]
+
+         return SourceIteration(records=data, next_pagination={})
+
+     def get(self, pagination: dict = None) -> SourceIteration:
+         if self.config.stream == "insight_report":
+             return self.get_insight_report(pagination)
+
+         raise NotImplementedError(f"Stream {self.config.stream} not implemented for Sana")
bizon/destination/buffer.py CHANGED
@@ -9,7 +9,6 @@ from .models import destination_record_schema
 
 
  class DestinationBuffer:
-
      def __init__(self, buffer_size: int, buffer_flush_timeout: int) -> None:
          self.buffer_size = buffer_size * 1024 * 1024  # Convert to bytes
          self.buffer_flush_timeout = buffer_flush_timeout
bizon/destination/config.py CHANGED
@@ -28,7 +28,6 @@ class RecordSchemaConfig(BaseModel):
 
 
  class AbstractDestinationDetailsConfig(BaseModel):
-
      # Forbid extra keys in the model
      model_config = ConfigDict(extra="forbid")
 
@@ -42,6 +41,11 @@ class AbstractDestinationDetailsConfig(BaseModel):
          description="Maximum time in seconds for buffering after which the records will be written to the destination. Set to 0 to deactivate the timeout buffer check.",  # noqa
      )
 
+     max_concurrent_threads: int = Field(
+         default=10,
+         description="Maximum number of concurrent threads to use for writing to the destination.",
+     )
+
      record_schemas: Optional[list[RecordSchemaConfig]] = Field(
          default=None, description="Schemas for the records. Required if unnest is set to true."
      )
@@ -71,4 +75,8 @@ class AbstractDestinationConfig(BaseModel):
      model_config = ConfigDict(extra="forbid")
 
      name: DestinationTypes = Field(..., description="Name of the destination")
+     alias: str = Field(
+         ...,
+         description="Alias of the destination, used for tracking the system name (ie bigquery for bigquery_streaming)",
+     )
      config: AbstractDestinationDetailsConfig = Field(..., description="Configuration for the destination")
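Because alias is declared with Field(...) (required, no default), destination configs written for 0.1.1 will fail validation under 0.2.0 until an alias is added. A self-contained illustration of the pydantic behavior (standalone models, not the bizon classes themselves):

    from pydantic import BaseModel, Field, ValidationError

    class DestConfig(BaseModel):
        name: str = Field(..., description="Name of the destination")
        alias: str = Field(..., description="Alias of the destination")

    try:
        DestConfig(name="bigquery_streaming")
    except ValidationError as exc:
        print(exc)  # reports that the `alias` field is required

    print(DestConfig(name="bigquery_streaming", alias="bigquery"))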
bizon/destination/destination.py CHANGED
@@ -10,6 +10,7 @@ from pydantic import BaseModel, Field
  from bizon.common.models import SyncMetadata
  from bizon.engine.backend.backend import AbstractBackend
  from bizon.engine.backend.models import JobStatus
+ from bizon.monitoring.monitor import AbstractMonitor
  from bizon.source.callback import AbstractSourceCallback
  from bizon.source.config import SourceSyncModes
 
@@ -43,17 +44,18 @@ class DestinationIteration(BaseModel):
 
 
  class AbstractDestination(ABC):
-
      def __init__(
          self,
          sync_metadata: SyncMetadata,
          config: AbstractDestinationDetailsConfig,
          backend: AbstractBackend,
          source_callback: AbstractSourceCallback,
+         monitor: AbstractMonitor,
      ):
          self.sync_metadata = sync_metadata
          self.config = config
          self.backend = backend
+         self.monitor = monitor
          self.buffer = DestinationBuffer(
              buffer_size=self.config.buffer_size, buffer_flush_timeout=self.config.buffer_flush_timeout
          )
@@ -141,7 +143,6 @@ class AbstractDestination(ABC):
 
          # Last iteration, write all records to destination
          if last_iteration:
-
              if self.buffer.df_destination_records.height == 0 and self.buffer.is_empty:
                  logger.info("No records to write to destination, already written, buffer is empty.")
                  return DestinationBufferStatus.RECORDS_WRITTEN
@@ -191,6 +192,14 @@ class AbstractDestination(ABC):
          logger.info(
              f"Buffer ripeness {round(self.buffer.ripeness / 60, 2)} min. Max ripeness {round(self.buffer.buffer_flush_timeout / 60, 2)} min."  # noqa
          )
+         logger.info(
+             f"Current records size to process: {round(df_destination_records.estimated_size(unit='b') / 1024 / 1024, 2)} Mb."
+         )
+
+         if df_destination_records.estimated_size(unit="b") > self.buffer.buffer_size:
+             raise ValueError(
+                 f"Records size {round(df_destination_records.estimated_size(unit='b') / 1024 / 1024, 2)} Mb is greater than buffer size {round(self.buffer.buffer_size / 1024 / 1024, 2)} Mb. Please increase destination buffer_size or reduce batch_size from the source."
+             )
 
          # Write buffer to destination if buffer is ripe and create a new buffer for the new iteration
          if self.buffer.is_ripe:
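The hunk above adds a hard guard: if a single incoming batch is larger than the configured buffer, the destination now raises instead of silently overrunning. Recall from DestinationBuffer (earlier in this diff) that buffer_size is configured in megabytes and stored in bytes after multiplying by 1024 * 1024. A standalone sketch of the same arithmetic using polars, which the destination code relies on for its record batches (values illustrative):

    import polars as pl

    buffer_size_mb = 50
    buffer_size_bytes = buffer_size_mb * 1024 * 1024  # same conversion as DestinationBuffer

    # Roughly 1 MiB of records; estimated_size(unit="b") is what the guard reads.
    df = pl.DataFrame({"payload": ["x" * 1024] * 1024})
    batch_bytes = df.estimated_size(unit="b")

    if batch_bytes > buffer_size_bytes:
        raise ValueError(
            f"Records size {batch_bytes / 1024 / 1024:.2f} Mb is greater than "
            f"buffer size {buffer_size_mb} Mb"
        )
    print(f"{batch_bytes / 1024 / 1024:.2f} Mb fits in a {buffer_size_mb} Mb buffer")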
@@ -276,15 +285,19 @@
          config: AbstractDestinationConfig,
          backend: AbstractBackend,
          source_callback: AbstractSourceCallback,
+         monitor: AbstractMonitor,
      ) -> AbstractDestination:
-
          if config.name == DestinationTypes.LOGGER:
              from bizon.connectors.destinations.logger.src.destination import (
                  LoggerDestination,
              )
 
              return LoggerDestination(
-                 sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback
+                 sync_metadata=sync_metadata,
+                 config=config.config,
+                 backend=backend,
+                 source_callback=source_callback,
+                 monitor=monitor,
              )
 
          elif config.name == DestinationTypes.BIGQUERY:
@@ -293,7 +306,11 @@
              )
 
              return BigQueryDestination(
-                 sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback
+                 sync_metadata=sync_metadata,
+                 config=config.config,
+                 backend=backend,
+                 source_callback=source_callback,
+                 monitor=monitor,
              )
 
          elif config.name == DestinationTypes.BIGQUERY_STREAMING:
@@ -302,7 +319,11 @@
              )
 
              return BigQueryStreamingDestination(
-                 sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback
+                 sync_metadata=sync_metadata,
+                 config=config.config,
+                 backend=backend,
+                 source_callback=source_callback,
+                 monitor=monitor,
              )
 
          elif config.name == DestinationTypes.BIGQUERY_STREAMING_V2:
@@ -311,7 +332,11 @@
              )
 
              return BigQueryStreamingV2Destination(
-                 sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback
+                 sync_metadata=sync_metadata,
+                 config=config.config,
+                 backend=backend,
+                 source_callback=source_callback,
+                 monitor=monitor,
              )
 
          elif config.name == DestinationTypes.FILE:
@@ -320,7 +345,11 @@
              )
 
              return FileDestination(
-                 sync_metadata=sync_metadata, config=config.config, backend=backend, source_callback=source_callback
+                 sync_metadata=sync_metadata,
+                 config=config.config,
+                 backend=backend,
+                 source_callback=source_callback,
+                 monitor=monitor,
              )
 
-         raise ValueError(f"Destination {config.name}" f"with params {config} not found")
+         raise ValueError(f"Destination {config.name}with params {config} not found")
bizon/engine/backend/adapters/sqlalchemy/backend.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional, Union
  from loguru import logger
  from pytz import UTC
  from sqlalchemy import Result, Select, create_engine, func, inspect, select, update
- from sqlalchemy.engine import Engine, create_engine
+ from sqlalchemy.engine import Engine
  from sqlalchemy.orm import Session, scoped_session, sessionmaker
 
  from bizon.engine.backend.backend import AbstractBackend
@@ -26,7 +26,6 @@ from .config import BigQueryConfigDetails, PostgresConfigDetails, SQLiteConfigDe
 
 
  class SQLAlchemyBackend(AbstractBackend):
-
      def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes, **kwargs):
          super().__init__(config, type)
 
@@ -81,7 +80,6 @@
          )
 
      def _get_engine(self) -> Engine:
-
          if self.type == BackendTypes.BIGQUERY:
              return self._get_engine_bigquery()
 
@@ -96,7 +94,7 @@
          # ONLY FOR UNIT TESTS: SQLite in memory
          if self.type == BackendTypes.SQLITE_IN_MEMORY:
              return create_engine(
-                 f"sqlite:///:memory:",
+                 "sqlite:///:memory:",
                  echo=self.config.echoEngine,
                  connect_args={"check_same_thread": False},
              )
@@ -388,7 +386,6 @@
          pagination: Optional[dict] = None,
          session: Session | None = None,
      ) -> DestinationCursor:
-
          destination_cursor = DestinationCursor(
              name=name,
              source_name=source_name,
bizon/engine/backend/adapters/sqlalchemy/config.py CHANGED
@@ -55,7 +55,6 @@ class SQLiteInMemoryConfig(AbstractBackendConfig):
 
  ## BIGQUERY ##
  class BigQueryConfigDetails(SQLAlchemyConfigDetails):
-
      database: str = Field(
          description="GCP Project name",
          default=...,
bizon/engine/config.py CHANGED
@@ -23,7 +23,6 @@ from .runner.config import RunnerConfig, RunnerFuturesConfig, RunnerTypes
 
 
  class EngineConfig(BaseModel):
-
      # Forbid extra keys in the model
      model_config = ConfigDict(extra="forbid")
 
bizon/engine/engine.py CHANGED
@@ -21,7 +21,6 @@ def replace_env_variables_in_config(config: dict) -> dict:
  class RunnerFactory:
      @staticmethod
      def create_from_config_dict(config: dict) -> AbstractRunner:
-
          # Replace env variables in config
          config = replace_env_variables_in_config(config=config)
 
bizon/engine/pipeline/consumer.py CHANGED
@@ -36,7 +36,6 @@ class AbstractQueueConsumer(ABC):
          pass
 
      def process_queue_message(self, queue_message: QueueMessage) -> PipelineReturnStatus:
-
          # Apply the transformation
          try:
              df_source_records = self.transform.apply_transforms(df_source_records=queue_message.df_source_records)
bizon/engine/pipeline/producer.py CHANGED
@@ -105,7 +105,6 @@ class Producer:
      def run(
          self, job_id: int, stop_event: Union[multiprocessing.synchronize.Event, threading.Event]
      ) -> PipelineReturnStatus:
-
          return_value: PipelineReturnStatus = PipelineReturnStatus.SUCCESS
 
          # Init queue
@@ -132,7 +131,6 @@
              return PipelineReturnStatus.BACKEND_ERROR
 
          while not cursor.is_finished:
-
              if stop_event.is_set():
                  logger.info("Stop event is set, terminating producer ...")
                  return PipelineReturnStatus.KILLED_BY_RUNNER
@@ -226,9 +224,7 @@
          items_in_queue = f"{self.queue.get_size()} items in queue." if self.queue.get_size() else ""
 
          logger.info(
-             (
-                 f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
-             )
+             f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
          )
 
          logger.info("Terminating destination ...")
bizon/engine/queue/adapters/kafka/config.py CHANGED
@@ -1,4 +1,4 @@
- from typing import List, Literal
+ from typing import Literal
 
  from pydantic import BaseModel, Field
 
bizon/engine/queue/adapters/kafka/queue.py CHANGED
@@ -13,7 +13,6 @@ from .consumer import KafkaConsumer_
 
 
  class KafkaQueue(AbstractQueue):
-
      def __init__(self, config: KafkaConfigDetails) -> None:
          super().__init__(config)
          self.config: KafkaConfigDetails = config
bizon/engine/queue/adapters/python_queue/consumer.py CHANGED
@@ -35,7 +35,6 @@ class PythonQueueConsumer(AbstractQueueConsumer):
          self.monitor.track_pipeline_status(PipelineReturnStatus.RUNNING)
 
      def run(self, stop_event: Union[threading.Event, multiprocessing.synchronize.Event]) -> PipelineReturnStatus:
-
          while True:
              # Handle kill signal from the runner
              if stop_event.is_set():
bizon/engine/queue/adapters/python_queue/queue.py CHANGED
@@ -9,7 +9,6 @@ from bizon.destination.destination import AbstractDestination
  from bizon.engine.queue.config import QUEUE_TERMINATION, QueueMessage
  from bizon.engine.queue.queue import AbstractQueue, AbstractQueueConsumer
  from bizon.monitoring.monitor import AbstractMonitor
- from bizon.source.callback import AbstractSourceCallback
  from bizon.source.models import SourceIteration
  from bizon.transform.transform import Transform
 
@@ -18,7 +17,6 @@ from .consumer import PythonQueueConsumer
 
 
  class PythonQueue(AbstractQueue):
-
      def __init__(self, config: PythonQueueConfigDetails, **kwargs) -> None:
          super().__init__(config)
          self.config: PythonQueueConfigDetails = config
bizon/engine/queue/adapters/rabbitmq/consumer.py CHANGED
@@ -24,7 +24,6 @@ class RabbitMQConsumer(AbstractQueueConsumer):
          channel.queue_declare(queue=self.config.queue.queue_name)
 
          for method_frame, properties, body in channel.consume(self.config.queue.queue_name):
-
              queue_message = QueueMessage.model_validate_json(body)
              if queue_message.signal == QUEUE_TERMINATION:
                  logger.info("Received termination signal, waiting for destination to close gracefully ...")
bizon/engine/queue/adapters/rabbitmq/queue.py CHANGED
@@ -13,7 +13,6 @@ from .consumer import RabbitMQConsumer
 
 
  class RabbitMQ(AbstractQueue):
-
      def __init__(self, config: RabbitMQConfigDetails) -> None:
          super().__init__(config)
          self.config: RabbitMQConfigDetails = config
bizon/engine/queue/config.py CHANGED
@@ -27,7 +27,6 @@ class QueueTypes(str, Enum):
 
 
  class AbastractQueueConfigDetails(BaseModel, ABC):
-
      # Forbid extra keys in the model
      model_config = ConfigDict(extra="forbid")
 
@@ -38,7 +37,6 @@ class AbastractQueueConfigDetails(BaseModel, ABC):
 
 
  class AbstractQueueConfig(BaseModel, ABC):
-
      # Forbid extra keys in the model
      model_config = ConfigDict(extra="forbid")
 
bizon/engine/runner/adapters/process.py CHANGED
@@ -8,7 +8,6 @@ from bizon.engine.runner.runner import AbstractRunner
 
 
  class ProcessRunner(AbstractRunner):
-
      def __init__(self, config: dict):
          super().__init__(config)
 
@@ -36,7 +35,6 @@ class ProcessRunner(AbstractRunner):
          with concurrent.futures.ProcessPoolExecutor(
              max_workers=self.bizon_config.engine.runner.config.max_workers
          ) as executor:
-
              future_producer = executor.submit(
                  AbstractRunner.instanciate_and_run_producer,
                  self.bizon_config,