bizon 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- bizon/alerting/alerts.py +0 -1
- bizon/common/models.py +182 -4
- bizon/connectors/destinations/bigquery/src/config.py +0 -1
- bizon/connectors/destinations/bigquery/src/destination.py +11 -8
- bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml +74 -0
- bizon/connectors/destinations/bigquery_streaming/src/destination.py +4 -5
- bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml +79 -0
- bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +4 -6
- bizon/connectors/destinations/file/config/file.example.yml +40 -0
- bizon/connectors/destinations/file/src/config.py +1 -1
- bizon/connectors/destinations/file/src/destination.py +0 -5
- bizon/connectors/destinations/logger/config/logger.example.yml +30 -0
- bizon/connectors/destinations/logger/src/config.py +0 -2
- bizon/connectors/destinations/logger/src/destination.py +1 -2
- bizon/connectors/sources/cycle/src/source.py +2 -6
- bizon/connectors/sources/dummy/src/source.py +0 -4
- bizon/connectors/sources/gsheets/src/source.py +2 -3
- bizon/connectors/sources/hubspot/src/hubspot_base.py +0 -1
- bizon/connectors/sources/hubspot/src/hubspot_objects.py +3 -4
- bizon/connectors/sources/hubspot/src/models/hs_object.py +0 -1
- bizon/connectors/sources/kafka/config/kafka_streams.example.yml +124 -0
- bizon/connectors/sources/kafka/src/config.py +10 -6
- bizon/connectors/sources/kafka/src/decode.py +2 -2
- bizon/connectors/sources/kafka/src/source.py +147 -46
- bizon/connectors/sources/notion/config/api_key.example.yml +35 -0
- bizon/connectors/sources/notion/src/__init__.py +0 -0
- bizon/connectors/sources/notion/src/config.py +59 -0
- bizon/connectors/sources/notion/src/source.py +1159 -0
- bizon/connectors/sources/notion/tests/notion_pipeline.py +7 -0
- bizon/connectors/sources/notion/tests/test_notion.py +113 -0
- bizon/connectors/sources/periscope/src/source.py +0 -6
- bizon/connectors/sources/pokeapi/src/source.py +0 -1
- bizon/connectors/sources/sana_ai/config/sana.example.yml +25 -0
- bizon/connectors/sources/sana_ai/src/source.py +85 -0
- bizon/destination/buffer.py +0 -1
- bizon/destination/config.py +0 -1
- bizon/destination/destination.py +1 -4
- bizon/engine/backend/adapters/sqlalchemy/backend.py +2 -5
- bizon/engine/backend/adapters/sqlalchemy/config.py +0 -1
- bizon/engine/config.py +0 -1
- bizon/engine/engine.py +0 -1
- bizon/engine/pipeline/consumer.py +0 -1
- bizon/engine/pipeline/producer.py +1 -5
- bizon/engine/queue/adapters/kafka/config.py +1 -1
- bizon/engine/queue/adapters/kafka/queue.py +0 -1
- bizon/engine/queue/adapters/python_queue/consumer.py +0 -1
- bizon/engine/queue/adapters/python_queue/queue.py +0 -2
- bizon/engine/queue/adapters/rabbitmq/consumer.py +0 -1
- bizon/engine/queue/adapters/rabbitmq/queue.py +0 -1
- bizon/engine/queue/config.py +0 -2
- bizon/engine/runner/adapters/process.py +0 -2
- bizon/engine/runner/adapters/streaming.py +55 -1
- bizon/engine/runner/adapters/thread.py +0 -2
- bizon/engine/runner/config.py +0 -1
- bizon/engine/runner/runner.py +0 -2
- bizon/monitoring/datadog/monitor.py +5 -3
- bizon/monitoring/noop/monitor.py +1 -1
- bizon/source/auth/authenticators/abstract_oauth.py +11 -3
- bizon/source/auth/authenticators/abstract_token.py +2 -1
- bizon/source/auth/authenticators/basic.py +1 -1
- bizon/source/auth/authenticators/cookies.py +2 -1
- bizon/source/auth/authenticators/oauth.py +8 -3
- bizon/source/config.py +0 -2
- bizon/source/cursor.py +8 -16
- bizon/source/discover.py +3 -6
- bizon/source/models.py +0 -1
- bizon/source/session.py +0 -1
- bizon/source/source.py +17 -2
- bizon/transform/config.py +0 -2
- bizon/transform/transform.py +0 -3
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/METADATA +62 -42
- bizon-0.2.0.dist-info/RECORD +136 -0
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info}/WHEEL +1 -1
- bizon-0.2.0.dist-info/entry_points.txt +2 -0
- bizon-0.1.2.dist-info/RECORD +0 -123
- bizon-0.1.2.dist-info/entry_points.txt +0 -3
- {bizon-0.1.2.dist-info → bizon-0.2.0.dist-info/licenses}/LICENSE +0 -0
bizon/connectors/sources/notion/tests/test_notion.py
ADDED
@@ -0,0 +1,113 @@
+"""
+Quick test script for Notion source intermediate functions.
+
+Usage:
+    # Set your token
+    export NOTION_TOKEN="your_notion_integration_token"
+
+    # Run interactively
+    python -i test_notion.py
+
+    # Then test functions:
+    >>> page = source.get_page("page-id-here")
+    >>> blocks = source.fetch_blocks_recursively("page-id-here")
+    >>> for b in blocks[:5]:
+    ...     print(source._block_to_markdown(b))
+"""
+
+import os
+
+from bizon.connectors.sources.notion.src.config import NotionSourceConfig, NotionStreams
+from bizon.connectors.sources.notion.src.source import NotionSource
+from bizon.source.auth.authenticators.token import TokenAuthParams
+from bizon.source.auth.config import AuthConfig, AuthType
+
+
+def create_notion_source(
+    token: str = None,
+    page_ids: list = None,
+    database_ids: list = None,
+    stream: NotionStreams = NotionStreams.BLOCKS,
+) -> NotionSource:
+    """Create a NotionSource instance for testing."""
+    token = token or os.environ.get("NOTION_TOKEN")
+    if not token:
+        raise ValueError("Provide token or set NOTION_TOKEN environment variable")
+
+    config = NotionSourceConfig(
+        name="notion",
+        stream=stream,
+        page_ids=page_ids or [],
+        database_ids=database_ids or [],
+        authentication=AuthConfig(
+            type=AuthType.BEARER,
+            params=TokenAuthParams(token=token),
+        ),
+        init_pipeline=False,
+        max_recursion_depth=30,
+    )
+    return NotionSource(config)
+
+
+# ==================== HELPER FUNCTIONS ====================
+
+
+def get_block(source: NotionSource, block_id: str) -> dict:
+    """Fetch a single block by ID."""
+    response = source.session.get(f"https://api.notion.com/v1/blocks/{block_id}")
+    response.raise_for_status()
+    return response.json()
+
+
+def get_page_markdown(source: NotionSource, page_id: str) -> str:
+    """Fetch all blocks from a page and return combined markdown."""
+    blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+    lines = []
+    for block in blocks:
+        md = source._block_to_markdown(block)
+        if md:
+            # Add indentation based on depth
+            indent = " " * block.get("depth", 0)
+            lines.append(f"{indent}{md}")
+    return "\n".join(lines)
+
+
+def inspect_blocks(source: NotionSource, page_id: str, max_blocks: int = 10):
+    """Fetch and print block details for inspection."""
+    blocks = source.fetch_blocks_recursively(page_id, source_page_id=page_id)
+    print(f"Found {len(blocks)} blocks")
+    for i, block in enumerate(blocks[:max_blocks]):
+        print(f"\n--- Block {i} ({block.get('type')}) ---")
+        print(f"ID: {block.get('id')}")
+        print(f"Depth: {block.get('depth')}, Order: {block.get('page_order')}")
+        print(f"Markdown: {source._block_to_markdown(block)}")
+
+
+def list_pages_in_database(source: NotionSource, database_id: str) -> list:
+    """List all page IDs in a database."""
+    return source.get_pages_from_database(database_id, apply_filter=False)
+
+
+# ==================== MAIN ====================
+
+if __name__ == "__main__":
+    # Create source if token is available
+    token = os.environ.get("NOTION_TOKEN")
+    if token:
+        source = create_notion_source(token=token)
+        print("NotionSource created and available as 'source'")
+        print("\nAvailable functions:")
+        print("  source.get_page(page_id)")
+        print("  source.get_database(database_id)")
+        print("  source.get_block_children(block_id)")
+        print("  source.fetch_blocks_recursively(page_id)")
+        print("  source._block_to_markdown(block)")
+        print("  source.search()")
+        print("\nHelper functions:")
+        print("  get_block(source, block_id)")
+        print("  get_page_markdown(source, page_id)")
+        print("  inspect_blocks(source, page_id)")
+        print("  list_pages_in_database(source, database_id)")
+    else:
+        print("Set NOTION_TOKEN env var or call:")
+        print("    source = create_notion_source(token='your_token')")
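The helpers in this new test script compose into small utilities. As an illustrative sketch only (the return shape of get_pages_from_database is not visible in this diff, so the page["id"] access below is an assumption), a whole database could be dumped to one markdown file per page:

```python
# Hypothetical composition of the test-script helpers above; assumes the
# helpers are in scope (e.g. after `python -i test_notion.py`) and that
# list_pages_in_database returns dicts carrying an "id" key (an assumption,
# since this diff does not show get_pages_from_database).
from pathlib import Path

source = create_notion_source()  # reads NOTION_TOKEN from the environment
for page in list_pages_in_database(source, "database-id-here"):
    page_id = page["id"]
    Path(f"{page_id}.md").write_text(get_page_markdown(source, page_id))
```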
bizon/connectors/sources/periscope/src/source.py
CHANGED
@@ -41,7 +41,6 @@ class PeriscopeSourceConfig(SourceConfig):
 
 
 class PeriscopeSource(AbstractSource):
-
     def __init__(self, config: PeriscopeSourceConfig):
         super().__init__(config)
         self.config: PeriscopeSourceConfig = config
@@ -127,7 +126,6 @@ class PeriscopeSource(AbstractSource):
         return self.transform_response_to_source_iteration(records_json)
 
     def get_dashboards_metadata(self, pagination: dict = None) -> SourceIteration:
-
         params = {
             "client_site_id": self.config.client_site_id,
             "filters": [{"name": "typeFilter", "input": "Dashboard"}],
@@ -186,7 +184,6 @@ class PeriscopeSource(AbstractSource):
         dashboard_charts: List[dict] = []
 
         for iter_count in range(MAXIMUM_ITERATION):
-
            # Break the loop if no more charts are available
            if iter_count > 0 and len(iter_charts) == 0:
                break
@@ -217,10 +214,8 @@ class PeriscopeSource(AbstractSource):
        iter_textboxes = response.json().get("TextBox")
 
        for chart in iter_charts:
-            # Only fetch charts connected to gorgias-growth-production
            if str(chart.get("database_id")) == str(self.config.database_id):
                if chart.get("id") not in charts_list:
-
                    charts_list.add(chart.get("id"))
 
                    chart["raw_text"] = None
@@ -250,7 +245,6 @@ class PeriscopeSource(AbstractSource):
        return dashboard_charts
 
    def get_charts(self, pagination: dict = None) -> SourceIteration:
-
        BATCH_SIZE = 10
 
        if not pagination:
bizon/connectors/sources/sana_ai/config/sana.example.yml
ADDED
@@ -0,0 +1,25 @@
+name: sana to file
+
+source:
+  name: sana_ai
+  stream: insight_report
+  domain: my_domain
+  query: 'SELECT "user", "user_type", "user_role", "user_origin", "user_registration_step", "user_creation_date", "user_disabled_date", "user_completion_date", "user_status", "user_last_active_date", "user_attribute_evangelist" FROM "analytics"."users" ORDER BY "user" ASC'
+  authentication:
+    type: oauth
+    params:
+      token_refresh_endpoint: https://my_domain.sana.ai/api/token
+      client_id: <client_id>
+      client_secret: <client_secret>
+      grant_type: client_credentials
+      access_token_name: accessToken
+      expires_in_name: expiresIn
+      response_field_path: data
+      scopes:
+        - read
+        - write
+
+destination:
+  name: file
+  config:
+    destination_id: sana_ai_user_status
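Configs like this example are consumed by the engine's RunnerFactory (see the bizon/engine/engine.py hunk further down). A minimal sketch of wiring the two together, using only the factory API visible in this diff:

```python
# Minimal sketch of running the example config above; relies only on the
# RunnerFactory API shown later in this diff (bizon/engine/engine.py).
import yaml

from bizon.engine.engine import RunnerFactory

with open("sana.example.yml") as f:
    config = yaml.safe_load(f)

# create_from_config_dict resolves env variables in the config and builds
# the configured runner; run() then executes the pipeline.
runner = RunnerFactory.create_from_config_dict(config)
runner.run()
```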
bizon/connectors/sources/sana_ai/src/source.py
ADDED
@@ -0,0 +1,85 @@
+import csv
+import io
+import time
+from typing import Any, List, Tuple
+
+from loguru import logger
+from pydantic import Field
+from requests.auth import AuthBase
+
+from bizon.source.auth.builder import AuthBuilder
+from bizon.source.auth.config import AuthType
+from bizon.source.config import SourceConfig
+from bizon.source.models import SourceIteration, SourceRecord
+from bizon.source.source import AbstractSource
+
+
+class SanaSourceConfig(SourceConfig):
+    query: str = Field(..., description="Query to get the data from the Sana Insight API")
+    domain: str = Field(..., description="Domain of the Sana instance")
+
+
+class SanaSource(AbstractSource):
+    def __init__(self, config: SanaSourceConfig):
+        super().__init__(config)
+        self.config: SanaSourceConfig = config
+        self.base_url = f"https://{config.domain}.sana.ai/api/v1"
+
+    def get_authenticator(self) -> AuthBase:
+        if self.config.authentication.type.value == AuthType.OAUTH:
+            return AuthBuilder.oauth2(params=self.config.authentication.params)
+
+    @staticmethod
+    def streams() -> List[str]:
+        return ["insight_report"]
+
+    @staticmethod
+    def get_config_class() -> SourceConfig:
+        return SanaSourceConfig
+
+    def check_connection(self) -> Tuple[bool | Any | None]:
+        return True, None
+
+    def get_total_records_count(self) -> int | None:
+        return None
+
+    def create_insight_report_job(self, query: str) -> str:
+        """Create an insight report for the given query"""
+        response = self.session.post(f"{self.base_url}/reports/query", json={"query": query, "format": "csv"})
+        return response.json()["data"]["jobId"]
+
+    def get_insight_report_job(self, job_id: str) -> dict:
+        """Get an insight report job for the given job id"""
+        response = self.session.get(f"{self.base_url}/reports/jobs/{job_id}")
+        return response.json()
+
+    def get_insight_report(self, pagination: dict) -> SourceIteration:
+        """Return all insight report for the given query"""
+
+        job_id = self.create_insight_report_job(self.config.query)
+        logger.info(f"Created insight report job {job_id} for query {self.config.query}")
+
+        response = self.get_insight_report_job(job_id)
+        status = response["data"]["status"]
+        while status != "successful":
+            time.sleep(3)
+            response = self.get_insight_report_job(job_id)
+            status = response["data"]["status"]
+            logger.info(f"Insight report job {job_id} is {status}")
+
+        link = response["data"]["link"]["url"]
+        logger.info(f"Link for insight report job {job_id} is {link}")
+
+        csv_response = self.session.get(link)
+        csv_content = csv_response.content.decode("utf-8")
+
+        reader = csv.DictReader(io.StringIO(csv_content))
+        data = [SourceRecord(id=str(i), data=row) for i, row in enumerate(reader)]
+
+        return SourceIteration(records=data, next_pagination={})
+
+    def get(self, pagination: dict = None) -> SourceIteration:
+        if self.config.stream == "insight_report":
+            return self.get_insight_report(pagination)
+
+        raise NotImplementedError(f"Stream {self.config.stream} not implemented for Sana")
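The connector above wraps a simple job-based export API. The same lifecycle can be sketched standalone with plain requests; the endpoint paths and payloads mirror SanaSource, while authentication (the OAuth bearer token from the example config) is elided:

```python
# Standalone sketch of the Sana insight-report job lifecycle implemented by
# SanaSource above; auth headers are elided (assumed OAuth bearer token).
import time

import requests

BASE_URL = "https://my_domain.sana.ai/api/v1"  # domain as in sana.example.yml
session = requests.Session()

# 1. Create a CSV export job for the query
job_id = session.post(
    f"{BASE_URL}/reports/query",
    json={"query": 'SELECT "user" FROM "analytics"."users"', "format": "csv"},
).json()["data"]["jobId"]

# 2. Poll until the job reports success (SanaSource polls every 3 seconds,
# with no failure or timeout handling)
while True:
    data = session.get(f"{BASE_URL}/reports/jobs/{job_id}").json()["data"]
    if data["status"] == "successful":
        break
    time.sleep(3)

# 3. Download the CSV from the returned link
csv_content = session.get(data["link"]["url"]).content.decode("utf-8")
```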
bizon/destination/buffer.py
CHANGED
@@ -9,7 +9,6 @@ from .models import destination_record_schema
 
 
 class DestinationBuffer:
-
     def __init__(self, buffer_size: int, buffer_flush_timeout: int) -> None:
         self.buffer_size = buffer_size * 1024 * 1024  # Convert to bytes
         self.buffer_flush_timeout = buffer_flush_timeout
bizon/destination/config.py
CHANGED
bizon/destination/destination.py
CHANGED
@@ -44,7 +44,6 @@ class DestinationIteration(BaseModel):
 
 
 class AbstractDestination(ABC):
-
     def __init__(
         self,
         sync_metadata: SyncMetadata,
@@ -144,7 +143,6 @@ class AbstractDestination(ABC):
 
         # Last iteration, write all records to destination
         if last_iteration:
-
             if self.buffer.df_destination_records.height == 0 and self.buffer.is_empty:
                 logger.info("No records to write to destination, already written, buffer is empty.")
                 return DestinationBufferStatus.RECORDS_WRITTEN
@@ -289,7 +287,6 @@ class DestinationFactory:
         source_callback: AbstractSourceCallback,
         monitor: AbstractMonitor,
     ) -> AbstractDestination:
-
         if config.name == DestinationTypes.LOGGER:
             from bizon.connectors.destinations.logger.src.destination import (
                 LoggerDestination,
@@ -355,4 +352,4 @@ class DestinationFactory:
             monitor=monitor,
         )
 
-        raise ValueError(f"Destination {config.name} not found")
+        raise ValueError(f"Destination {config.name} with params {config} not found")
bizon/engine/backend/adapters/sqlalchemy/backend.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional, Union
 from loguru import logger
 from pytz import UTC
 from sqlalchemy import Result, Select, create_engine, func, inspect, select, update
-from sqlalchemy.engine import Engine
+from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session, scoped_session, sessionmaker
 
 from bizon.engine.backend.backend import AbstractBackend
@@ -26,7 +26,6 @@ from .config import BigQueryConfigDetails, PostgresConfigDetails, SQLiteConfigDetails
 
 
 class SQLAlchemyBackend(AbstractBackend):
-
     def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes, **kwargs):
         super().__init__(config, type)
 
@@ -81,7 +80,6 @@ class SQLAlchemyBackend(AbstractBackend):
         )
 
     def _get_engine(self) -> Engine:
-
         if self.type == BackendTypes.BIGQUERY:
             return self._get_engine_bigquery()
 
@@ -96,7 +94,7 @@
         # ONLY FOR UNIT TESTS: SQLite in memory
         if self.type == BackendTypes.SQLITE_IN_MEMORY:
             return create_engine(
-
+                "sqlite:///:memory:",
                 echo=self.config.echoEngine,
                 connect_args={"check_same_thread": False},
             )
@@ -388,7 +386,6 @@
         pagination: Optional[dict] = None,
         session: Session | None = None,
     ) -> DestinationCursor:
-
         destination_cursor = DestinationCursor(
             name=name,
             source_name=source_name,
bizon/engine/config.py
CHANGED
bizon/engine/engine.py
CHANGED
@@ -21,7 +21,6 @@ def replace_env_variables_in_config(config: dict) -> dict:
 class RunnerFactory:
     @staticmethod
     def create_from_config_dict(config: dict) -> AbstractRunner:
-
         # Replace env variables in config
         config = replace_env_variables_in_config(config=config)
 
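The body of replace_env_variables_in_config is not part of this diff. A plausible sketch of such a helper (the ${VAR} placeholder syntax is an assumption) recursively substitutes environment values into the parsed config:

```python
# Plausible sketch of an env-substitution helper like
# replace_env_variables_in_config; the ${VAR} syntax is an assumption,
# since this diff only shows the function's signature.
import os
import re


def replace_env_variables_in_config(config: dict) -> dict:
    def substitute(value):
        if isinstance(value, str):
            # Replace ${VAR} with its environment value; keep the literal if unset
            return re.sub(r"\$\{(\w+)\}", lambda m: os.environ.get(m.group(1), m.group(0)), value)
        if isinstance(value, dict):
            return {key: substitute(val) for key, val in value.items()}
        if isinstance(value, list):
            return [substitute(item) for item in value]
        return value

    return substitute(config)
```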
bizon/engine/pipeline/consumer.py
CHANGED
@@ -36,7 +36,6 @@ class AbstractQueueConsumer(ABC):
         pass
 
     def process_queue_message(self, queue_message: QueueMessage) -> PipelineReturnStatus:
-
         # Apply the transformation
         try:
             df_source_records = self.transform.apply_transforms(df_source_records=queue_message.df_source_records)
bizon/engine/pipeline/producer.py
CHANGED
@@ -105,7 +105,6 @@ class Producer:
     def run(
         self, job_id: int, stop_event: Union[multiprocessing.synchronize.Event, threading.Event]
     ) -> PipelineReturnStatus:
-
         return_value: PipelineReturnStatus = PipelineReturnStatus.SUCCESS
 
         # Init queue
@@ -132,7 +131,6 @@ class Producer:
             return PipelineReturnStatus.BACKEND_ERROR
 
         while not cursor.is_finished:
-
             if stop_event.is_set():
                 logger.info("Stop event is set, terminating producer ...")
                 return PipelineReturnStatus.KILLED_BY_RUNNER
@@ -226,9 +224,7 @@ class Producer:
             items_in_queue = f"{self.queue.get_size()} items in queue." if self.queue.get_size() else ""
 
             logger.info(
-                (
-                    f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
-                )
+                f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
             )
 
         logger.info("Terminating destination ...")
bizon/engine/queue/adapters/python_queue/consumer.py
CHANGED
@@ -35,7 +35,6 @@ class PythonQueueConsumer(AbstractQueueConsumer):
         self.monitor.track_pipeline_status(PipelineReturnStatus.RUNNING)
 
     def run(self, stop_event: Union[threading.Event, multiprocessing.synchronize.Event]) -> PipelineReturnStatus:
-
         while True:
             # Handle kill signal from the runner
             if stop_event.is_set():
bizon/engine/queue/adapters/python_queue/queue.py
CHANGED
@@ -9,7 +9,6 @@ from bizon.destination.destination import AbstractDestination
 from bizon.engine.queue.config import QUEUE_TERMINATION, QueueMessage
 from bizon.engine.queue.queue import AbstractQueue, AbstractQueueConsumer
 from bizon.monitoring.monitor import AbstractMonitor
-from bizon.source.callback import AbstractSourceCallback
 from bizon.source.models import SourceIteration
 from bizon.transform.transform import Transform
 
@@ -18,7 +17,6 @@ from .consumer import PythonQueueConsumer
 
 
 class PythonQueue(AbstractQueue):
-
     def __init__(self, config: PythonQueueConfigDetails, **kwargs) -> None:
         super().__init__(config)
         self.config: PythonQueueConfigDetails = config
bizon/engine/queue/adapters/rabbitmq/consumer.py
CHANGED
@@ -24,7 +24,6 @@ class RabbitMQConsumer(AbstractQueueConsumer):
         channel.queue_declare(queue=self.config.queue.queue_name)
 
         for method_frame, properties, body in channel.consume(self.config.queue.queue_name):
-
             queue_message = QueueMessage.model_validate_json(body)
             if queue_message.signal == QUEUE_TERMINATION:
                 logger.info("Received termination signal, waiting for destination to close gracefully ...")
bizon/engine/queue/config.py
CHANGED
@@ -27,7 +27,6 @@ class QueueTypes(str, Enum):
 
 
 class AbastractQueueConfigDetails(BaseModel, ABC):
-
     # Forbid extra keys in the model
     model_config = ConfigDict(extra="forbid")
 
@@ -38,7 +37,6 @@ class AbastractQueueConfigDetails(BaseModel, ABC):
 
 
 class AbstractQueueConfig(BaseModel, ABC):
-
     # Forbid extra keys in the model
     model_config = ConfigDict(extra="forbid")
 
bizon/engine/runner/adapters/process.py
CHANGED
@@ -8,7 +8,6 @@ from bizon.engine.runner.runner import AbstractRunner
 
 
 class ProcessRunner(AbstractRunner):
-
     def __init__(self, config: dict):
         super().__init__(config)
 
@@ -36,7 +35,6 @@ class ProcessRunner(AbstractRunner):
         with concurrent.futures.ProcessPoolExecutor(
             max_workers=self.bizon_config.engine.runner.config.max_workers
         ) as executor:
-
             future_producer = executor.submit(
                 AbstractRunner.instanciate_and_run_producer,
                 self.bizon_config,
bizon/engine/runner/adapters/streaming.py
CHANGED
@@ -9,11 +9,13 @@ from loguru import logger
 from pytz import UTC
 
 from bizon.common.models import BizonConfig, SyncMetadata
+from bizon.connectors.destinations.bigquery.src.config import BigQueryRecordSchemaConfig
 from bizon.destination.models import transform_to_df_destination_records
 from bizon.engine.pipeline.models import PipelineReturnStatus
 from bizon.engine.runner.config import RunnerStatus
 from bizon.engine.runner.runner import AbstractRunner
 from bizon.source.models import SourceRecord, source_record_schema
+from bizon.source.source import AbstractSource
 
 
 class StreamingRunner(AbstractRunner):
@@ -36,7 +38,60 @@ class StreamingRunner(AbstractRunner):
     def convert_to_destination_records(df_source_records: pl.DataFrame, extracted_at: datetime) -> pl.DataFrame:
         return transform_to_df_destination_records(df_source_records=df_source_records, extracted_at=extracted_at)
 
+    def _apply_streams_config(self, source: AbstractSource = None) -> None:
+        """Apply streams configuration to source and destination.
+
+        This method is completely source-agnostic. Each source connector is responsible
+        for handling streams config appropriately via set_streams_config().
+
+        When a top-level 'streams' configuration is present, this method:
+        1. Calls source.set_streams_config() to let the source enrich its own config
+        2. Builds destination record_schemas from streams config
+        3. Injects record_schemas into destination config for backward compatibility
+
+        The source is responsible for modifying self.config (which points to bizon_config.source)
+        so that subsequent source instantiations see the enriched config.
+        """
+        if not self.bizon_config.streams:
+            return
+
+        logger.info(f"Applying streams configuration: {len(self.bizon_config.streams)} streams defined")
+
+        # Let the source enrich its own config from streams
+        # Note: source modifies self.config, which is a reference to bizon_config.source
+        # This ensures init_job (which creates a new source) sees the enriched config
+        if source and hasattr(source, "set_streams_config") and callable(source.set_streams_config):
+            source.set_streams_config(self.bizon_config.streams)
+
+        # Build record_schemas list for destination from streams config
+        record_schemas = []
+        for stream in self.bizon_config.streams:
+            if stream.destination.record_schema:
+                record_schema_config = BigQueryRecordSchemaConfig(
+                    destination_id=stream.destination.table_id,
+                    record_schema=stream.destination.record_schema,
+                    clustering_keys=stream.destination.clustering_keys,
+                )
+                record_schemas.append(record_schema_config)
+                logger.info(
+                    f"Stream '{stream.name}': "
+                    f"{getattr(stream.source, 'topic', getattr(stream.source, 'name', 'N/A'))} "
+                    f"-> {stream.destination.table_id}"
+                )
+
+        # Inject into destination config
+        if record_schemas and hasattr(self.bizon_config.destination.config, "record_schemas"):
+            logger.info(f"Injecting {len(record_schemas)} record schemas into destination config")
+            self.bizon_config.destination.config.record_schemas = record_schemas
+
     def run(self) -> RunnerStatus:
+        # Create a temporary source to enrich bizon_config.source from streams
+        # The source's set_streams_config() modifies self.config (= bizon_config.source)
+        # This ensures subsequent source instantiations see the enriched config
+        temp_source = self.get_source(bizon_config=self.bizon_config, config=self.config)
+        self._apply_streams_config(temp_source)
+
+        # Now initialize job (check_connection will use enriched source config)
         job = self.init_job(bizon_config=self.bizon_config, config=self.config)
         backend = self.get_backend(bizon_config=self.bizon_config)
         source = self.get_source(bizon_config=self.bizon_config, config=self.config)
@@ -58,7 +113,6 @@ class StreamingRunner(AbstractRunner):
         iteration = 0
 
         while True:
-
             if source.config.max_iterations and iteration > source.config.max_iterations:
                 logger.info(f"Max iterations {source.config.max_iterations} reached, terminating stream ...")
                 break
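Note that _apply_streams_config duck-types the source: it only checks for a callable set_streams_config attribute. A hypothetical connector-side counterpart honoring the documented contract (mutate self.config in place so later instantiations see the enrichment) could look like this; the topics/topic field names are illustrative, not taken from this diff:

```python
# Hypothetical connector-side counterpart of _apply_streams_config above.
# Only the callable set_streams_config attribute is required by the runner;
# the topics/topic field names below are illustrative assumptions.
from typing import List


class MyStreamingSource:  # stands in for an AbstractSource subclass
    def __init__(self, config):
        # self.config references bizon_config.source, so in-place mutation is
        # visible to sources instantiated later (e.g. by init_job)
        self.config = config

    def set_streams_config(self, streams: List) -> None:
        # Enrich the shared source config from the top-level streams block
        self.config.topics = [
            getattr(stream.source, "topic", getattr(stream.source, "name", None))
            for stream in streams
        ]
```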
bizon/engine/runner/adapters/thread.py
CHANGED
@@ -16,7 +16,6 @@ class ThreadRunner(AbstractRunner):
 
     # TODO: refacto this
     def get_kwargs(self):
-
         extra_kwargs = {}
 
         if self.bizon_config.engine.queue.type == "python_queue":
@@ -46,7 +45,6 @@ class ThreadRunner(AbstractRunner):
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.bizon_config.engine.runner.config.max_workers
         ) as executor:
-
             future_producer = executor.submit(
                 AbstractRunner.instanciate_and_run_producer,
                 self.bizon_config,
bizon/engine/runner/config.py
CHANGED
bizon/engine/runner/runner.py
CHANGED
@@ -27,7 +27,6 @@ from bizon.transform.transform import Transform
 
 class AbstractRunner(ABC):
     def __init__(self, config: dict):
-
         # Internal state
         self._is_running: bool = False
 
@@ -222,7 +221,6 @@
         stop_event: Union[multiprocessing.synchronize.Event, threading.Event],
         **kwargs,
     ):
-
         # Get the source instance
         source = AbstractRunner.get_source(bizon_config=bizon_config, config=config)
 