bizon 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bizon/common/models.py +2 -0
- bizon/destinations/bigquery_streaming/src/config.py +55 -0
- bizon/destinations/bigquery_streaming/src/destination.py +148 -0
- bizon/destinations/bigquery_streaming/src/proto_utils.py +91 -0
- bizon/destinations/config.py +1 -0
- bizon/destinations/destination.py +18 -3
- bizon/destinations/models.py +27 -0
- bizon/engine/pipeline/producer.py +11 -0
- bizon/engine/queue/adapters/kafka/queue.py +4 -0
- bizon/engine/queue/adapters/python_queue/queue.py +6 -0
- bizon/engine/queue/adapters/rabbitmq/queue.py +5 -0
- bizon/engine/queue/queue.py +6 -1
- bizon/engine/runner/adapters/thread.py +2 -0
- bizon/source/discover.py +1 -1
- bizon/sources/kafka/src/source.py +31 -32
- bizon/sources/kafka/tests/kafka_pipeline.py +1 -1
- {bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/METADATA +2 -1
- {bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/RECORD +21 -18
- {bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/LICENSE +0 -0
- {bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/WHEEL +0 -0
- {bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/entry_points.txt +0 -0
bizon/common/models.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Union
 from pydantic import BaseModel, ConfigDict, Field
 
 from bizon.destinations.bigquery.src.config import BigQueryConfig
+from bizon.destinations.bigquery_streaming.src.config import BigQueryStreamingConfig
 from bizon.destinations.file.src.config import FileDestinationConfig
 from bizon.destinations.logger.src.config import LoggerConfig
 from bizon.engine.config import EngineConfig
@@ -24,6 +25,7 @@ class BizonConfig(BaseModel):
 
     destination: Union[
         BigQueryConfig,
+        BigQueryStreamingConfig,
         LoggerConfig,
         FileDestinationConfig,
     ] = Field(
bizon/destinations/bigquery_streaming/src/config.py
ADDED
@@ -0,0 +1,55 @@
+from enum import Enum
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from bizon.destinations.config import (
+    AbstractDestinationConfig,
+    AbstractDestinationDetailsConfig,
+    DestinationTypes,
+)
+
+
+class GCSBufferFormat(str, Enum):
+    PARQUET = "parquet"
+    CSV = "csv"
+
+
+class TimePartitioning(str, Enum):
+    DAY = "DAY"
+    HOUR = "HOUR"
+    MONTH = "MONTH"
+    YEAR = "YEAR"
+
+
+class BigQueryAuthentication(BaseModel):
+    service_account_key: str = Field(
+        description="Service Account Key JSON string. If empty it will be infered",
+        default="",
+    )
+
+
+class BigQueryConfigDetails(AbstractDestinationDetailsConfig):
+    project_id: str
+    dataset_id: str
+    dataset_location: Optional[str] = "US"
+    table_id: Optional[str] = Field(
+        default=None, description="Table ID, if not provided it will be inferred from source name"
+    )
+    time_partitioning: Optional[TimePartitioning] = Field(
+        default=TimePartitioning.DAY, description="BigQuery Time partitioning type"
+    )
+    authentication: Optional[BigQueryAuthentication] = None
+
+    buffer_size: int = Field(default=0, description="Buffer size in MB")
+
+    @field_validator("buffer_size", mode="after")
+    def validate_buffer_size(cls, value: int) -> int:
+        if value != 0:
+            raise ValueError("Buffer size must be 0, we directly stream to BigQuery")
+        return value
+
+
+class BigQueryStreamingConfig(AbstractDestinationConfig):
+    name: Literal[DestinationTypes.BIGQUERY_STREAMING]
+    config: BigQueryConfigDetails
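For context: the buffer_size validator above pins the buffer to 0 because this destination streams rows straight to BigQuery instead of batching them. A minimal, self-contained sketch of that behaviour, using a plain pydantic model rather than the actual bizon classes (the class name here is illustrative):

from pydantic import BaseModel, Field, field_validator


class StreamingDetailsSketch(BaseModel):
    # Mirrors BigQueryConfigDetails.buffer_size from the diff above
    buffer_size: int = Field(default=0, description="Buffer size in MB")

    @field_validator("buffer_size", mode="after")
    def validate_buffer_size(cls, value: int) -> int:
        if value != 0:
            raise ValueError("Buffer size must be 0, we directly stream to BigQuery")
        return value


StreamingDetailsSketch()                    # accepted: defaults to 0
try:
    StreamingDetailsSketch(buffer_size=50)  # rejected by the validator
except ValueError as exc:                   # pydantic v2 ValidationError subclasses ValueError
    print(exc)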
bizon/destinations/bigquery_streaming/src/destination.py
ADDED
@@ -0,0 +1,148 @@
+import json
+import os
+import tempfile
+from typing import List, Tuple
+
+from google.api_core.exceptions import NotFound
+from google.cloud import bigquery, bigquery_storage_v1, storage
+from google.cloud.bigquery import DatasetReference, TimePartitioning
+from google.cloud.bigquery_storage_v1.types import AppendRowsRequest, ProtoRows
+from loguru import logger
+
+from bizon.common.models import SyncMetadata
+from bizon.destinations.config import NormalizationType
+from bizon.destinations.destination import AbstractDestination
+from bizon.destinations.models import DestinationRecord
+from bizon.engine.backend.backend import AbstractBackend
+
+from .config import BigQueryConfigDetails
+from .proto_utils import get_proto_schema_and_class
+
+
+class BigQueryStreamingDestination(AbstractDestination):
+
+    def __init__(self, sync_metadata: SyncMetadata, config: BigQueryConfigDetails, backend: AbstractBackend):
+        super().__init__(sync_metadata, config, backend)
+        self.config: BigQueryConfigDetails = config
+
+        if config.authentication and config.authentication.service_account_key:
+            with tempfile.NamedTemporaryFile(delete=False) as temp:
+                temp.write(config.authentication.service_account_key.encode())
+                temp_file_path = temp.name
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_file_path
+
+        self.project_id = config.project_id
+        self.bq_client = bigquery.Client(project=self.project_id)
+        self.bq_storage_client = bigquery_storage_v1.BigQueryWriteClient()
+        self.gcs_client = storage.Client(project=self.project_id)
+        self.dataset_id = config.dataset_id
+        self.dataset_location = config.dataset_location
+
+    @property
+    def table_id(self) -> str:
+        tabled_id = self.config.table_id or f"{self.sync_metadata.source_name}_{self.sync_metadata.stream_name}"
+        return f"{self.project_id}.{self.dataset_id}.{tabled_id}"
+
+    def get_bigquery_schema(self, destination_records: List[DestinationRecord]) -> List[bigquery.SchemaField]:
+
+        # we keep raw data in the column source_data
+        if self.config.normalization.type == NormalizationType.NONE:
+            return [
+                bigquery.SchemaField("_source_record_id", "STRING", mode="REQUIRED"),
+                bigquery.SchemaField("_source_timestamp", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField("_source_data", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("_bizon_extracted_at", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField(
+                    "_bizon_loaded_at", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP()"
+                ),
+                bigquery.SchemaField("_bizon_id", "STRING", mode="REQUIRED"),
+            ]
+
+        elif self.config.normalization.type == NormalizationType.DEBEZIUM:
+            assert (
+                "_bizon_message_key" in destination_records[0].source_data
+            ), "Debezium records must have a '_bizon_message_key' key"
+            message_keys = json.loads(destination_records[0].source_data["_bizon_message_key"])
+            return [bigquery.SchemaField(key, "STRING", mode="NULLABLE") for key in message_keys] + [
+                bigquery.SchemaField("_source_data", "STRING", mode="NULLABLE"),
+                bigquery.SchemaField("_source_record_id", "STRING", mode="REQUIRED"),
+                bigquery.SchemaField("_source_timestamp", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField("_bizon_extracted_at", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField(
+                    "_bizon_loaded_at", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP()"
+                ),
+                bigquery.SchemaField("_bizon_id", "STRING", mode="REQUIRED"),
+            ]
+
+        # If normalization is tabular, we parse key / value pairs to columns
+        elif self.config.normalization.type == NormalizationType.TABULAR:
+            first_record_keys = destination_records[0].source_data.keys()
+            return [bigquery.SchemaField(key, "STRING", mode="NULLABLE") for key in first_record_keys] + [
+                bigquery.SchemaField("_source_record_id", "STRING", mode="REQUIRED"),
+                bigquery.SchemaField("_source_timestamp", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField("_bizon_extracted_at", "TIMESTAMP", mode="REQUIRED"),
+                bigquery.SchemaField(
+                    "_bizon_loaded_at", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP()"
+                ),
+                bigquery.SchemaField("_bizon_id", "STRING", mode="REQUIRED"),
+            ]
+
+        raise NotImplementedError(f"Normalization type {self.config.normalization.type} is not supported")
+
+    def check_connection(self) -> bool:
+        dataset_ref = DatasetReference(self.project_id, self.dataset_id)
+
+        try:
+            self.bq_client.get_dataset(dataset_ref)
+        except NotFound:
+            dataset = bigquery.Dataset(dataset_ref)
+            dataset.location = self.dataset_location
+            dataset = self.bq_client.create_dataset(dataset)
+        return True
+
+    def load_to_bigquery_via_streaming(self, destination_records: List[DestinationRecord]) -> str:
+        clustering_keys = []
+
+        if self.config.normalization.type == NormalizationType.DEBEZIUM:
+            clustering_keys = list(json.loads(destination_records[0].source_data["_bizon_message_key"]).keys())
+
+        # Create table if it doesnt exist
+        schema = self.get_bigquery_schema(destination_records=destination_records)
+        table = bigquery.Table(self.table_id, schema=schema)
+        time_partitioning = TimePartitioning(field="_bizon_loaded_at", type_=self.config.time_partitioning)
+        table.time_partitioning = time_partitioning
+
+        if clustering_keys:
+            table.clustering_fields = clustering_keys
+
+        table = self.bq_client.create_table(table, exists_ok=True)
+
+        # Create the stream
+        write_client = self.bq_storage_client
+        tabled_id = self.config.table_id or f"{self.sync_metadata.source_name}_{self.sync_metadata.stream_name}"
+        parent = write_client.table_path(self.project_id, self.dataset_id, tabled_id)
+        stream_name = f"{parent}/_default"
+
+        # Generating the protocol buffer representation of the message descriptor.
+        proto_schema, TableRow = get_proto_schema_and_class(clustering_keys)
+
+        serialized_rows = [
+            record.to_protobuf_serialization(
+                TableRow, debezium=self.config.normalization.type == NormalizationType.DEBEZIUM
+            )
+            for record in destination_records
+        ]
+
+        request = AppendRowsRequest(
+            write_stream=stream_name,
+            proto_rows=AppendRowsRequest.ProtoData(
+                rows=ProtoRows(serialized_rows=serialized_rows),
+                writer_schema=proto_schema,
+            ),
+        )
+        response = write_client.append_rows(iter([request]))
+        assert response.code().name == "OK"
+
+    def write_records(self, destination_records: List[DestinationRecord]) -> Tuple[bool, str]:
+        self.load_to_bigquery_via_streaming(destination_records=destination_records)
+        return True, ""
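Condensed from the module above, the table-provisioning step amounts to the sketch below; the project, dataset, table and clustering names are placeholders and the schema is trimmed to two fields:

from google.cloud import bigquery
from google.cloud.bigquery import TimePartitioning

client = bigquery.Client(project="my-project")  # placeholder project

table = bigquery.Table(
    "my-project.raw_data.kafka_users",  # placeholder fully-qualified table id
    schema=[
        bigquery.SchemaField("_source_data", "STRING", mode="NULLABLE"),
        bigquery.SchemaField(
            "_bizon_loaded_at", "TIMESTAMP", mode="REQUIRED", default_value_expression="CURRENT_TIMESTAMP()"
        ),
    ],
)
table.time_partitioning = TimePartitioning(field="_bizon_loaded_at", type_="DAY")
table.clustering_fields = ["id"]  # only set when Debezium message keys are present
table = client.create_table(table, exists_ok=True)  # idempotent: no error if the table already exists

Rows are then appended through the Storage Write API `_default` stream, serialized with the dynamically built protobuf class from proto_utils.py below.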
bizon/destinations/bigquery_streaming/src/proto_utils.py
ADDED
@@ -0,0 +1,91 @@
+from typing import List, Tuple, Type
+
+from google.cloud.bigquery_storage_v1.types import ProtoSchema
+from google.protobuf.descriptor_pb2 import (
+    DescriptorProto,
+    FieldDescriptorProto,
+    FileDescriptorProto,
+)
+from google.protobuf.descriptor_pool import DescriptorPool
+from google.protobuf.message import Message
+from google.protobuf.message_factory import GetMessageClassesForFiles
+
+
+def get_proto_schema_and_class(clustering_keys: List[str] = None) -> Tuple[ProtoSchema, Type[Message]]:
+    # Define the FileDescriptorProto
+    file_descriptor_proto = FileDescriptorProto()
+    file_descriptor_proto.name = "dynamic.proto"
+    file_descriptor_proto.package = "dynamic_package"
+
+    # Define the TableRow message schema
+    message_descriptor = DescriptorProto()
+    message_descriptor.name = "TableRow"
+
+    # Add fields to the message, only use TYPE_STRING, BigQuery does not support other types
+    # It does not imapact data types in final table
+
+    # https://stackoverflow.com/questions/70489919/protobuf-type-for-bigquery-timestamp-field
+    fields = [
+        {"name": "_bizon_id", "type": FieldDescriptorProto.TYPE_STRING, "label": FieldDescriptorProto.LABEL_REQUIRED},
+        {
+            "name": "_bizon_extracted_at",
+            "type": FieldDescriptorProto.TYPE_STRING,
+            "label": FieldDescriptorProto.LABEL_REQUIRED,
+        },
+        {
+            "name": "_bizon_loaded_at",
+            "type": FieldDescriptorProto.TYPE_STRING,
+            "label": FieldDescriptorProto.LABEL_REQUIRED,
+        },
+        {
+            "name": "_source_record_id",
+            "type": FieldDescriptorProto.TYPE_STRING,
+            "label": FieldDescriptorProto.LABEL_REQUIRED,
+        },
+        {
+            "name": "_source_timestamp",
+            "type": FieldDescriptorProto.TYPE_STRING,
+            "label": FieldDescriptorProto.LABEL_REQUIRED,
+        },
+        {
+            "name": "_source_data",
+            "type": FieldDescriptorProto.TYPE_STRING,
+            "label": FieldDescriptorProto.LABEL_OPTIONAL,
+        },
+    ]
+
+    if clustering_keys:
+        for key in clustering_keys:
+            fields.append(
+                {
+                    "name": key,
+                    "type": FieldDescriptorProto.TYPE_STRING,
+                    "label": FieldDescriptorProto.LABEL_OPTIONAL,
+                }
+            )
+
+    for i, field in enumerate(fields, start=1):
+        field_descriptor = message_descriptor.field.add()
+        field_descriptor.name = field["name"]
+        field_descriptor.number = i
+        field_descriptor.type = field["type"]
+        field_descriptor.label = field["label"]
+
+    # Add the message to the file descriptor
+    file_descriptor_proto.message_type.add().CopyFrom(message_descriptor)
+
+    # Create a DescriptorPool and register the FileDescriptorProto
+    pool = DescriptorPool()
+    pool.Add(file_descriptor_proto)
+
+    # Use the registered file name to fetch the message classes
+    message_classes = GetMessageClassesForFiles(["dynamic.proto"], pool=pool)
+
+    # Fetch the TableRow class
+    table_row_class = message_classes["dynamic_package.TableRow"]
+
+    # Create the ProtoSchema
+    proto_schema = ProtoSchema()
+    proto_schema.proto_descriptor.CopyFrom(message_descriptor)
+
+    return proto_schema, table_row_class
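A usage sketch for the helper above, showing how the generated TableRow class is filled and serialized; the attribute names match to_protobuf_serialization in bizon/destinations/models.py, and the values are hypothetical:

from bizon.destinations.bigquery_streaming.src.proto_utils import get_proto_schema_and_class

# Assume one Debezium key column, "id"; every generated field is a STRING.
proto_schema, TableRow = get_proto_schema_and_class(clustering_keys=["id"])

row = TableRow()
row._bizon_id = "8a6f0c1e"                  # hypothetical values
row._bizon_extracted_at = "1700000000"      # epoch seconds rendered as strings
row._bizon_loaded_at = "1700000000"
row._source_record_id = "part_0_offset_42"
row._source_timestamp = "1700000000"
row._source_data = '{"id": 1, "email": "a@b.c"}'
row.id = "1"                                # extra column created for the clustering key

payload = row.SerializeToString()           # bytes that go into ProtoRows.serialized_rows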
bizon/destinations/config.py
CHANGED
bizon/destinations/destination.py
CHANGED
@@ -85,11 +85,16 @@ class AbstractDestination(ABC):
             pagination=self.buffer.pagination,
         )
 
+        logger.info(
+            f"Writing in destination from source iteration {self.buffer.from_iteration} to {self.buffer.to_iteration}"
+        )
+
         success, error_msg = self.write_records(destination_records=self.buffer.records)
 
         if success:
             # We wrote records to destination so we keep it
             destination_iteration.records_written = len(self.buffer.records)
+            logger.info(f"Successfully wrote {destination_iteration.records_written} records to destination")
 
         else:
             # We failed to write records to destination so we keep the error message
@@ -117,6 +122,11 @@ class AbstractDestination(ABC):
 
         # Last iteration, write all records to destination
        if last_iteration:
+
+            if len(self.buffer.records) == 0 and self.config.buffer_size == 0:
+                logger.warning("No records to write to destination, already written, buffer is empty.")
+                return DestinationBufferStatus.RECORDS_WRITTEN
+
             logger.debug("Writing last iteration records to destination")
             assert len(destination_records) == 0, "Last iteration should not have any records"
             destination_iteration = self.buffer_flush_handler(session=session)
@@ -147,9 +157,9 @@ class AbstractDestination(ABC):
             logger.warning("No records to write to destination. Check source and queue provider.")
             return DestinationBufferStatus.NO_RECORDS
 
-        # Write records to destination if buffer size is 0
+        # Write records to destination if buffer size is 0 or streaming
         if self.buffer.buffer_size == 0:
-            logger.info("Writing
+            logger.info("Writing records to destination.")
             self.buffer.add_source_iteration_records_to_buffer(
                 iteration=iteration, records=destination_records, pagination=pagination
             )
@@ -160,7 +170,7 @@ class AbstractDestination(ABC):
         logger.debug(f"Buffer free space {self.buffer.buffer_free_space_pct}%")
         logger.debug(f"Buffer current size {self.buffer.current_size} bytes")
         logger.info(
-            f"Buffer ripeness {self.buffer.ripeness / 60} min. Max ripeness {self.buffer.buffer_flush_timeout / 60} min."  # noqa
+            f"Buffer ripeness {round(self.buffer.ripeness / 60, 2)} min. Max ripeness {round(self.buffer.buffer_flush_timeout / 60, 2)} min."  # noqa
         )
 
         # Write buffer to destination if buffer is ripe and create a new buffer for the new iteration
@@ -258,6 +268,11 @@ class DestinationFactory:
 
             return BigQueryDestination(sync_metadata=sync_metadata, config=config.config, backend=backend)
 
+        elif config.name == DestinationTypes.BIGQUERY_STREAMING:
+            from .bigquery_streaming.src.destination import BigQueryStreamingDestination
+
+            return BigQueryStreamingDestination(sync_metadata=sync_metadata, config=config.config, backend=backend)
+
         elif config.name == DestinationTypes.FILE:
             from .file.src.destination import FileDestination
 
bizon/destinations/models.py
CHANGED
@@ -1,7 +1,9 @@
 import json
 from datetime import datetime
+from typing import Type
 from uuid import uuid4
 
+from google.protobuf.message import Message
 from pydantic import BaseModel, Field
 from pytz import UTC
 
@@ -81,3 +83,28 @@ class DestinationRecord(BaseModel):
             "_source_timestamp": self.source_timestamp,
             "_source_data": json.dumps(self.source_data),
         }
+
+    def to_protobuf_serialization(self, TableRowClass: Type[Message], debezium=False):
+
+        record = TableRowClass()
+        record._bizon_id = self.bizon_id
+        record._bizon_extracted_at = str(int(self.bizon_extracted_at.timestamp()))
+        record._bizon_loaded_at = str(int(self.bizon_loaded_at.timestamp()))
+        record._source_record_id = self.source_record_id
+        record._source_timestamp = str(int(self.source_timestamp.timestamp()))
+
+        if debezium:
+            parsed_debezium_keys = json.loads(self.source_data["_bizon_message_key"])
+            if parsed_debezium_keys:
+                for _key in parsed_debezium_keys:
+                    setattr(record, _key, str(parsed_debezium_keys[_key]))
+            if self.source_data.get("op") == "d":
+                source_data = {"__deleted": True, **self.source_data["before"]}
+            else:
+                source_data = {"__deleted": False, **self.source_data["after"]}
+
+            record._source_data = json.dumps(source_data)
+        else:
+            record._source_data = json.dumps(self.source_data)
+
+        return record.SerializeToString()
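The Debezium branch of to_protobuf_serialization flattens the change-event envelope: delete events keep the "before" image and are flagged with "__deleted". A standalone sketch with a hypothetical record:

# Hypothetical Debezium-style payload, shaped like the source_data handled above
source_data = {
    "_bizon_message_key": '{"id": 42}',
    "op": "d",                                        # "d" marks a delete event
    "before": {"id": 42, "email": "old@example.com"},
    "after": None,
}

if source_data.get("op") == "d":
    flattened = {"__deleted": True, **source_data["before"]}
else:
    flattened = {"__deleted": False, **source_data["after"]}

print(flattened)  # {'__deleted': True, 'id': 42, 'email': 'old@example.com'}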
bizon/engine/pipeline/producer.py
CHANGED
@@ -104,6 +104,8 @@ class Producer:
 
         while not cursor.is_finished:
 
+            timestamp_start_iteration = datetime.now(tz=UTC)
+
             # Handle the case where last cursor already reach max_iterations
             terminate = self.handle_max_iterations(cursor)
             if terminate:
@@ -178,6 +180,15 @@ class Producer:
                 return_value = PipelineReturnStatus.SOURCE_ERROR
                 break
 
+            # Items in queue
+            items_in_queue = f"{self.queue.get_size()} items in queue." if self.queue.get_size() else ""
+
+            logger.info(
+                (
+                    f"Iteration {cursor.iteration} finished in {datetime.now(tz=UTC) - timestamp_start_iteration}. {items_in_queue}"
+                )
+            )
+
         logger.info("Terminating destination ...")
 
         try:
bizon/engine/queue/adapters/kafka/queue.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+from typing import Union
 
 from kafka import KafkaProducer
 from loguru import logger
@@ -36,6 +37,9 @@ class KafkaQueue(AbstractQueue):
        def on_error(e):
            logger.error(f"Error sending message: {e}")
 
+    def get_size(self) -> Union[int, None]:
+        return None
+
     def put_queue_message(self, queue_message: QueueMessage):
         future = self.producer.send(
             topic=self.config.queue.topic,
bizon/engine/queue/adapters/python_queue/queue.py
CHANGED
@@ -1,6 +1,7 @@
 import random
 import time
 from multiprocessing import Queue
+from typing import Union
 
 from loguru import logger
 
@@ -52,6 +53,11 @@ class PythonQueue(AbstractQueue):
            time.sleep(random.random())
            return self.get()
 
+    def get_size(self) -> Union[int, None]:
+        if hasattr(self.queue, "qsize"):
+            return self.queue.qsize()
+        return None
+
     def terminate(self, iteration: int) -> bool:
         self.put(source_records=[], iteration=iteration, signal=QUEUE_TERMINATION)
         logger.info("Sent termination signal to destination.")
bizon/engine/queue/adapters/rabbitmq/queue.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Union
+
 import pika
 from loguru import logger
 
@@ -31,6 +33,9 @@ class RabbitMQ(AbstractQueue):
            body=queue_message.model_dump_json(),
        )
 
+    def get_size(self) -> Union[int, None]:
+        return None
+
     def get(self) -> QueueMessage:
         raise NotImplementedError(
             "RabbitMQ does not support getting messages from the queue, directly use callback in consumer."
bizon/engine/queue/queue.py
CHANGED
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from pydantic import BaseModel
 from pytz import UTC
@@ -45,6 +45,11 @@ class AbstractQueue(ABC):
        """Get a QueueMessage object from the queue system"""
        pass
 
+    @abstractmethod
+    def get_size(self) -> Union[int, None]:
+        """If queue is compatible, return size of the queue"""
+        pass
+
     @abstractmethod
     def terminate(self, iteration: int) -> bool:
         """Send a termination signal in the queue system"""
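Every queue adapter now implements get_size; adapters that cannot report a depth return None (Kafka, RabbitMQ), while the Python queue returns qsize() when available. An illustrative sketch of the contract, not the actual bizon adapters:

from collections import deque
from typing import Union


class InMemoryQueueSketch:
    """Hypothetical adapter whose backend can report its depth."""

    def __init__(self):
        self._items = deque()

    def get_size(self) -> Union[int, None]:
        return len(self._items)


class OpaqueBrokerQueueSketch:
    """Hypothetical adapter backed by a broker that cannot report depth."""

    def get_size(self) -> Union[int, None]:
        return None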
bizon/engine/runner/adapters/thread.py
CHANGED
@@ -1,5 +1,6 @@
 import concurrent.futures
 import time
+import traceback
 
 from loguru import logger
 
@@ -75,5 +76,6 @@ class ThreadRunner(AbstractRunner):
            future_consumer.result()
        except Exception as e:
            logger.error(f"Consumer thread stopped running with error {e}")
+            logger.error(traceback.format_exc())
 
        return True
bizon/source/discover.py
CHANGED
@@ -143,7 +143,7 @@ def parse_streams_from_filepath(source_name: str, filepath: str, skip_unavailabl
 
    # Transform the relative path to a python import path and import the module
    python_import_path = get_python_import_path(relative_path)
-    logger.
+    logger.debug(f"Importing {python_import_path}")
 
    try:
        source_module = importlib.import_module(python_import_path, package="sources")
bizon/sources/kafka/src/source.py
CHANGED
@@ -1,7 +1,7 @@
 import io
 import json
+import logging
 import struct
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime, timezone
 from enum import Enum
 from functools import lru_cache
@@ -18,6 +18,9 @@ from bizon.source.config import SourceConfig
 from bizon.source.models import SourceIteration, SourceRecord
 from bizon.source.source import AbstractSource
 
+silent_logger = logging.getLogger()
+silent_logger.addHandler(logging.StreamHandler())
+
 
 class SchemaRegistryType(str, Enum):
     APICURIO = "apicurio"
@@ -98,7 +101,10 @@ class KafkaSource(AbstractSource):
        }
 
        # Consumer instance
-        self.consumer = Consumer(self.kafka_consumer_conf)
+        self.consumer = Consumer(self.kafka_consumer_conf, logger=silent_logger)
+
+        # Consumers for each worker thread
+        self.consumers_cached: Mapping[int, Consumer] = {}
 
    @staticmethod
    def streams() -> List[str]:
@@ -194,25 +200,17 @@
        global_id = self.parse_global_id_from_serialized_message(header_message)
        return self.get_parsed_avro_schema(global_id).to_json()
 
-    def
+    def parse_encoded_messages(self, encoded_messages: list) -> List[SourceRecord]:
+
        records = []
-        encoded_messages = []
 
        # Set the source timestamp to now, otherwise it will be overwritten by the message timestamp
        source_timestamp = datetime.now(tz=timezone.utc)
 
-        # Set consumer offset params
-        consumer = Consumer(self.kafka_consumer_conf)
-        consumer.assign([TopicPartition(self.config.topic, partition, topic_offsets.get_partition_offset(partition))])
-        consumer.seek(TopicPartition(self.config.topic, partition, topic_offsets.get_partition_offset(partition)))
-
-        # Read messages
-        encoded_messages.extend(consumer.consume(self.config.batch_size, timeout=self.config.consumer_timeout))
-
        for message in encoded_messages:
            if not message.value():
                logger.debug(
-                    f"Message for partition {partition} and offset {message.offset()} and topic {self.config.topic} is empty, skipping."
+                    f"Message for partition {message.partition()} and offset {message.offset()} and topic {self.config.topic} is empty, skipping."
                )
                continue
 
@@ -233,43 +231,44 @@ class KafkaSource(AbstractSource):
                        data[self.config.timestamp_ms_name] / 1000, tz=timezone.utc
                    )
 
+                self.topic_offsets.set_partition_offset(message.partition(), message.offset() + 1)
+
                records.append(
                    SourceRecord(
-                        id=f"part_{partition}_offset_{message.offset()}",
+                        id=f"part_{message.partition()}_offset_{message.offset()}",
                        timestamp=source_timestamp,
                        data=data,
                    )
                )
+
            except Exception as e:
                logger.error(
-                    f"Error while decoding message for partition {partition}: {e} at offset {message.offset()}"
+                    f"Error while decoding message for partition {message.partition()}: {e} at offset {message.offset()}"
                )
                continue
 
-        # Update the offset for the partition
-        if encoded_messages:
-            topic_offsets.set_partition_offset(partition, encoded_messages[-1].offset() + 1)
-        else:
-            logger.warning(f"No new messages found for partition {partition}")
-
-        consumer.close()
-
        return records
 
    def read_topic(self, pagination: dict = None) -> SourceIteration:
        nb_partitions = self.get_number_of_partitions()
 
        # Setup offset_pagination
-        topic_offsets = TopicOffsets.model_validate(pagination) if pagination else self.get_offset_partitions()
+        self.topic_offsets = TopicOffsets.model_validate(pagination) if pagination else self.get_offset_partitions()
 
-
-
-
-
-
-
-            records.extend(partition_records)
+        self.consumer.assign(
+            [
+                TopicPartition(self.config.topic, partition, self.topic_offsets.get_partition_offset(partition))
+                for partition in range(nb_partitions)
+            ]
+        )
 
+        t1 = datetime.now()
+        encoded_messages = self.consumer.consume(self.config.batch_size, timeout=self.config.consumer_timeout)
+        logger.info(f"Read Kafka: {len(encoded_messages)} messages in {datetime.now() - t1}")
+
+        records = self.parse_encoded_messages(encoded_messages)
+
+        # Update the offset for the partition
        if not records:
            logger.info("No new records found, stopping iteration")
            return SourceIteration(
@@ -278,7 +277,7 @@ class KafkaSource(AbstractSource):
        )
 
        return SourceIteration(
-            next_pagination=topic_offsets.model_dump(),
+            next_pagination=self.topic_offsets.model_dump(),
            records=records,
        )
 
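The refactor above replaces one consumer per partition with a single consumer assigned to every partition, consumed in one batched call, with offsets tracked per message. A sketch of that read pattern with confluent-kafka; the broker, topic, group and offsets are placeholders:

from confluent_kafka import Consumer, TopicPartition

conf = {"bootstrap.servers": "localhost:9092", "group.id": "bizon-sketch"}  # placeholder config
consumer = Consumer(conf)

topic = "teams_users"            # placeholder topic
offsets = {0: 0, 1: 0}           # partition -> next offset to read, as TopicOffsets would track

# Assign all partitions at their stored offsets in a single call
consumer.assign([TopicPartition(topic, partition, offset) for partition, offset in offsets.items()])

messages = consumer.consume(100, timeout=10)   # one batch across all assigned partitions
for message in messages:
    if not message.value():
        continue                               # skip empty payloads, as the source does
    # advance the stored offset for this message's partition
    offsets[message.partition()] = message.offset() + 1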
bizon/sources/kafka/tests/kafka_pipeline.py
CHANGED
@@ -4,6 +4,6 @@ from bizon.engine.engine import RunnerFactory
 
 if __name__ == "__main__":
     runner = RunnerFactory.create_from_yaml(
-        filepath=os.path.abspath("bizon/sources/kafka/config/
+        filepath=os.path.abspath("bizon/sources/kafka/config/kafka_teams_users.yml")
     )
     runner.run()
{bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bizon
-Version: 0.0.
+Version: 0.0.11
 Summary: Extract and load your data reliably from API Clients with native fault-tolerant and checkpointing mechanism.
 Author: Antoine Balliet
 Author-email: antoine.balliet@gmail.com
@@ -31,6 +31,7 @@ Requires-Dist: loguru (>=0.7.2,<0.8.0)
 Requires-Dist: pandas (>=2.2.2,<3.0.0) ; extra == "bigquery"
 Requires-Dist: pendulum (>=3.0.0,<4.0.0)
 Requires-Dist: pika (>=1.3.2,<2.0.0) ; extra == "rabbitmq"
+Requires-Dist: protobuf (==4.24.0)
 Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "postgres"
 Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
 Requires-Dist: pydantic (>=2.8.2,<3.0.0)
{bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/RECORD
CHANGED
@@ -4,18 +4,21 @@ bizon/cli/main.py,sha256=QOTy8nrfj7bJSXkd98OywKpYUD3zShsBUxBiodmeku8,3212
 bizon/cli/utils.py,sha256=aZ47YjFfifHkW95bAVzWfEQD3ZnxGSMT32bkRLmc5-c,953
 bizon/common/errors/backoff.py,sha256=z7RkQt1Npdh0sfD3hBDaiWQKe4iqS6ewvT1Q4Fds5aU,508
 bizon/common/errors/errors.py,sha256=mrYx1uE2kOuR2pEaB7ztK1l2m0E4V-_-hxq-DuILerY,682
-bizon/common/models.py,sha256=
+bizon/common/models.py,sha256=SE06zZjvJbljyR0nTB0wmVotQS-YDr9Y1OGbLT7XVek,1869
 bizon/destinations/bigquery/config/bigquery.example.yml,sha256=mvKtFS_PUuekyMh9xssuwRfFwLtR-rVvpIy5xmF5__k,1261
 bizon/destinations/bigquery/src/config.py,sha256=QlD-FdBJ8Q6nKPrOf5q28lHnyFE8khT41dSR1s2meeM,1378
 bizon/destinations/bigquery/src/destination.py,sha256=tPxE0IpHbR4zDkW5HaiHkgeDRDY2AibIPzY9iftZ2Uc,11079
+bizon/destinations/bigquery_streaming/src/config.py,sha256=6NtsOJJ8rl0U96DpQdYvE50i8Wry6d8OiuBR1FKHjNE,1582
+bizon/destinations/bigquery_streaming/src/destination.py,sha256=6sSSdhqv4LIMbrIDcInKed0pNR0r_qKU3eG2BCX0JYQ,7322
+bizon/destinations/bigquery_streaming/src/proto_utils.py,sha256=n6OP5KEnyVdl17NjCxVPNsV7zewp1fbYDckbaHFrIUM,3305
 bizon/destinations/buffer.py,sha256=bFYkaoge-3AyKfGolqsuB3PWWtdPt65Fllrz-3X_uMI,2594
-bizon/destinations/config.py,sha256=
-bizon/destinations/destination.py,sha256=
+bizon/destinations/config.py,sha256=gBiEfVYARlH9BcSKI5Fb6rLkSUfIElf39MeMdk0HJOc,1732
+bizon/destinations/destination.py,sha256=D6EMs_sq3HG2x9yR54aknDgx3TrU2VbxfmzpH1GWVCc,11795
 bizon/destinations/file/src/config.py,sha256=C4BBIKzBH5343iLGR3aCubAGjPo0b2LegsCLjb77uFA,513
 bizon/destinations/file/src/destination.py,sha256=1VCrVdtzAzwSKgYq0JUOc3r2cM7314dV-eIoAFhM_64,1003
 bizon/destinations/logger/src/config.py,sha256=AWY3R9q3ZjD3uQ_KBq8VcW60deKSIHe3qtgCKjdywKk,433
 bizon/destinations/logger/src/destination.py,sha256=xTt03F3AMI9KhQno2tGoCr3eacrO62qjnOlpeEHk6tQ,868
-bizon/destinations/models.py,sha256=
+bizon/destinations/models.py,sha256=w2wZy8alcqc89rdmrwDUWxVZvI01ON7m6rcidKqk-Cc,4834
 bizon/engine/backend/adapters/sqlalchemy/backend.py,sha256=R0CztRGc3_6PdIIgbbrDYD2OJRNhq9PPmD6PYK7-fjk,15567
 bizon/engine/backend/adapters/sqlalchemy/config.py,sha256=K-FpE_-VHnTSAQOduouhXFVy43EkrKbeZLqr9_OfeMw,1846
 bizon/engine/backend/backend.py,sha256=Bodqoo5qJHV0H2zJJeGytaHGiNZmBjnLBxiRgq6M3kE,5844
@@ -25,20 +28,20 @@ bizon/engine/config.py,sha256=cKgI1IfzDncoxG3FsKUz-Aa3fU41ucQPaafjjhKeU90,2039
 bizon/engine/engine.py,sha256=bdQksSQfxkeAHbbe52_MbqTJieOURjlMGYtkCCaDtuc,990
 bizon/engine/pipeline/consumer.py,sha256=HU3G2_h5ZUM217mnKSktdvib2nRc9r8OzvqWodRdFk0,424
 bizon/engine/pipeline/models.py,sha256=kfr_kqkJMEVlWX35rJiYMCuEBCrNhsx9R0a19E39i14,216
-bizon/engine/pipeline/producer.py,sha256=
+bizon/engine/pipeline/producer.py,sha256=k0dzSa6_7PiTJF0UtX0BAfvSTS0h7XFVZ3JN_3_ZvQQ,8330
 bizon/engine/queue/adapters/kafka/config.py,sha256=o7GAb_ls9N0nQV04B6Y4XjLo-Q57x28r63gjFG9LvVg,1091
 bizon/engine/queue/adapters/kafka/consumer.py,sha256=mh25mTjO7w6CGwJDWtxHVocwZi6DbTIVncm81rmhKrw,2576
-bizon/engine/queue/adapters/kafka/queue.py,sha256=
+bizon/engine/queue/adapters/kafka/queue.py,sha256=IS6akN7F81lkAajQdgqSqlqAg3r8uXbw6SdByDgvdMM,1965
 bizon/engine/queue/adapters/python_queue/config.py,sha256=D_CAuWJtdMQmQcm9gq9YBrkeFHAxZKRc7kIISliyp_4,847
 bizon/engine/queue/adapters/python_queue/consumer.py,sha256=yEoDF6QEmr9gjNGxXRqypdIHIJ50lQh_fFDhDXk6_g8,1566
-bizon/engine/queue/adapters/python_queue/queue.py,sha256=
+bizon/engine/queue/adapters/python_queue/queue.py,sha256=VVc5A7qU2wgWEeeG6UOmgkmoIiwZ7GZGjSiBThloFzk,2259
 bizon/engine/queue/adapters/rabbitmq/config.py,sha256=9N_7WREvNjJgcNTC3Y2kHII-iId2MZa3ssHHks6PyAs,987
 bizon/engine/queue/adapters/rabbitmq/consumer.py,sha256=cN6K8wSBIQUSuRD7VsNltS6ElZ32PW92ZXiugzIDPJU,2019
-bizon/engine/queue/adapters/rabbitmq/queue.py,sha256=
+bizon/engine/queue/adapters/rabbitmq/queue.py,sha256=gaTCIY_mCfWt8LCjfEymZuIiwqPkYQoVvaOacRYgLJo,1709
 bizon/engine/queue/config.py,sha256=PN9Je_Q9Sxo-3fI8lI6rZPQ9oeWatnD4rzUTWj3NhnA,792
-bizon/engine/queue/queue.py,sha256=
+bizon/engine/queue/queue.py,sha256=_pOhmDZs79V7XgjthrKsFxZvae_4_cvol97jcZ-YR3g,2926
 bizon/engine/runner/adapters/process.py,sha256=idyknLADcmhCS4614WtyO-FqaYChV243gvjzPWvk0KE,2525
-bizon/engine/runner/adapters/thread.py,sha256=
+bizon/engine/runner/adapters/thread.py,sha256=QyDW-D8fkpYFefKth7OQoDRzURuxYdX7on2NephBLzY,2683
 bizon/engine/runner/config.py,sha256=QPgfy6YnS-EW8nhpTg1aRHshbGz5QTrQ5R3pDmLkIE0,1272
 bizon/engine/runner/runner.py,sha256=1njU4KoFPhkP_oMWewH5bWTxt38Vhz-Y4e5hpcl2jF4,7812
 bizon/source/auth/authenticators/abstract_oauth.py,sha256=gJ40Sbrt0lnHfLupzkzOvUmse3X0Fp2XRHHqjqnVXdI,5274
@@ -51,7 +54,7 @@ bizon/source/auth/builder.py,sha256=hc4zBNj31LZc-QqgIyx1VQEYTm9Xv81vY5pJiwQroJo,
 bizon/source/auth/config.py,sha256=2jjcBLP95XsCkfKxdUei4X2yHI2WX92lJb8D8Txw86g,750
 bizon/source/config.py,sha256=DPwJsBfU48yMvCw-pQCEha4X-IUjvmnQzjTwgsaCxAA,2307
 bizon/source/cursor.py,sha256=TSgWe1T9b4x7EEsbk22hwTWwVXCk5vdrs9eaHNhrevo,3983
-bizon/source/discover.py,sha256=
+bizon/source/discover.py,sha256=C0_SnFxeHpz4VernxAfu2gbnQuoqv0cWX9z5J3WlCKw,11120
 bizon/source/models.py,sha256=iVp0H4muOWGst1W5DuxEVtHIY6lewOV8zDZUqvPTcBk,1337
 bizon/source/session.py,sha256=z4dZlKC_PD8w_utTuAqs1vsfGuRkxHh5WQZhVKamNd0,1979
 bizon/source/source.py,sha256=NhxMU1yXgi7mL64RyeymOYNqRk6fad9v_S8lhvXYUI0,3390
@@ -76,16 +79,16 @@ bizon/sources/hubspot/src/hubspot_objects.py,sha256=EmABx9XD8q6g4Uc5mHLv5YYl5KcI
 bizon/sources/hubspot/src/models/hs_object.py,sha256=-Y20H3-nenJyySMlvM4TPttPz4O8qm3ArKP_I8pxsuo,1235
 bizon/sources/hubspot/tests/hubspot_pipeline.py,sha256=e6dCF5_MHMySkeiF6kKrSAuCa_48J22-ZeSCZSjrfUI,216
 bizon/sources/kafka/config/kafka.example.yml,sha256=ZyHBmSWZ_5WQaBr9WzD05PuE6vi3hhYgHh2VZ-IU-Iw,755
-bizon/sources/kafka/src/source.py,sha256=
-bizon/sources/kafka/tests/kafka_pipeline.py,sha256=
+bizon/sources/kafka/src/source.py,sha256=wPCtrQ7qolaRzOYPUvzp6vuBSdx1I5FBniKjqNyYYJ8,10972
+bizon/sources/kafka/tests/kafka_pipeline.py,sha256=txi2-Tvg4Ydgk6iYp-GqDRXqWj1Sb5rrg9Q0hbBA114,238
 bizon/sources/periscope/config/periscope_charts.example.yml,sha256=rpFDAWeU5oZ3UOiX0sSAgd1X5lv6t-s3iqiDPnRqutU,477
 bizon/sources/periscope/config/periscope_dashboards.example.yml,sha256=sN2iGGqCQCvrMXcwxNGq_dR7-KZ1KtYdXmNYKXlfEpg,481
 bizon/sources/periscope/src/source.py,sha256=AZM-HDDjdTWj8akeeofQ_-G8YlnNHEKi2mjEQSYwOvE,7638
 bizon/sources/periscope/tests/periscope_pipeline_charts.py,sha256=mU0JtfhS1KmWsS3iovGhGxK7iPVWiYzjBM_QfRL3ZQI,275
 bizon/sources/periscope/tests/periscope_pipeline_dashboard.py,sha256=vZKN7UfH-lQIWrnfjPqQFjZm28UIw2m9OSg4yS-Wckk,279
 bizon/utils.py,sha256=HXaPiyxpWKoy3XN5vSYOve1ezlFeOYin3aFqTjcabUQ,81
-bizon-0.0.
-bizon-0.0.
-bizon-0.0.
-bizon-0.0.
-bizon-0.0.
+bizon-0.0.11.dist-info/LICENSE,sha256=AW7SjYVT2bBnXOxgDxqy_e_JF8jDCFlMCaPCF11wFDI,1072
+bizon-0.0.11.dist-info/METADATA,sha256=DyUyvYgGZqHsFiMWoxaTr7OoY0GYbYs5rUx_ueMVfVo,5682
+bizon-0.0.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+bizon-0.0.11.dist-info/entry_points.txt,sha256=wtCd-6JswSY8lPWYSvOf7ASX1zfKgmgXtgg5XQS5274,44
+bizon-0.0.11.dist-info/RECORD,,
{bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/LICENSE
File without changes
{bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/WHEEL
File without changes
{bizon-0.0.9.dist-info → bizon-0.0.11.dist-info}/entry_points.txt
File without changes