bizon 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. bizon/alerting/__init__.py +0 -0
  2. bizon/alerting/alerts.py +23 -0
  3. bizon/alerting/models.py +28 -0
  4. bizon/alerting/slack/__init__.py +0 -0
  5. bizon/alerting/slack/config.py +5 -0
  6. bizon/alerting/slack/handler.py +39 -0
  7. bizon/cli/main.py +7 -3
  8. bizon/common/models.py +33 -7
  9. bizon/{destinations → connectors/destinations}/bigquery/config/bigquery.example.yml +3 -4
  10. bizon/connectors/destinations/bigquery/src/config.py +128 -0
  11. bizon/{destinations → connectors/destinations}/bigquery/src/destination.py +48 -25
  12. bizon/connectors/destinations/bigquery_streaming/src/config.py +57 -0
  13. bizon/connectors/destinations/bigquery_streaming/src/destination.py +377 -0
  14. bizon/connectors/destinations/bigquery_streaming_v2/src/config.py +57 -0
  15. bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py +446 -0
  16. bizon/{destinations/bigquery_streaming → connectors/destinations/bigquery_streaming_v2}/src/proto_utils.py +30 -36
  17. bizon/{destinations → connectors/destinations}/file/src/config.py +9 -3
  18. bizon/connectors/destinations/file/src/destination.py +56 -0
  19. bizon/{destinations → connectors/destinations}/logger/src/config.py +2 -1
  20. bizon/{destinations → connectors/destinations}/logger/src/destination.py +18 -3
  21. bizon/connectors/sources/cycle/config/cycle.example.yml +15 -0
  22. bizon/connectors/sources/cycle/src/source.py +133 -0
  23. bizon/{sources/periscope/tests/periscope_pipeline_dashboard.py → connectors/sources/cycle/tests/cycle_customers.py} +1 -1
  24. bizon/{sources → connectors/sources}/dummy/config/dummy.example.yml +2 -2
  25. bizon/{sources → connectors/sources}/dummy/src/fake_api.py +6 -1
  26. bizon/{sources → connectors/sources}/dummy/src/source.py +18 -5
  27. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline.py +2 -2
  28. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_bigquery_backend.py +2 -2
  29. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_kafka.py +2 -2
  30. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_rabbitmq.py +2 -2
  31. bizon/connectors/sources/dummy/tests/dummy_pipeline_unnest.py +29 -0
  32. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_write_data_bigquery.py +2 -2
  33. bizon/{sources → connectors/sources}/dummy/tests/dummy_pipeline_write_data_bigquery_through_kafka.py +2 -2
  34. bizon/{sources → connectors/sources}/gsheets/config/default_auth.example.yml +4 -2
  35. bizon/{sources → connectors/sources}/gsheets/config/service_account.example.yml +4 -2
  36. bizon/{sources → connectors/sources}/hubspot/config/api_key.example.yml +4 -2
  37. bizon/{sources → connectors/sources}/hubspot/config/oauth.example.yml +4 -2
  38. bizon/{sources → connectors/sources}/hubspot/src/hubspot_objects.py +1 -1
  39. bizon/{sources → connectors/sources}/kafka/config/kafka.example.yml +3 -5
  40. bizon/connectors/sources/kafka/config/kafka_debezium.example.yml +110 -0
  41. bizon/connectors/sources/kafka/src/callback.py +18 -0
  42. bizon/connectors/sources/kafka/src/config.py +69 -0
  43. bizon/connectors/sources/kafka/src/decode.py +93 -0
  44. bizon/connectors/sources/kafka/src/source.py +381 -0
  45. bizon/connectors/sources/kafka/tests/kafka_pipeline.py +7 -0
  46. bizon/connectors/sources/periscope/config/periscope_charts.example.yml +20 -0
  47. bizon/connectors/sources/periscope/config/periscope_dashboards.example.yml +20 -0
  48. bizon/{sources → connectors/sources}/periscope/src/source.py +137 -13
  49. bizon/connectors/sources/periscope/tests/periscope_pipeline_dashboard.py +9 -0
  50. bizon/connectors/sources/pokeapi/config/pokeapi_pokemon_to_json.example.yml +19 -0
  51. bizon/connectors/sources/pokeapi/config/pokeapi_pokemon_to_logger.example.yml +10 -0
  52. bizon/connectors/sources/pokeapi/src/source.py +79 -0
  53. bizon/{destinations → destination}/buffer.py +5 -0
  54. bizon/destination/config.py +83 -0
  55. bizon/{destinations → destination}/destination.py +103 -15
  56. bizon/engine/backend/adapters/sqlalchemy/backend.py +3 -1
  57. bizon/engine/engine.py +20 -1
  58. bizon/engine/pipeline/consumer.py +73 -5
  59. bizon/engine/pipeline/models.py +8 -3
  60. bizon/engine/pipeline/producer.py +18 -9
  61. bizon/engine/queue/adapters/kafka/consumer.py +2 -2
  62. bizon/engine/queue/adapters/kafka/queue.py +3 -2
  63. bizon/engine/queue/adapters/python_queue/consumer.py +40 -23
  64. bizon/engine/queue/adapters/python_queue/queue.py +19 -9
  65. bizon/engine/queue/adapters/rabbitmq/consumer.py +3 -6
  66. bizon/engine/queue/adapters/rabbitmq/queue.py +3 -2
  67. bizon/engine/queue/config.py +16 -0
  68. bizon/engine/queue/queue.py +17 -16
  69. bizon/engine/runner/adapters/process.py +15 -2
  70. bizon/engine/runner/adapters/streaming.py +121 -0
  71. bizon/engine/runner/adapters/thread.py +32 -9
  72. bizon/engine/runner/config.py +28 -0
  73. bizon/engine/runner/runner.py +113 -24
  74. bizon/monitoring/__init__.py +0 -0
  75. bizon/monitoring/config.py +39 -0
  76. bizon/monitoring/datadog/__init__.py +0 -0
  77. bizon/monitoring/datadog/monitor.py +153 -0
  78. bizon/monitoring/monitor.py +71 -0
  79. bizon/monitoring/noop/__init__.py +0 -0
  80. bizon/monitoring/noop/monitor.py +30 -0
  81. bizon/source/callback.py +24 -0
  82. bizon/source/config.py +3 -3
  83. bizon/source/cursor.py +1 -1
  84. bizon/source/discover.py +4 -3
  85. bizon/source/models.py +4 -2
  86. bizon/source/source.py +10 -2
  87. bizon/transform/config.py +8 -0
  88. bizon/transform/transform.py +48 -0
  89. {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/METADATA +23 -6
  90. bizon-0.1.2.dist-info/RECORD +123 -0
  91. {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/WHEEL +1 -1
  92. bizon/destinations/bigquery/src/config.py +0 -51
  93. bizon/destinations/bigquery_streaming/src/config.py +0 -43
  94. bizon/destinations/bigquery_streaming/src/destination.py +0 -154
  95. bizon/destinations/config.py +0 -47
  96. bizon/destinations/file/src/destination.py +0 -27
  97. bizon/sources/kafka/src/source.py +0 -357
  98. bizon/sources/kafka/tests/kafka_pipeline.py +0 -9
  99. bizon/sources/periscope/config/periscope_charts.example.yml +0 -26
  100. bizon/sources/periscope/config/periscope_dashboards.example.yml +0 -26
  101. bizon-0.1.0.dist-info/RECORD +0 -93
  102. /bizon/{sources → connectors/sources}/gsheets/src/source.py +0 -0
  103. /bizon/{sources → connectors/sources}/gsheets/tests/gsheets_pipeline.py +0 -0
  104. /bizon/{sources → connectors/sources}/hubspot/src/hubspot_base.py +0 -0
  105. /bizon/{sources → connectors/sources}/hubspot/src/models/hs_object.py +0 -0
  106. /bizon/{sources → connectors/sources}/hubspot/tests/hubspot_pipeline.py +0 -0
  107. /bizon/{sources → connectors/sources}/periscope/tests/periscope_pipeline_charts.py +0 -0
  108. /bizon/{destinations → destination}/models.py +0 -0
  109. {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/LICENSE +0 -0
  110. {bizon-0.1.0.dist-info → bizon-0.1.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,133 @@
1
from typing import Any, List, Tuple, Type

from pydantic import Field
from requests.auth import AuthBase

from bizon.source.auth.builder import AuthBuilder
from bizon.source.auth.config import AuthType
from bizon.source.config import SourceConfig
from bizon.source.models import SourceIteration, SourceRecord
from bizon.source.source import AbstractSource


class CycleSourceConfig(SourceConfig):
    # Workspace slug used by the `getProductBySlug` GraphQL query.
    slug: str = Field(..., description="Slug of the Cycle account")


class CycleSource(AbstractSource):
    """Source connector for the Cycle (cycle.app) GraphQL API.

    Exposes a single stream, ``customers``, fetched page by page with
    cursor-based pagination (100 records per request).
    """

    def __init__(self, config: CycleSourceConfig):
        super().__init__(config)
        self.config: CycleSourceConfig = config
        self.url_graphql = "https://api.product.cycle.app/graphql"

    def get_authenticator(self) -> AuthBase:
        # Only token (API key) authentication is supported. Any other
        # configured auth type falls through and returns None implicitly.
        if self.config.authentication.type.value == AuthType.API_KEY:
            return AuthBuilder.token(params=self.config.authentication.params)

    @staticmethod
    def streams() -> List[str]:
        """Streams exposed by this source."""
        return ["customers"]

    @staticmethod
    def get_config_class() -> Type[SourceConfig]:
        # Fixed annotation: this returns the config *class*, not an
        # instance of SourceConfig.
        return CycleSourceConfig

    def check_connection(self) -> Tuple[bool, Any]:
        """Return a ``(success, error)`` pair.

        Fixed annotation: was ``Tuple[bool | Any | None]`` (a one-element
        tuple of a union) although the method returns a two-element tuple.
        No actual connectivity probe is performed here.
        """
        return True, None

    def get_total_records_count(self) -> int | None:
        # The Cycle API does not expose a total customer count.
        return None

    def run_graphql_query(self, query: str, variables: dict) -> dict:
        """Run a GraphQL query and return the decoded JSON response.

        Raises:
            requests.HTTPError: if the API answers with an error status —
                fail fast here instead of later with an opaque decode or
                missing-key error.
        """
        payload = {"query": query, "variables": variables}
        response = self.session.post(self.url_graphql, json=payload)
        response.raise_for_status()
        return response.json()

    def _get_pagination_str(self, pagination: dict) -> str:
        """Build the ``pagination`` argument of the customers query.

        First page: plain page size. Subsequent pages: page size plus an
        AFTER cursor taken from the previous page's ``endCursor`` (always
        present when ``hasNextPage`` was true).
        """
        if not pagination:
            return """
            size: 100
            """
        return """
            size: 100
            where: {
                cursor: "PAGINATION_CURSOR"
                direction: AFTER
            }
            """.replace(
            "PAGINATION_CURSOR", pagination.get("endCursor")
        )

    def get_customers(self, pagination: dict) -> SourceIteration:
        """Fetch one page of customers for the configured slug."""

        pagination_str = self._get_pagination_str(pagination=pagination)

        query = """
        query Customers($slug: DefaultString!) {
            getProductBySlug(slug: $slug) {
                customers(pagination: {
                    PAGINATION_STRING
                }) {
                    edges {
                        cursor
                        node {
                            id
                            email
                            name
                            company {
                                domain
                                id
                                name
                            }
                        }
                    }
                    pageInfo {
                        hasPreviousPage
                        hasNextPage
                        startCursor
                        endCursor
                    }
                }
            }
        }
        """.replace(
            "PAGINATION_STRING", pagination_str
        )

        variables = {"slug": self.config.slug}

        data = self.run_graphql_query(query, variables)

        # GraphQL errors come back as {"data": null, "errors": [...]};
        # the `or {}` guards keep us from calling .get() on None then.
        customers = ((data.get("data") or {}).get("getProductBySlug") or {}).get("customers") or {}
        edges = customers.get("edges") or []

        records = []
        for customer in edges:
            customer_data = customer.get("node", {})
            records.append(
                SourceRecord(
                    id=customer_data["id"],
                    data=customer_data,
                )
            )

        # Propagate pageInfo as the next pagination payload only while the
        # API reports another page; an empty dict ends the sync.
        pagination_info = customers.get("pageInfo") or {}
        next_pagination = pagination_info if pagination_info.get("hasNextPage") else {}

        return SourceIteration(records=records, next_pagination=next_pagination)

    def get(self, pagination: dict = None) -> SourceIteration:
        """Dispatch to the stream-specific fetcher."""
        if self.config.stream == "customers":
            return self.get_customers(pagination)

        raise NotImplementedError(f"Stream {self.config.stream} not implemented for Cycle")
@@ -3,7 +3,7 @@ import os
3
3
  from bizon.cli.utils import parse_from_yaml
4
4
  from bizon.engine.engine import RunnerFactory
5
5
 
6
- config = parse_from_yaml(os.path.abspath("bizon/sources/periscope/config/periscope_dashboards.yml"))
6
+ config = parse_from_yaml(os.path.abspath("bizon/connectors/sources/cycle/config/cycle.yml"))
7
7
 
8
8
  runner = RunnerFactory.create_from_config_dict(config=config)
9
9
  runner.run()
@@ -1,8 +1,8 @@
1
1
  name: dummy to logger
2
2
 
3
3
  source:
4
- source_name: dummy
5
- stream_name: creatures
4
+ name: dummy
5
+ stream: creatures
6
6
  authentication:
7
7
  type: api_key
8
8
  params:
@@ -1,5 +1,10 @@
1
+ import time
2
+
3
+
1
4
  # Function emulating an API call to a source endpoint
2
- def fake_api_call(url: str, cursor: str = None) -> dict:
5
+ def fake_api_call(url: str, cursor: str = None, sleep: int = None) -> dict:
6
+ if sleep:
7
+ time.sleep(sleep)
3
8
  if url == "https://api.dummy.com/v1/creatures":
4
9
  return fake_api_call_creatures(cursor)
5
10
 
@@ -1,4 +1,5 @@
1
- from typing import List, Literal, Tuple, Type, Union
1
+ import random
2
+ from typing import List, Literal, Tuple, Union
2
3
 
3
4
  from pydantic import Field
4
5
  from requests.auth import AuthBase
@@ -7,7 +8,7 @@ from bizon.source.auth.authenticators.oauth import Oauth2AuthParams
7
8
  from bizon.source.auth.authenticators.token import TokenAuthParams
8
9
  from bizon.source.auth.builder import AuthBuilder
9
10
  from bizon.source.auth.config import AuthConfig, AuthType
10
- from bizon.source.config import SourceConfig
11
+ from bizon.source.config import SourceConfig, SourceSyncModes
11
12
  from bizon.source.models import SourceIteration, SourceRecord
12
13
  from bizon.source.source import AbstractSource
13
14
 
@@ -23,12 +24,14 @@ class DummyAuthConfig(AuthConfig):
23
24
 
24
25
  class DummySourceConfig(SourceConfig):
25
26
  authentication: DummyAuthConfig
27
+ sleep: int = Field(0, description="Sleep time in seconds between API calls")
26
28
 
27
29
 
28
30
  class DummySource(AbstractSource):
29
31
 
30
32
  def __init__(self, config: DummySourceConfig):
31
33
  super().__init__(config)
34
+ self.config = config
32
35
 
33
36
  @staticmethod
34
37
  def streams() -> List[str]:
@@ -40,7 +43,7 @@ class DummySource(AbstractSource):
40
43
 
41
44
  @property
42
45
  def url_entity(self) -> str:
43
- return f"https://api.dummy.com/v1/{self.config.stream_name}"
46
+ return f"https://api.dummy.com/v1/{self.config.stream}"
44
47
 
45
48
  def get_authenticator(self) -> AuthBase:
46
49
 
@@ -71,11 +74,11 @@ class DummySource(AbstractSource):
71
74
 
72
75
  # If no pagination data is passed, we want to reach first page
73
76
  if not pagination:
74
- response = fake_api_call(url=self.url_entity)
77
+ response = fake_api_call(url=self.url_entity, sleep=self.config.sleep)
75
78
 
76
79
  # If we have pagination data we pass it to the API
77
80
  else:
78
- response = fake_api_call(url=self.url_entity, cursor=pagination.get("cursor"))
81
+ response = fake_api_call(url=self.url_entity, cursor=pagination.get("cursor"), sleep=self.config.sleep)
79
82
 
80
83
  # Now we process the response to:
81
84
  # - allow bizon to process the records and write them to destination
@@ -87,6 +90,15 @@ class DummySource(AbstractSource):
87
90
 
88
91
  next_pagination = {"cursor": next_cursor} if next_cursor else {}
89
92
 
93
+ destination_id = None
94
+
95
+ # If we are in streaming mode, we need to get the destination id from the stream name
96
+ if self.config.sync_mode == SourceSyncModes.STREAM:
97
+ if next_pagination.get("cursor") == "final-cursor":
98
+ destination_id = "routed"
99
+ else:
100
+ destination_id = self.config.stream
101
+
90
102
  if records:
91
103
  return SourceIteration(
92
104
  next_pagination=next_pagination,
@@ -94,6 +106,7 @@ class DummySource(AbstractSource):
94
106
  SourceRecord(
95
107
  id=record["id"],
96
108
  data=record,
109
+ destination_id=destination_id,
97
110
  )
98
111
  for record in records
99
112
  ],
@@ -6,8 +6,8 @@ config_yaml = """
6
6
  name: dummy to logger
7
7
 
8
8
  source:
9
- source_name: dummy
10
- stream_name: creatures
9
+ name: dummy
10
+ stream: creatures
11
11
  authentication:
12
12
  type: api_key
13
13
  params:
@@ -4,8 +4,8 @@ from bizon.engine.engine import RunnerFactory
4
4
 
5
5
  config_yaml = """
6
6
  source:
7
- source_name: dummy
8
- stream_name: creatures
7
+ name: dummy
8
+ stream: creatures
9
9
  force_ignore_checkpoint: true
10
10
  authentication:
11
11
  type: api_key
@@ -4,8 +4,8 @@ from bizon.engine.engine import RunnerFactory
4
4
 
5
5
  config_yaml = """
6
6
  source:
7
- source_name: dummy
8
- stream_name: creatures
7
+ name: dummy
8
+ stream: creatures
9
9
  authentication:
10
10
  type: api_key
11
11
  params:
@@ -4,8 +4,8 @@ from bizon.engine.engine import RunnerFactory
4
4
 
5
5
  config_yaml = """
6
6
  source:
7
- source_name: dummy
8
- stream_name: creatures
7
+ name: dummy
8
+ stream: creatures
9
9
  authentication:
10
10
  type: api_key
11
11
  params:
@@ -0,0 +1,29 @@
1
from yaml import safe_load

from bizon.engine.engine import RunnerFactory

# Inline pipeline definition: dummy source -> logger destination.
# The single transform ("failure_transform") reads a key that does not
# exist in the record, so this pipeline exercises how the engine behaves
# when a transform raises at runtime.
# NOTE(review): the filename mentions "unnest" but no unnest option is
# configured here — confirm this is the intended fixture.
config_yaml = """
name: test_job

source:
  name: dummy
  stream: creatures
  authentication:
    type: api_key
    params:
      token: dummy_key

destination:
  name: logger
  config:
    dummy: dummy

transforms:
  - label: failure_transform
    python: |
      data['cookies'] = data['key_that_does_not_exist'].upper()
"""

# Parse the YAML into a dict, build the runner, and execute the pipeline.
config = safe_load(config_yaml)
runner = RunnerFactory.create_from_config_dict(config=config)
runner.run()
@@ -4,8 +4,8 @@ from bizon.engine.engine import RunnerFactory
4
4
 
5
5
  config_yaml = """
6
6
  source:
7
- source_name: dummy
8
- stream_name: creatures
7
+ name: dummy
8
+ stream: creatures
9
9
  authentication:
10
10
  type: api_key
11
11
  params:
@@ -4,8 +4,8 @@ from bizon.engine.engine import RunnerFactory
4
4
 
5
5
  config_yaml = """
6
6
  source:
7
- source_name: dummy
8
- stream_name: creatures
7
+ name: dummy
8
+ stream: creatures
9
9
  authentication:
10
10
  type: api_key
11
11
  params:
@@ -1,6 +1,8 @@
1
+ name: gsheets to logger example
2
+
1
3
  source:
2
- source_name: gsheets
3
- stream_name: worksheet
4
+ name: gsheets
5
+ stream: worksheet
4
6
  spreadsheet_url: <MY_SPREADSHEET_URL>
5
7
  worksheet_name: Sheet1
6
8
 
@@ -1,6 +1,8 @@
1
+ name: gsheets to logger example
2
+
1
3
  source:
2
- source_name: gsheets
3
- stream_name: worksheet
4
+ name: gsheets
5
+ stream: worksheet
4
6
  spreadsheet_url: <MY_SPREADSHEET_URL>
5
7
  worksheet_name: Sheet1
6
8
  service_account_key: >-
@@ -1,6 +1,8 @@
1
+ name: hubspot contacts to logger
2
+
1
3
  source:
2
- source_name: hubspot
3
- stream_name: contacts
4
+ name: hubspot
5
+ stream: contacts
4
6
  properties:
5
7
  strategy: all
6
8
  authentication:
@@ -1,6 +1,8 @@
1
+ name: hubspot contacts to bigquery
2
+
1
3
  source:
2
- source_name: hubspot
3
- stream_name: contacts
4
+ name: hubspot
5
+ stream: contacts
4
6
  properties:
5
7
  strategy: all
6
8
  authentication:
@@ -40,7 +40,7 @@ class HubSpotObjectsSource(HubSpotBaseSource):
40
40
  def __init__(self, config: HubSpotSourceConfig):
41
41
  super().__init__(config)
42
42
  self.config: HubSpotSourceConfig = config
43
- self.object = self.config.stream_name
43
+ self.object = self.config.stream
44
44
  self.selected_properties = [] # Initialize properties to empty list
45
45
 
46
46
  # If we are initializing the pipeline, we retrieve the selected properties from HubSpot
@@ -1,15 +1,13 @@
1
1
  name: demo kafka to bigquery
2
2
 
3
3
  source:
4
- source_name: kafka
5
- stream_name: topic
4
+ name: kafka
5
+ stream: topic
6
6
 
7
7
  sync_mode: full_refresh
8
8
 
9
9
  topic: my-topic
10
10
 
11
- nb_bytes_schema_id: 8
12
-
13
11
  batch_size: 1000
14
12
  consumer_timeout: 10
15
13
  bootstrap_servers: <bootstrap-severs>:9092
@@ -47,4 +45,4 @@ destination:
47
45
  # syncCursorInDBEvery: 100
48
46
 
49
47
  # runner:
50
- # log_level: INFO
48
+ # log_level: INFO
@@ -0,0 +1,110 @@
1
+ name: Kafka debezium messages to bigquery streaming
2
+
3
+ source:
4
+ name: kafka
5
+ stream: topic
6
+
7
+ sync_mode: full_refresh
8
+
9
+ force_ignore_checkpoint: true
10
+
11
+ topic: <TOPIC_NAME>
12
+
13
+ batch_size: 1000
14
+ consumer_timeout: 10
15
+ bootstrap_servers: <BOOTSTRAP_SERVERS>
16
+ group_id: <GROUP_ID>
17
+
18
+ authentication:
19
+ type: basic
20
+
21
+ schema_registry_url: <SCHEMA_REGISTRY_URL>
22
+ schema_registry_username: <SCHEMA_REGISTRY_USERNAME>
23
+ schema_registry_password: <SCHEMA_REGISTRY_PASSWORD>
24
+
25
+ params:
26
+ username: <USERNAME>
27
+ password: <PASSWORD>
28
+
29
+ destination:
30
+ name: bigquery_streaming
31
+
32
+ config:
33
+ buffer_size: 50
34
+ bq_max_rows_per_request: 10000
35
+ buffer_flush_timeout: 30
36
+
37
+ table_id: <TABLE_ID>
38
+ dataset_id: <DATASET_ID>
39
+ dataset_location: US
40
+ project_id: <PROJECT_ID>
41
+
42
+ unnest: true
43
+
44
+ time_partitioning:
45
+ # Mandatory if unnested
46
+ field: __event_timestamp
47
+
48
+ record_schema:
49
+ - name: account_id
50
+ type: INTEGER
51
+ mode: REQUIRED
52
+
53
+ - name: team_id
54
+ type: INTEGER
55
+ mode: REQUIRED
56
+
57
+ - name: user_id
58
+ type: INTEGER
59
+ mode: REQUIRED
60
+
61
+ - name: __deleted
62
+ type: BOOLEAN
63
+ mode: NULLABLE
64
+
65
+ - name: __cluster
66
+ type: STRING
67
+ mode: NULLABLE
68
+
69
+ - name: __kafka_partition
70
+ type: INTEGER
71
+ mode: NULLABLE
72
+
73
+ - name: __kafka_offset
74
+ type: INTEGER
75
+ mode: NULLABLE
76
+
77
+ - name: __event_timestamp
78
+ type: TIMESTAMP
79
+ mode: NULLABLE
80
+
81
+ transforms:
82
+ - label: debezium
83
+ python: |
84
+ from datetime import datetime
85
+
86
+ cluster = data['value']['source']['name'].replace('_', '-')
87
+ partition = data['partition']
88
+ offset = data['offset']
89
+
90
+ kafka_timestamp = datetime.utcfromtimestamp(data['value']['source']['ts_ms'] / 1000).strftime('%Y-%m-%d %H:%M:%S.%f')
91
+
92
+ deleted = False
93
+
94
+ if data['value']['op'] == 'd':
95
+ data = data['value']['before']
96
+ deleted = True
97
+ else:
98
+ data = data['value']['after']
99
+
100
+ data['__deleted'] = deleted
101
+ data['__cluster'] = cluster
102
+ data['__kafka_partition'] = partition
103
+ data['__kafka_offset'] = offset
104
+ data['__event_timestamp'] = kafka_timestamp
105
+
106
+ engine:
107
+ queue:
108
+ type: python_queue
109
+ config:
110
+ max_nb_messages: 1000000
@@ -0,0 +1,18 @@
1
from typing import List

from bizon.source.callback import AbstractSourceCallback
from bizon.source.models import SourceIteration

from .config import KafkaSourceConfig


class KafkaSourceCallback(AbstractSourceCallback):
    """Source callback for the Kafka connector.

    Intended to commit consumer offsets once the destination has durably
    written the corresponding iterations (at-least-once delivery).
    """

    def __init__(self, config: KafkaSourceConfig):
        super().__init__(config)

    def on_iterations_written(self, iterations: List[SourceIteration]):
        """Commit the offsets of the iterations"""

        # TODO: Implement the callback
        # NOTE(review): currently a no-op — offsets are NOT committed
        # here, so offset handling must happen elsewhere (e.g. via the
        # consumer configuration) until this is implemented.

        pass
@@ -0,0 +1,69 @@
1
from enum import Enum
from typing import Any, List, Literal, Mapping

from pydantic import BaseModel, Field

from bizon.source.auth.config import AuthConfig, AuthType
from bizon.source.config import SourceConfig


class SchemaRegistryType(str, Enum):
    """Supported schema registry implementations."""

    APICURIO = "apicurio"


class MessageEncoding(str, Enum):
    """Supported encodings for Kafka message values."""

    UTF_8 = "utf-8"
    AVRO = "avro"


class KafkaAuthConfig(AuthConfig):
    """Broker and schema-registry authentication settings."""

    # Only SASL basic (username/password) broker authentication is supported.
    type: Literal[AuthType.BASIC] = AuthType.BASIC

    # Schema registry authentication
    schema_registry_type: SchemaRegistryType = Field(
        default=SchemaRegistryType.APICURIO, description="Schema registry type"
    )

    # Fixed: description was truncated ("... with the format ").
    schema_registry_url: str = Field(default="", description="Schema registry base URL")
    schema_registry_username: str = Field(default="", description="Schema registry username")
    schema_registry_password: str = Field(default="", description="Schema registry password")


def default_kafka_consumer_config() -> dict:
    """Default confluent-kafka consumer settings (manual offset commits over SASL_SSL)."""
    return {
        "auto.offset.reset": "earliest",
        "enable.auto.commit": False,  # Turn off auto-commit for manual offset handling
        "session.timeout.ms": 45000,
        "security.protocol": "SASL_SSL",
    }


class TopicConfig(BaseModel):
    """Maps one Kafka topic to the destination it is routed to."""

    name: str = Field(..., description="Kafka topic name")
    destination_id: str = Field(..., description="Destination id")


class KafkaSourceConfig(SourceConfig):
    """Configuration of the Kafka source connector."""

    # Mandatory Kafka configuration.
    # Fixed: stale description said "comma separated" although this field
    # is a list of TopicConfig objects.
    topics: List[TopicConfig] = Field(..., description="List of Kafka topics to consume, with their destination ids")
    bootstrap_servers: str = Field(..., description="Kafka bootstrap servers")
    group_id: str = Field(default="bizon", description="Kafka group id")

    skip_message_empty_value: bool = Field(
        default=True, description="Skip messages with empty value (tombstone messages)"
    )

    # Kafka consumer configuration
    batch_size: int = Field(100, description="Kafka batch size, number of messages to fetch at once.")
    consumer_timeout: int = Field(10, description="Kafka consumer timeout in seconds, before returning batch.")

    consumer_config: Mapping[str, Any] = Field(
        default_factory=default_kafka_consumer_config,
        description="Kafka consumer configuration, as described in the confluent-kafka-python documentation",
    )

    # Fixed: was annotated `str`; typing it as the enum rejects invalid
    # values at config-parse time (plain strings are still coerced, so
    # existing configs keep working).
    message_encoding: MessageEncoding = Field(
        default=MessageEncoding.AVRO, description="Encoding to use to decode the message"
    )

    authentication: KafkaAuthConfig = Field(..., description="Authentication configuration")