bizon 0.0.3.dev1__tar.gz → 0.0.4.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/PKG-INFO +11 -10
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/README.md +8 -9
- bizon-0.0.4.dev2/bizon/cli/__init__.py +0 -0
- bizon-0.0.4.dev2/bizon/cli/main.py +119 -0
- bizon-0.0.4.dev2/bizon/cli/utils.py +31 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/tests/test_bigquery_client.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/destination.py +5 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/adapters/sqlalchemy/backend.py +52 -18
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/adapters/sqlalchemy/config.py +5 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/backend.py +4 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/config.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/models.py +5 -5
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/config.py +7 -9
- bizon-0.0.4.dev2/bizon/engine/engine.py +31 -0
- bizon-0.0.4.dev2/bizon/engine/pipeline/consumer.py +15 -0
- bizon-0.0.4.dev2/bizon/engine/pipeline/models.py +10 -0
- bizon-0.0.4.dev2/bizon/engine/pipeline/producer.py +182 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/consumer.py +4 -5
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/config.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/consumer.py +2 -6
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/queue.py +6 -4
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/consumer.py +3 -4
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/queue.py +3 -9
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/queue.py +5 -11
- bizon-0.0.4.dev2/bizon/engine/runners/adapters/process.py +80 -0
- bizon-0.0.4.dev2/bizon/engine/runners/adapters/thread.py +72 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/runners/config.py +14 -8
- bizon-0.0.4.dev2/bizon/engine/runners/models.py +9 -0
- bizon-0.0.4.dev2/bizon/engine/runners/runner.py +190 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/config.py +7 -6
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/cursor.py +1 -1
- bizon-0.0.4.dev2/bizon/source/discover.py +301 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/session.py +7 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/source.py +20 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/src/source.py +23 -5
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_bigquery_backend.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_kafka.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_rabbitmq.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_write_data_bigquery.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_write_data_bigquery_through_kafka.py +1 -1
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/src/source.py +56 -11
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/tests/gsheets_pipeline.py +1 -1
- bizon-0.0.3.dev1/bizon/sources/hubspot/src/source.py → bizon-0.0.4.dev2/bizon/sources/hubspot/src/source_objects.py +28 -5
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_benchmark.py +2 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_client.py +2 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_iteration.py +2 -2
- bizon-0.0.4.dev2/bizon/sources/hubspot/tests/hubspot_pipeline.py +6 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/src/source.py +37 -8
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/tests/periscope_pipeline_charts.py +2 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/tests/periscope_pipeline_dashboard.py +2 -2
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/pyproject.toml +7 -2
- bizon-0.0.3.dev1/bizon/cli/parser.py +0 -7
- bizon-0.0.3.dev1/bizon/engine/producer.py +0 -117
- bizon-0.0.3.dev1/bizon/engine/runner.py +0 -162
- bizon-0.0.3.dev1/bizon/engine/runners/thread.py +0 -43
- bizon-0.0.3.dev1/bizon/sources/__init__.py +0 -104
- bizon-0.0.3.dev1/bizon/sources/dummy/src/config.py +0 -25
- bizon-0.0.3.dev1/bizon/sources/gsheets/src/config.py +0 -18
- bizon-0.0.3.dev1/bizon/sources/hubspot/src/errors.py +0 -43
- bizon-0.0.3.dev1/bizon/sources/hubspot/src/models/config.py +0 -28
- bizon-0.0.3.dev1/bizon/sources/hubspot/tests/hubspot_pipeline.py +0 -9
- bizon-0.0.3.dev1/bizon/sources/periscope/src/config.py +0 -39
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/LICENSE +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/errors/backoff.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/errors/errors.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/models.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/config/bigquery.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/src/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/src/destination.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/buffer.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/file/src/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/file/src/destination.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/logger/src/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/logger/src/destination.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/models.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/queue.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/abstract_oauth.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/abstract_token.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/basic.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/cookies.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/oauth.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/token.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/builder.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/config.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/models.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/config/api_key.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/config/api_key_kafka.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/src/fake_api.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/config/default_auth.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/config/service_account.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/config/api_key.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/config/oauth.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/src/models/hs_object.py +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/config/periscope_charts.example.yml +0 -0
- {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/config/periscope_dashboards.example.yml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bizon
|
|
3
|
-
Version: 0.0.3.dev1
|
|
3
|
+
Version: 0.0.4.dev2
|
|
4
4
|
Summary: Extract and load your data reliably from API Clients with native fault-tolerant and checkpointing mechanism.
|
|
5
5
|
Author: Antoine Balliet
|
|
6
6
|
Author-email: antoine.balliet@gmail.com
|
|
@@ -16,6 +16,7 @@ Provides-Extra: kafka
|
|
|
16
16
|
Provides-Extra: postgres
|
|
17
17
|
Provides-Extra: rabbitmq
|
|
18
18
|
Requires-Dist: backoff (>=2.2.1,<3.0.0)
|
|
19
|
+
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
19
20
|
Requires-Dist: dpath (>=2.2.0,<3.0.0)
|
|
20
21
|
Requires-Dist: faker (>=26.0.0,<27.0.0)
|
|
21
22
|
Requires-Dist: google-cloud-bigquery (>=3.25.0,<4.0.0) ; extra == "bigquery"
|
|
@@ -32,6 +33,7 @@ Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
|
|
|
32
33
|
Requires-Dist: pydantic (>=2.8.2,<3.0.0)
|
|
33
34
|
Requires-Dist: pydantic-extra-types (>=2.9.0,<3.0.0)
|
|
34
35
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
36
|
+
Requires-Dist: pytz (>=2024.2,<2025.0)
|
|
35
37
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
36
38
|
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
|
37
39
|
Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
|
|
@@ -61,11 +63,10 @@ pip install bizon
|
|
|
61
63
|
```
|
|
62
64
|
|
|
63
65
|
## Usage
|
|
64
|
-
```python
|
|
65
|
-
from yaml import safe_load
|
|
66
|
-
from bizon.engine.runner import RunnerFactory
|
|
67
66
|
|
|
68
|
-
|
|
67
|
+
Create a file named `config.yml` in your working directory with the following content:
|
|
68
|
+
|
|
69
|
+
```yaml
|
|
69
70
|
source:
|
|
70
71
|
source_name: dummy
|
|
71
72
|
stream_name: creatures
|
|
@@ -78,13 +79,13 @@ destination:
|
|
|
78
79
|
name: logger
|
|
79
80
|
config:
|
|
80
81
|
dummy: dummy
|
|
81
|
-
"""
|
|
82
|
-
|
|
83
|
-
config = safe_load(yaml_config)
|
|
84
|
-
runner = RunnerFactory.create_from_config_dict(config=config)
|
|
85
|
-
runner.run()
|
|
86
82
|
```
|
|
87
83
|
|
|
84
|
+
Run the pipeline with the following command:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
bizon run config.yml
|
|
88
|
+
```
|
|
88
89
|
## Backend configuration
|
|
89
90
|
|
|
90
91
|
Backend is the interface used by Bizon to store its state. It can be configured in the `backend` section of the configuration file. The following backends are supported:
|
|
@@ -21,11 +21,10 @@ pip install bizon
|
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
## Usage
|
|
24
|
-
```python
|
|
25
|
-
from yaml import safe_load
|
|
26
|
-
from bizon.engine.runner import RunnerFactory
|
|
27
24
|
|
|
28
|
-
|
|
25
|
+
Create a file named `config.yml` in your working directory with the following content:
|
|
26
|
+
|
|
27
|
+
```yaml
|
|
29
28
|
source:
|
|
30
29
|
source_name: dummy
|
|
31
30
|
stream_name: creatures
|
|
@@ -38,13 +37,13 @@ destination:
|
|
|
38
37
|
name: logger
|
|
39
38
|
config:
|
|
40
39
|
dummy: dummy
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
config = safe_load(yaml_config)
|
|
44
|
-
runner = RunnerFactory.create_from_config_dict(config=config)
|
|
45
|
-
runner.run()
|
|
46
40
|
```
|
|
47
41
|
|
|
42
|
+
Run the pipeline with the following command:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
bizon run config.yml
|
|
46
|
+
```
|
|
48
47
|
## Backend configuration
|
|
49
48
|
|
|
50
49
|
Backend is the interface used by Bizon to store its state. It can be configured in the `backend` section of the configuration file. The following backends are supported:
|
|
File without changes
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
from bizon.engine.engine import RunnerFactory
|
|
4
|
+
from bizon.source.discover import discover_all_sources
|
|
5
|
+
|
|
6
|
+
from .utils import (
|
|
7
|
+
parse_from_yaml,
|
|
8
|
+
set_custom_source_path_in_config,
|
|
9
|
+
set_debug_mode,
|
|
10
|
+
set_runner_in_config,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group()
|
|
15
|
+
def cli():
|
|
16
|
+
"""Bizon CLI."""
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Create a 'source' group under 'bizon'
|
|
21
|
+
@cli.group()
|
|
22
|
+
def source():
|
|
23
|
+
"""Subcommands for handling sources."""
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@source.command()
|
|
28
|
+
def list():
|
|
29
|
+
"""List available sources."""
|
|
30
|
+
|
|
31
|
+
click.echo("Retrieving available sources...")
|
|
32
|
+
sources = discover_all_sources()
|
|
33
|
+
|
|
34
|
+
click.echo("Available sources:")
|
|
35
|
+
for source_name, source_model in sources.items():
|
|
36
|
+
if not source_model.available_streams:
|
|
37
|
+
click.echo(
|
|
38
|
+
f"{source_name} - NOT AVAILABLE, run 'pip install bizon[{source_name}]' to install missing dependencies."
|
|
39
|
+
)
|
|
40
|
+
else:
|
|
41
|
+
click.echo(f"{source_name} - {source_model.available_streams}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Create a 'stream' group under 'bizon'
|
|
45
|
+
@cli.group()
|
|
46
|
+
def stream():
|
|
47
|
+
"""Subcommands for handling streams."""
|
|
48
|
+
pass
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@stream.command()
|
|
52
|
+
@click.argument("source_name", type=click.STRING)
|
|
53
|
+
def list(source_name: str):
|
|
54
|
+
"""List available streams for a source."""
|
|
55
|
+
sources = discover_all_sources()
|
|
56
|
+
source_model = sources.get(source_name)
|
|
57
|
+
if not source_model:
|
|
58
|
+
click.echo(f"Source {source_name} not found.")
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
click.echo(f"Available streams for {source_name}:")
|
|
62
|
+
for stream in source_model.streams:
|
|
63
|
+
stream_mode = "[Supports incremental]" if stream.supports_incremental else "[Full refresh only]"
|
|
64
|
+
click.echo(f"{stream_mode} - {stream.name}")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Create a 'destination' group under 'bizon'
|
|
68
|
+
@cli.group()
|
|
69
|
+
def destination():
|
|
70
|
+
"""Subcommands for handling destinations."""
|
|
71
|
+
pass
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@cli.command()
|
|
75
|
+
@click.argument("filename", type=click.Path(exists=True))
|
|
76
|
+
@click.option(
|
|
77
|
+
"--custom-source",
|
|
78
|
+
required=False,
|
|
79
|
+
type=click.Path(exists=True),
|
|
80
|
+
help="Custom Python file implementing a Bizon source.",
|
|
81
|
+
)
|
|
82
|
+
@click.option(
|
|
83
|
+
"--runner",
|
|
84
|
+
required=False,
|
|
85
|
+
type=click.Choice(["thread", "process"]),
|
|
86
|
+
default="thread",
|
|
87
|
+
show_default=True,
|
|
88
|
+
help="Runner type to use. Thread or Process.",
|
|
89
|
+
)
|
|
90
|
+
@click.option(
|
|
91
|
+
"--debug",
|
|
92
|
+
required=False,
|
|
93
|
+
is_flag=True,
|
|
94
|
+
show_default=True,
|
|
95
|
+
default=False,
|
|
96
|
+
help="Enable debug mode.",
|
|
97
|
+
)
|
|
98
|
+
def run(filename: str, custom_source: str, runner: str, debug, help="Run a bizon pipeline from a YAML file."):
|
|
99
|
+
"""Run a bizon pipeline from a YAML file."""
|
|
100
|
+
ctx = click.get_current_context()
|
|
101
|
+
|
|
102
|
+
# Parse config from YAML file as a dictionary
|
|
103
|
+
config = parse_from_yaml(filename)
|
|
104
|
+
|
|
105
|
+
# Set debug mode
|
|
106
|
+
set_debug_mode(debug)
|
|
107
|
+
|
|
108
|
+
# Override source_file_path param in config
|
|
109
|
+
set_custom_source_path_in_config(config=config, custom_source=ctx.get_parameter_source("custom-source"))
|
|
110
|
+
|
|
111
|
+
# Override runner param in config
|
|
112
|
+
set_runner_in_config(config=config, runner=runner)
|
|
113
|
+
|
|
114
|
+
runner = RunnerFactory.create_from_config_dict(config=config)
|
|
115
|
+
runner.run()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
|
|
119
|
+
cli()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def parse_from_yaml(path_to_yaml) -> dict:
|
|
7
|
+
with open(path_to_yaml) as f:
|
|
8
|
+
config = yaml.safe_load(f)
|
|
9
|
+
return config
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def set_debug_mode(debug: bool):
|
|
13
|
+
# Set Log Level to DEBUG
|
|
14
|
+
if debug:
|
|
15
|
+
os.environ["LOGURU_LEVEL"] = "DEBUG"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def set_custom_source_path_in_config(config: dict, custom_source: str):
|
|
19
|
+
if custom_source:
|
|
20
|
+
config["source"]["source_file_path"] = custom_source
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def set_runner_in_config(config: dict, runner: str):
|
|
24
|
+
if runner:
|
|
25
|
+
if "engine" not in config:
|
|
26
|
+
config["engine"] = {}
|
|
27
|
+
|
|
28
|
+
if "runner" not in config["engine"]:
|
|
29
|
+
config["engine"]["runner"] = {"type": runner}
|
|
30
|
+
|
|
31
|
+
config["engine"]["runner"]["type"] = runner
|
{bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/tests/test_bigquery_client.py
RENAMED
|
@@ -6,7 +6,7 @@ from random import randint
|
|
|
6
6
|
import pytest
|
|
7
7
|
from faker import Faker
|
|
8
8
|
|
|
9
|
-
from bizon.cli.
|
|
9
|
+
from bizon.cli.utils import parse_from_yaml
|
|
10
10
|
from bizon.destinations.bigquery.src.config import BigQueryConfig
|
|
11
11
|
from bizon.destinations.bigquery.src.destination import BigQueryDestination
|
|
12
12
|
from bizon.source.config import SourceConfig
|
|
@@ -154,6 +154,11 @@ class AbstractDestination(ABC):
|
|
|
154
154
|
) -> bool:
|
|
155
155
|
"""Write records to destination and update the cursor for the given iteration"""
|
|
156
156
|
|
|
157
|
+
# Case when producer failed to fetch data from first iteration
|
|
158
|
+
if iteration == 0 and len(source_records) == 0:
|
|
159
|
+
logger.warning("Source failed to fetch data from the first iteration, no records will be written.")
|
|
160
|
+
return False
|
|
161
|
+
|
|
157
162
|
# Prepare destination
|
|
158
163
|
if iteration == 0:
|
|
159
164
|
self.prepare_destination()
|
|
@@ -29,7 +29,8 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
29
29
|
|
|
30
30
|
def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes):
|
|
31
31
|
super().__init__(config, type)
|
|
32
|
-
|
|
32
|
+
|
|
33
|
+
self._engine = None
|
|
33
34
|
|
|
34
35
|
self.config: Union[
|
|
35
36
|
PostgresConfigDetails,
|
|
@@ -40,9 +41,10 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
40
41
|
@property
|
|
41
42
|
def session(self) -> Generator[Session, None, None]:
|
|
42
43
|
"""yields a SQLAlchemy connection"""
|
|
44
|
+
engine = self.get_engine()
|
|
43
45
|
session_ = scoped_session(
|
|
44
46
|
sessionmaker(
|
|
45
|
-
bind=
|
|
47
|
+
bind=engine,
|
|
46
48
|
expire_on_commit=False,
|
|
47
49
|
)
|
|
48
50
|
)
|
|
@@ -51,14 +53,19 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
51
53
|
|
|
52
54
|
session_.close()
|
|
53
55
|
|
|
56
|
+
# For SQLite, we need to dispose the engine after each operation to prevent database lock
|
|
57
|
+
self.handle_dispose_sqlite(engine)
|
|
58
|
+
|
|
54
59
|
def _get_engine_bigquery(self) -> Engine:
|
|
55
60
|
if hasattr(self.config, "service_account_key") and self.config.service_account_key:
|
|
56
61
|
return create_engine(
|
|
57
|
-
f"bigquery://{self.config.database}/{self.config.
|
|
62
|
+
f"bigquery://{self.config.database}/{self.config.schema_name}",
|
|
58
63
|
echo=self.config.echoEngine,
|
|
59
64
|
credentials_info=self.config.service_account_key,
|
|
60
65
|
)
|
|
61
|
-
return create_engine(
|
|
66
|
+
return create_engine(
|
|
67
|
+
f"bigquery://{self.config.database}/{self.config.schema_name}", echo=self.config.echoEngine
|
|
68
|
+
)
|
|
62
69
|
|
|
63
70
|
def _get_engine_postgres(self) -> Engine:
|
|
64
71
|
return create_engine(
|
|
@@ -75,14 +82,15 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
75
82
|
if self.type == BackendTypes.POSTGRES:
|
|
76
83
|
return self._get_engine_postgres()
|
|
77
84
|
|
|
78
|
-
# SQLite in a file
|
|
85
|
+
# SQLite in a file, ok for small tests
|
|
79
86
|
if self.type == BackendTypes.SQLITE:
|
|
80
87
|
return create_engine(
|
|
81
|
-
f"sqlite:///{self.config.database}.
|
|
88
|
+
f"sqlite:///{self.config.database}.sqlite3",
|
|
82
89
|
echo=self.config.echoEngine,
|
|
90
|
+
connect_args={"check_same_thread": False},
|
|
83
91
|
)
|
|
84
92
|
|
|
85
|
-
# SQLite in memory
|
|
93
|
+
# ONLY FOR UNIT TESTS: SQLite in memory
|
|
86
94
|
if self.type == BackendTypes.SQLITE_IN_MEMORY:
|
|
87
95
|
return create_engine(
|
|
88
96
|
f"sqlite:///:memory:",
|
|
@@ -97,22 +105,44 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
97
105
|
logger.warning("SQLite does not support schemas")
|
|
98
106
|
return True
|
|
99
107
|
|
|
100
|
-
|
|
101
|
-
|
|
108
|
+
engine = self.get_engine()
|
|
109
|
+
|
|
110
|
+
with engine.connect() as connection:
|
|
111
|
+
if not inspect(connection).has_schema(self.config.schema_name):
|
|
102
112
|
logger.error(
|
|
103
|
-
f"Schema or dataset {self.config.
|
|
113
|
+
f"Schema or dataset {self.config.schema_name} does not exist in the database, you need to create it first."
|
|
104
114
|
)
|
|
105
115
|
raise Exception(
|
|
106
|
-
f"Schema or dataset {self.config.
|
|
116
|
+
f"Schema or dataset {self.config.schema_name} does not exist in the database, you need to create it first."
|
|
107
117
|
)
|
|
108
118
|
|
|
119
|
+
self.handle_dispose_sqlite(engine)
|
|
120
|
+
|
|
121
|
+
def handle_dispose_sqlite(self, engine: Engine):
|
|
122
|
+
if self.type == BackendTypes.SQLITE:
|
|
123
|
+
engine.dispose()
|
|
124
|
+
|
|
125
|
+
def get_engine(self) -> Engine:
|
|
126
|
+
"""Return the SQLAlchemy engine"""
|
|
127
|
+
if self.type == BackendTypes.SQLITE:
|
|
128
|
+
return self._get_engine()
|
|
129
|
+
|
|
130
|
+
if not self._engine:
|
|
131
|
+
self._engine = self._get_engine()
|
|
132
|
+
|
|
133
|
+
return self._engine
|
|
134
|
+
|
|
109
135
|
#### INIT DATABASE ####
|
|
110
136
|
|
|
111
137
|
def create_all_tables(self):
|
|
112
|
-
|
|
138
|
+
engine = self.get_engine()
|
|
139
|
+
Base.metadata.create_all(engine)
|
|
140
|
+
self.handle_dispose_sqlite(engine)
|
|
113
141
|
|
|
114
142
|
def drop_all_tables(self):
|
|
115
|
-
|
|
143
|
+
engine = self.get_engine()
|
|
144
|
+
Base.metadata.drop_all(engine)
|
|
145
|
+
self.handle_dispose_sqlite(engine)
|
|
116
146
|
|
|
117
147
|
def check_prerequisites(self) -> bool:
|
|
118
148
|
"""Check if the database contains the necessary tables, return True if entities are present
|
|
@@ -125,19 +155,23 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
125
155
|
|
|
126
156
|
all_entities_exist = True
|
|
127
157
|
|
|
158
|
+
engine = self.get_engine()
|
|
159
|
+
|
|
128
160
|
# Check if TABLE_STREAM_INFO exists, otherwise create it
|
|
129
|
-
if not inspect(
|
|
161
|
+
if not inspect(engine).has_table(TABLE_STREAM_INFO):
|
|
130
162
|
all_entities_exist = False
|
|
131
163
|
logger.info(f"Table {TABLE_STREAM_INFO} does not exist in the database, we will create it")
|
|
132
164
|
|
|
133
|
-
if not inspect(
|
|
165
|
+
if not inspect(engine).has_table(TABLE_SOURCE_CURSOR):
|
|
134
166
|
all_entities_exist = False
|
|
135
167
|
logger.info(f"Table {TABLE_SOURCE_CURSOR} does not exist in the database, we will create it")
|
|
136
168
|
|
|
137
|
-
if not inspect(
|
|
169
|
+
if not inspect(engine).has_table(TABLE_DESTINATION_CURSOR):
|
|
138
170
|
all_entities_exist = False
|
|
139
171
|
logger.info(f"Table {TABLE_DESTINATION_CURSOR} does not exist in the database, we will create it")
|
|
140
172
|
|
|
173
|
+
self.handle_dispose_sqlite(engine)
|
|
174
|
+
|
|
141
175
|
return all_entities_exist
|
|
142
176
|
|
|
143
177
|
def _add_and_commit(self, obj, session: Optional[Session] = None):
|
|
@@ -158,7 +192,7 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
158
192
|
source_name: str,
|
|
159
193
|
stream_name: str,
|
|
160
194
|
total_records_to_fetch: Optional[int] = None,
|
|
161
|
-
job_status: JobStatus = JobStatus.
|
|
195
|
+
job_status: JobStatus = JobStatus.STARTED,
|
|
162
196
|
session: Optional[Session] = None,
|
|
163
197
|
) -> StreamJob:
|
|
164
198
|
"""Create new StreamJob record in db and return it"""
|
|
@@ -228,7 +262,7 @@ class SQLAlchemyBackend(AbstractBackend):
|
|
|
228
262
|
iteration: int,
|
|
229
263
|
rows_fetched: int,
|
|
230
264
|
next_pagination: dict,
|
|
231
|
-
cursor_status: CursorStatus = CursorStatus.
|
|
265
|
+
cursor_status: CursorStatus = CursorStatus.STARTED,
|
|
232
266
|
error_message: Optional[str] = None,
|
|
233
267
|
session: Optional[Session] = None,
|
|
234
268
|
) -> str:
|
|
@@ -55,14 +55,18 @@ class SQLiteInMemoryConfig(AbstractBackendConfig):
|
|
|
55
55
|
|
|
56
56
|
## BIGQUERY ##
|
|
57
57
|
class BigQueryConfigDetails(SQLAlchemyConfigDetails):
|
|
58
|
+
|
|
58
59
|
database: str = Field(
|
|
59
60
|
description="GCP Project name",
|
|
60
61
|
default=...,
|
|
61
62
|
)
|
|
62
|
-
|
|
63
|
+
|
|
64
|
+
schema_name: str = Field(
|
|
63
65
|
description="BigQuery Dataset name",
|
|
64
66
|
default=...,
|
|
67
|
+
alias="schema",
|
|
65
68
|
)
|
|
69
|
+
|
|
66
70
|
service_account_key: str = Field(
|
|
67
71
|
description="Service Account Key JSON string. If empty it will be infered",
|
|
68
72
|
default="",
|
|
@@ -35,7 +35,7 @@ class AbstractBackend(ABC):
|
|
|
35
35
|
source_name: str,
|
|
36
36
|
stream_name: str,
|
|
37
37
|
total_records_to_fetch: Optional[int] = None,
|
|
38
|
-
job_status: JobStatus = JobStatus.
|
|
38
|
+
job_status: JobStatus = JobStatus.STARTED,
|
|
39
39
|
session: Optional[Session] = None,
|
|
40
40
|
) -> StreamJob:
|
|
41
41
|
"""Create new StreamJob record in db and return it"""
|
|
@@ -69,7 +69,7 @@ class AbstractBackend(ABC):
|
|
|
69
69
|
iteration: int,
|
|
70
70
|
rows_fetched: int,
|
|
71
71
|
next_pagination: dict,
|
|
72
|
-
cursor_status: CursorStatus = CursorStatus.
|
|
72
|
+
cursor_status: CursorStatus = CursorStatus.STARTED,
|
|
73
73
|
error_message: Optional[str] = None,
|
|
74
74
|
session: Optional[Session] = None,
|
|
75
75
|
) -> str:
|
|
@@ -161,8 +161,10 @@ class BackendFactory:
|
|
|
161
161
|
|
|
162
162
|
return SQLAlchemyBackend(config=config.config, type=config.type)
|
|
163
163
|
|
|
164
|
+
# ONLY FOR UNIT TESTS
|
|
164
165
|
elif config.type == BackendTypes.SQLITE_IN_MEMORY:
|
|
165
166
|
from .adapters.sqlalchemy.backend import SQLAlchemyBackend
|
|
166
167
|
|
|
167
168
|
return SQLAlchemyBackend(config=config.config, type=config.type)
|
|
169
|
+
|
|
168
170
|
raise ValueError(f"Unsupported backend type: {config.type}")
|
|
@@ -15,7 +15,7 @@ class BackendTypes(str, Enum):
|
|
|
15
15
|
|
|
16
16
|
class AbstractBackendConfigDetails(BaseModel, ABC):
|
|
17
17
|
database: str = Field(..., description="Database name")
|
|
18
|
-
|
|
18
|
+
schema_name: str = Field(..., description="Schema name", alias="schema")
|
|
19
19
|
syncCursorInDBEvery: int = Field(10, description="Number of iterations before syncing the cursor in the database")
|
|
20
20
|
|
|
21
21
|
|
|
@@ -16,7 +16,7 @@ def generate_uuid():
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class JobStatus(str, Enum):
|
|
19
|
-
|
|
19
|
+
STARTED = "started"
|
|
20
20
|
RUNNING = "running"
|
|
21
21
|
SUCCESS = "success"
|
|
22
22
|
FAILED = "failed"
|
|
@@ -25,7 +25,7 @@ class JobStatus(str, Enum):
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class CursorStatus(str, Enum):
|
|
28
|
-
|
|
28
|
+
STARTED = "started"
|
|
29
29
|
PULLING = "pulling"
|
|
30
30
|
PULLED = "pulled"
|
|
31
31
|
WRITTEN_IN_DESTINATION = "written_in_destination"
|
|
@@ -50,7 +50,7 @@ class StreamJob(Base):
|
|
|
50
50
|
DateTime, nullable=False, default=datetime.now(tz=UTC), doc="Timestamp when the job was created"
|
|
51
51
|
)
|
|
52
52
|
updated_at = Column(DateTime, nullable=True, default=None, doc="Timestamp when the job was last updated")
|
|
53
|
-
status = Column(String(100), default=JobStatus.
|
|
53
|
+
status = Column(String(100), default=JobStatus.STARTED, doc="Status of the job")
|
|
54
54
|
error_message = Column(String(255), nullable=True, doc="Error message if the job failed", default=None)
|
|
55
55
|
|
|
56
56
|
source_cursor = relationship("SourceCursor", cascade="all, delete")
|
|
@@ -71,7 +71,7 @@ class SourceCursor(Base):
|
|
|
71
71
|
rows_fetched = Column(Integer, default=0)
|
|
72
72
|
next_pagination = Column(String, nullable=True)
|
|
73
73
|
attempt = Column(Integer, default=0, doc="Number of attempts to pull the data for this cursor")
|
|
74
|
-
status = Column(String(100), default=CursorStatus.
|
|
74
|
+
status = Column(String(100), default=CursorStatus.STARTED, doc="Status of the cursor")
|
|
75
75
|
error_message = Column(
|
|
76
76
|
String(500), nullable=True, doc="Error message if pulling failed for this cursor", default=None
|
|
77
77
|
)
|
|
@@ -94,7 +94,7 @@ class DestinationCursor(Base):
|
|
|
94
94
|
to_source_iteration = Column(Integer, default=0)
|
|
95
95
|
rows_written = Column(Integer, default=0)
|
|
96
96
|
attempt = Column(Integer, default=0, doc="Number of attempts to pull the data for this cursor")
|
|
97
|
-
status = Column(String(100), default=CursorStatus.
|
|
97
|
+
status = Column(String(100), default=CursorStatus.STARTED, doc="Status of the cursor")
|
|
98
98
|
error_message = Column(
|
|
99
99
|
String(500), nullable=True, doc="Error message if pulling failed for this cursor", default=None
|
|
100
100
|
)
|
|
@@ -19,7 +19,7 @@ from .queue.adapters.python_queue.config import (
|
|
|
19
19
|
)
|
|
20
20
|
from .queue.adapters.rabbitmq.config import RabbitMQConfig
|
|
21
21
|
from .queue.config import QueueTypes
|
|
22
|
-
from .runners.config import RunnerConfig,
|
|
22
|
+
from .runners.config import RunnerConfig, RunnerFuturesConfig, RunnerTypes
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class EngineConfig(BaseModel):
|
|
@@ -34,10 +34,10 @@ class EngineConfig(BaseModel):
|
|
|
34
34
|
BigQuerySQLAlchemyConfig,
|
|
35
35
|
] = Field(
|
|
36
36
|
description="Configuration for the backend",
|
|
37
|
-
default=
|
|
38
|
-
type=BackendTypes.
|
|
37
|
+
default=SQLiteSQLAlchemyConfig(
|
|
38
|
+
type=BackendTypes.SQLITE,
|
|
39
39
|
config=SQLiteConfigDetails(
|
|
40
|
-
database="
|
|
40
|
+
database="bizon",
|
|
41
41
|
schema="NOT_USED_IN_SQLITE",
|
|
42
42
|
),
|
|
43
43
|
syncCursorInDBEvery=2,
|
|
@@ -54,7 +54,7 @@ class EngineConfig(BaseModel):
|
|
|
54
54
|
default=PythonQueueConfig(
|
|
55
55
|
type=QueueTypes.PYTHON_QUEUE,
|
|
56
56
|
config=PythonQueueConfigDetails(
|
|
57
|
-
queue=PythonQueueQueueConfig(max_size=
|
|
57
|
+
queue=PythonQueueQueueConfig(max_size=0),
|
|
58
58
|
consumer=PythonQueueConsumerConfig(poll_interval=2),
|
|
59
59
|
),
|
|
60
60
|
),
|
|
@@ -64,10 +64,8 @@ class EngineConfig(BaseModel):
|
|
|
64
64
|
runner: RunnerConfig = Field(
|
|
65
65
|
description="Runner to use for the pipeline",
|
|
66
66
|
default=RunnerConfig(
|
|
67
|
-
type=RunnerTypes.
|
|
68
|
-
config=
|
|
69
|
-
consumer_start_delay=2,
|
|
70
|
-
),
|
|
67
|
+
type=RunnerTypes.THREAD,
|
|
68
|
+
config=RunnerFuturesConfig(),
|
|
71
69
|
log_level="INFO",
|
|
72
70
|
),
|
|
73
71
|
)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from loguru import logger
|
|
2
|
+
|
|
3
|
+
from bizon.cli.utils import parse_from_yaml
|
|
4
|
+
from bizon.common.models import BizonConfig
|
|
5
|
+
|
|
6
|
+
from .config import RunnerTypes
|
|
7
|
+
from .runners.runner import AbstractRunner
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RunnerFactory:
|
|
11
|
+
@staticmethod
|
|
12
|
+
def create_from_config_dict(config: dict) -> AbstractRunner:
|
|
13
|
+
|
|
14
|
+
bizon_config = BizonConfig.model_validate(obj=config)
|
|
15
|
+
|
|
16
|
+
if bizon_config.engine.runner.type == RunnerTypes.THREAD:
|
|
17
|
+
from .runners.adapters.thread import ThreadRunner
|
|
18
|
+
|
|
19
|
+
return ThreadRunner(config=config)
|
|
20
|
+
|
|
21
|
+
if bizon_config.engine.runner.type == RunnerTypes.PROCESS:
|
|
22
|
+
from .runners.adapters.process import ProcessRunner
|
|
23
|
+
|
|
24
|
+
return ProcessRunner(config=config)
|
|
25
|
+
|
|
26
|
+
raise ValueError(f"Runner type {bizon_config.engine.runner.type} is not supported")
|
|
27
|
+
|
|
28
|
+
@staticmethod
|
|
29
|
+
def create_from_yaml(filepath: str) -> AbstractRunner:
|
|
30
|
+
config = parse_from_yaml(filepath)
|
|
31
|
+
return RunnerFactory.create_from_config_dict(config)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
from bizon.destinations.destination import AbstractDestination
|
|
5
|
+
from bizon.engine.queue.config import AbstractQueueConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AbstractQueueConsumer(ABC):
|
|
9
|
+
def __init__(self, config: AbstractQueueConfig, destination: AbstractDestination):
|
|
10
|
+
self.config = config
|
|
11
|
+
self.destination = destination
|
|
12
|
+
|
|
13
|
+
@abstractmethod
|
|
14
|
+
def run(self):
|
|
15
|
+
pass
|