bizon 0.0.3.dev1__tar.gz → 0.0.4.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/PKG-INFO +11 -10
  2. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/README.md +8 -9
  3. bizon-0.0.4.dev2/bizon/cli/__init__.py +0 -0
  4. bizon-0.0.4.dev2/bizon/cli/main.py +119 -0
  5. bizon-0.0.4.dev2/bizon/cli/utils.py +31 -0
  6. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/tests/test_bigquery_client.py +1 -1
  7. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/destination.py +5 -0
  8. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/adapters/sqlalchemy/backend.py +52 -18
  9. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/adapters/sqlalchemy/config.py +5 -1
  10. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/backend.py +4 -2
  11. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/config.py +1 -1
  12. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/backend/models.py +5 -5
  13. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/config.py +7 -9
  14. bizon-0.0.4.dev2/bizon/engine/engine.py +31 -0
  15. bizon-0.0.4.dev2/bizon/engine/pipeline/consumer.py +15 -0
  16. bizon-0.0.4.dev2/bizon/engine/pipeline/models.py +10 -0
  17. bizon-0.0.4.dev2/bizon/engine/pipeline/producer.py +182 -0
  18. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/consumer.py +4 -5
  19. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/config.py +1 -1
  20. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/consumer.py +2 -6
  21. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/python_queue/queue.py +6 -4
  22. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/consumer.py +3 -4
  23. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/queue.py +3 -9
  24. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/queue.py +5 -11
  25. bizon-0.0.4.dev2/bizon/engine/runners/adapters/process.py +80 -0
  26. bizon-0.0.4.dev2/bizon/engine/runners/adapters/thread.py +72 -0
  27. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/runners/config.py +14 -8
  28. bizon-0.0.4.dev2/bizon/engine/runners/models.py +9 -0
  29. bizon-0.0.4.dev2/bizon/engine/runners/runner.py +190 -0
  30. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/config.py +7 -6
  31. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/cursor.py +1 -1
  32. bizon-0.0.4.dev2/bizon/source/discover.py +301 -0
  33. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/session.py +7 -1
  34. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/source.py +20 -2
  35. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/src/source.py +23 -5
  36. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline.py +1 -1
  37. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_bigquery_backend.py +1 -1
  38. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_kafka.py +1 -1
  39. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_rabbitmq.py +1 -1
  40. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_write_data_bigquery.py +1 -1
  41. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/tests/dummy_pipeline_write_data_bigquery_through_kafka.py +1 -1
  42. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/src/source.py +56 -11
  43. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/tests/gsheets_pipeline.py +1 -1
  44. bizon-0.0.3.dev1/bizon/sources/hubspot/src/source.py → bizon-0.0.4.dev2/bizon/sources/hubspot/src/source_objects.py +28 -5
  45. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_benchmark.py +2 -2
  46. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_client.py +2 -2
  47. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/tests/hubspot_iteration.py +2 -2
  48. bizon-0.0.4.dev2/bizon/sources/hubspot/tests/hubspot_pipeline.py +6 -0
  49. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/src/source.py +37 -8
  50. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/tests/periscope_pipeline_charts.py +2 -2
  51. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/tests/periscope_pipeline_dashboard.py +2 -2
  52. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/pyproject.toml +7 -2
  53. bizon-0.0.3.dev1/bizon/cli/parser.py +0 -7
  54. bizon-0.0.3.dev1/bizon/engine/producer.py +0 -117
  55. bizon-0.0.3.dev1/bizon/engine/runner.py +0 -162
  56. bizon-0.0.3.dev1/bizon/engine/runners/thread.py +0 -43
  57. bizon-0.0.3.dev1/bizon/sources/__init__.py +0 -104
  58. bizon-0.0.3.dev1/bizon/sources/dummy/src/config.py +0 -25
  59. bizon-0.0.3.dev1/bizon/sources/gsheets/src/config.py +0 -18
  60. bizon-0.0.3.dev1/bizon/sources/hubspot/src/errors.py +0 -43
  61. bizon-0.0.3.dev1/bizon/sources/hubspot/src/models/config.py +0 -28
  62. bizon-0.0.3.dev1/bizon/sources/hubspot/tests/hubspot_pipeline.py +0 -9
  63. bizon-0.0.3.dev1/bizon/sources/periscope/src/config.py +0 -39
  64. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/LICENSE +0 -0
  65. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/errors/backoff.py +0 -0
  66. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/errors/errors.py +0 -0
  67. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/common/models.py +0 -0
  68. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/config/bigquery.example.yml +0 -0
  69. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/src/config.py +0 -0
  70. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/bigquery/src/destination.py +0 -0
  71. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/buffer.py +0 -0
  72. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/config.py +0 -0
  73. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/file/src/config.py +0 -0
  74. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/file/src/destination.py +0 -0
  75. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/logger/src/config.py +0 -0
  76. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/logger/src/destination.py +0 -0
  77. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/destinations/models.py +0 -0
  78. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/config.py +0 -0
  79. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/kafka/queue.py +0 -0
  80. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/adapters/rabbitmq/config.py +0 -0
  81. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/engine/queue/config.py +0 -0
  82. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/abstract_oauth.py +0 -0
  83. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/abstract_token.py +0 -0
  84. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/basic.py +0 -0
  85. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/cookies.py +0 -0
  86. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/oauth.py +0 -0
  87. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/authenticators/token.py +0 -0
  88. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/builder.py +0 -0
  89. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/auth/config.py +0 -0
  90. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/source/models.py +0 -0
  91. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/config/api_key.example.yml +0 -0
  92. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/config/api_key_kafka.example.yml +0 -0
  93. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/dummy/src/fake_api.py +0 -0
  94. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/config/default_auth.example.yml +0 -0
  95. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/gsheets/config/service_account.example.yml +0 -0
  96. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/config/api_key.example.yml +0 -0
  97. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/config/oauth.example.yml +0 -0
  98. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/hubspot/src/models/hs_object.py +0 -0
  99. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/config/periscope_charts.example.yml +0 -0
  100. {bizon-0.0.3.dev1 → bizon-0.0.4.dev2}/bizon/sources/periscope/config/periscope_dashboards.example.yml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bizon
3
- Version: 0.0.3.dev1
3
+ Version: 0.0.4.dev2
4
4
  Summary: Extract and load your data reliably from API Clients with native fault-tolerant and checkpointing mechanism.
5
5
  Author: Antoine Balliet
6
6
  Author-email: antoine.balliet@gmail.com
@@ -16,6 +16,7 @@ Provides-Extra: kafka
16
16
  Provides-Extra: postgres
17
17
  Provides-Extra: rabbitmq
18
18
  Requires-Dist: backoff (>=2.2.1,<3.0.0)
19
+ Requires-Dist: click (>=8.1.7,<9.0.0)
19
20
  Requires-Dist: dpath (>=2.2.0,<3.0.0)
20
21
  Requires-Dist: faker (>=26.0.0,<27.0.0)
21
22
  Requires-Dist: google-cloud-bigquery (>=3.25.0,<4.0.0) ; extra == "bigquery"
@@ -32,6 +33,7 @@ Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
32
33
  Requires-Dist: pydantic (>=2.8.2,<3.0.0)
33
34
  Requires-Dist: pydantic-extra-types (>=2.9.0,<3.0.0)
34
35
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
36
+ Requires-Dist: pytz (>=2024.2,<2025.0)
35
37
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
36
38
  Requires-Dist: requests (>=2.28.2,<3.0.0)
37
39
  Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
@@ -61,11 +63,10 @@ pip install bizon
61
63
  ```
62
64
 
63
65
  ## Usage
64
- ```python
65
- from yaml import safe_load
66
- from bizon.engine.runner import RunnerFactory
67
66
 
68
- yaml_config = """
67
+ Create a file named `config.yml` in your working directory with the following content:
68
+
69
+ ```yaml
69
70
  source:
70
71
  source_name: dummy
71
72
  stream_name: creatures
@@ -78,13 +79,13 @@ destination:
78
79
  name: logger
79
80
  config:
80
81
  dummy: dummy
81
- """
82
-
83
- config = safe_load(yaml_config)
84
- runner = RunnerFactory.create_from_config_dict(config=config)
85
- runner.run()
86
82
  ```
87
83
 
84
+ Run the pipeline with the following command:
85
+
86
+ ```bash
87
+ bizon run config.yml
88
+ ```
88
89
  ## Backend configuration
89
90
 
90
91
  Backend is the interface used by Bizon to store its state. It can be configured in the `backend` section of the configuration file. The following backends are supported:
@@ -21,11 +21,10 @@ pip install bizon
21
21
  ```
22
22
 
23
23
  ## Usage
24
- ```python
25
- from yaml import safe_load
26
- from bizon.engine.runner import RunnerFactory
27
24
 
28
- yaml_config = """
25
+ Create a file named `config.yml` in your working directory with the following content:
26
+
27
+ ```yaml
29
28
  source:
30
29
  source_name: dummy
31
30
  stream_name: creatures
@@ -38,13 +37,13 @@ destination:
38
37
  name: logger
39
38
  config:
40
39
  dummy: dummy
41
- """
42
-
43
- config = safe_load(yaml_config)
44
- runner = RunnerFactory.create_from_config_dict(config=config)
45
- runner.run()
46
40
  ```
47
41
 
42
+ Run the pipeline with the following command:
43
+
44
+ ```bash
45
+ bizon run config.yml
46
+ ```
48
47
  ## Backend configuration
49
48
 
50
49
  Backend is the interface used by Bizon to store its state. It can be configured in the `backend` section of the configuration file. The following backends are supported:
File without changes
@@ -0,0 +1,119 @@
1
+ import click
2
+
3
+ from bizon.engine.engine import RunnerFactory
4
+ from bizon.source.discover import discover_all_sources
5
+
6
+ from .utils import (
7
+ parse_from_yaml,
8
+ set_custom_source_path_in_config,
9
+ set_debug_mode,
10
+ set_runner_in_config,
11
+ )
12
+
13
+
14
+ @click.group()
15
+ def cli():
16
+ """Bizon CLI."""
17
+ pass
18
+
19
+
20
+ # Create a 'source' group under 'bizon'
21
+ @cli.group()
22
+ def source():
23
+ """Subcommands for handling sources."""
24
+ pass
25
+
26
+
27
+ @source.command()
28
+ def list():
29
+ """List available sources."""
30
+
31
+ click.echo("Retrieving available sources...")
32
+ sources = discover_all_sources()
33
+
34
+ click.echo("Available sources:")
35
+ for source_name, source_model in sources.items():
36
+ if not source_model.available_streams:
37
+ click.echo(
38
+ f"{source_name} - NOT AVAILABLE, run 'pip install bizon[{source_name}]' to install missing dependencies."
39
+ )
40
+ else:
41
+ click.echo(f"{source_name} - {source_model.available_streams}")
42
+
43
+
44
+ # Create a 'stream' group under 'bizon'
45
+ @cli.group()
46
+ def stream():
47
+ """Subcommands for handling streams."""
48
+ pass
49
+
50
+
51
+ @stream.command()
52
+ @click.argument("source_name", type=click.STRING)
53
+ def list(source_name: str):
54
+ """List available streams for a source."""
55
+ sources = discover_all_sources()
56
+ source_model = sources.get(source_name)
57
+ if not source_model:
58
+ click.echo(f"Source {source_name} not found.")
59
+ return
60
+
61
+ click.echo(f"Available streams for {source_name}:")
62
+ for stream in source_model.streams:
63
+ stream_mode = "[Supports incremental]" if stream.supports_incremental else "[Full refresh only]"
64
+ click.echo(f"{stream_mode} - {stream.name}")
65
+
66
+
67
+ # Create a 'destination' group under 'bizon'
68
+ @cli.group()
69
+ def destination():
70
+ """Subcommands for handling destinations."""
71
+ pass
72
+
73
+
74
+ @cli.command()
75
+ @click.argument("filename", type=click.Path(exists=True))
76
+ @click.option(
77
+ "--custom-source",
78
+ required=False,
79
+ type=click.Path(exists=True),
80
+ help="Custom Python file implementing a Bizon source.",
81
+ )
82
+ @click.option(
83
+ "--runner",
84
+ required=False,
85
+ type=click.Choice(["thread", "process"]),
86
+ default="thread",
87
+ show_default=True,
88
+ help="Runner type to use. Thread or Process.",
89
+ )
90
+ @click.option(
91
+ "--debug",
92
+ required=False,
93
+ is_flag=True,
94
+ show_default=True,
95
+ default=False,
96
+ help="Enable debug mode.",
97
+ )
98
+ def run(filename: str, custom_source: str, runner: str, debug, help="Run a bizon pipeline from a YAML file."):
99
+ """Run a bizon pipeline from a YAML file."""
100
+ ctx = click.get_current_context()
101
+
102
+ # Parse config from YAML file as a dictionary
103
+ config = parse_from_yaml(filename)
104
+
105
+ # Set debug mode
106
+ set_debug_mode(debug)
107
+
108
+ # Override source_file_path param in config
109
+ set_custom_source_path_in_config(config=config, custom_source=ctx.get_parameter_source("custom-source"))
110
+
111
+ # Override runner param in config
112
+ set_runner_in_config(config=config, runner=runner)
113
+
114
+ runner = RunnerFactory.create_from_config_dict(config=config)
115
+ runner.run()
116
+
117
+
118
+ if __name__ == "__main__":
119
+ cli()
@@ -0,0 +1,31 @@
1
+ import os
2
+
3
+ import yaml
4
+
5
+
6
+ def parse_from_yaml(path_to_yaml) -> dict:
7
+ with open(path_to_yaml) as f:
8
+ config = yaml.safe_load(f)
9
+ return config
10
+
11
+
12
+ def set_debug_mode(debug: bool):
13
+ # Set Log Level to DEBUG
14
+ if debug:
15
+ os.environ["LOGURU_LEVEL"] = "DEBUG"
16
+
17
+
18
+ def set_custom_source_path_in_config(config: dict, custom_source: str):
19
+ if custom_source:
20
+ config["source"]["source_file_path"] = custom_source
21
+
22
+
23
+ def set_runner_in_config(config: dict, runner: str):
24
+ if runner:
25
+ if "engine" not in config:
26
+ config["engine"] = {}
27
+
28
+ if "runner" not in config["engine"]:
29
+ config["engine"]["runner"] = {"type": runner}
30
+
31
+ config["engine"]["runner"]["type"] = runner
@@ -6,7 +6,7 @@ from random import randint
6
6
  import pytest
7
7
  from faker import Faker
8
8
 
9
- from bizon.cli.parser import parse_from_yaml
9
+ from bizon.cli.utils import parse_from_yaml
10
10
  from bizon.destinations.bigquery.src.config import BigQueryConfig
11
11
  from bizon.destinations.bigquery.src.destination import BigQueryDestination
12
12
  from bizon.source.config import SourceConfig
@@ -154,6 +154,11 @@ class AbstractDestination(ABC):
154
154
  ) -> bool:
155
155
  """Write records to destination and update the cursor for the given iteration"""
156
156
 
157
+ # Case when producer failed to fetch data from first iteration
158
+ if iteration == 0 and len(source_records) == 0:
159
+ logger.warning("Source failed to fetch data from the first iteration, no records will be written.")
160
+ return False
161
+
157
162
  # Prepare destination
158
163
  if iteration == 0:
159
164
  self.prepare_destination()
@@ -29,7 +29,8 @@ class SQLAlchemyBackend(AbstractBackend):
29
29
 
30
30
  def __init__(self, config: Union[PostgresConfigDetails, SQLiteConfigDetails], type: BackendTypes):
31
31
  super().__init__(config, type)
32
- self.engine: Engine = self._get_engine()
32
+
33
+ self._engine = None
33
34
 
34
35
  self.config: Union[
35
36
  PostgresConfigDetails,
@@ -40,9 +41,10 @@ class SQLAlchemyBackend(AbstractBackend):
40
41
  @property
41
42
  def session(self) -> Generator[Session, None, None]:
42
43
  """yields a SQLAlchemy connection"""
44
+ engine = self.get_engine()
43
45
  session_ = scoped_session(
44
46
  sessionmaker(
45
- bind=self.engine,
47
+ bind=engine,
46
48
  expire_on_commit=False,
47
49
  )
48
50
  )
@@ -51,14 +53,19 @@ class SQLAlchemyBackend(AbstractBackend):
51
53
 
52
54
  session_.close()
53
55
 
56
+ # For SQLite, we need to dispose of the engine after each operation to prevent database locks
57
+ self.handle_dispose_sqlite(engine)
58
+
54
59
  def _get_engine_bigquery(self) -> Engine:
55
60
  if hasattr(self.config, "service_account_key") and self.config.service_account_key:
56
61
  return create_engine(
57
- f"bigquery://{self.config.database}/{self.config.schema}",
62
+ f"bigquery://{self.config.database}/{self.config.schema_name}",
58
63
  echo=self.config.echoEngine,
59
64
  credentials_info=self.config.service_account_key,
60
65
  )
61
- return create_engine(f"bigquery://{self.config.database}/{self.config.schema}", echo=self.config.echoEngine)
66
+ return create_engine(
67
+ f"bigquery://{self.config.database}/{self.config.schema_name}", echo=self.config.echoEngine
68
+ )
62
69
 
63
70
  def _get_engine_postgres(self) -> Engine:
64
71
  return create_engine(
@@ -75,14 +82,15 @@ class SQLAlchemyBackend(AbstractBackend):
75
82
  if self.type == BackendTypes.POSTGRES:
76
83
  return self._get_engine_postgres()
77
84
 
78
- # SQLite in a file
85
+ # SQLite in a file, ok for small tests
79
86
  if self.type == BackendTypes.SQLITE:
80
87
  return create_engine(
81
- f"sqlite:///{self.config.database}.db",
88
+ f"sqlite:///{self.config.database}.sqlite3",
82
89
  echo=self.config.echoEngine,
90
+ connect_args={"check_same_thread": False},
83
91
  )
84
92
 
85
- # SQLite in memory
93
+ # ONLY FOR UNIT TESTS: SQLite in memory
86
94
  if self.type == BackendTypes.SQLITE_IN_MEMORY:
87
95
  return create_engine(
88
96
  f"sqlite:///:memory:",
@@ -97,22 +105,44 @@ class SQLAlchemyBackend(AbstractBackend):
97
105
  logger.warning("SQLite does not support schemas")
98
106
  return True
99
107
 
100
- with self.engine.connect() as connection:
101
- if not inspect(connection).has_schema(self.config.schema):
108
+ engine = self.get_engine()
109
+
110
+ with engine.connect() as connection:
111
+ if not inspect(connection).has_schema(self.config.schema_name):
102
112
  logger.error(
103
- f"Schema or dataset {self.config.schema} does not exist in the database, you need to create it first."
113
+ f"Schema or dataset {self.config.schema_name} does not exist in the database, you need to create it first."
104
114
  )
105
115
  raise Exception(
106
- f"Schema or dataset {self.config.schema} does not exist in the database, you need to create it first."
116
+ f"Schema or dataset {self.config.schema_name} does not exist in the database, you need to create it first."
107
117
  )
108
118
 
119
+ self.handle_dispose_sqlite(engine)
120
+
121
+ def handle_dispose_sqlite(self, engine: Engine):
122
+ if self.type == BackendTypes.SQLITE:
123
+ engine.dispose()
124
+
125
+ def get_engine(self) -> Engine:
126
+ """Return the SQLAlchemy engine"""
127
+ if self.type == BackendTypes.SQLITE:
128
+ return self._get_engine()
129
+
130
+ if not self._engine:
131
+ self._engine = self._get_engine()
132
+
133
+ return self._engine
134
+
109
135
  #### INIT DATABASE ####
110
136
 
111
137
  def create_all_tables(self):
112
- Base.metadata.create_all(self.engine)
138
+ engine = self.get_engine()
139
+ Base.metadata.create_all(engine)
140
+ self.handle_dispose_sqlite(engine)
113
141
 
114
142
  def drop_all_tables(self):
115
- Base.metadata.drop_all(self.engine)
143
+ engine = self.get_engine()
144
+ Base.metadata.drop_all(engine)
145
+ self.handle_dispose_sqlite(engine)
116
146
 
117
147
  def check_prerequisites(self) -> bool:
118
148
  """Check if the database contains the necessary tables, return True if entities are present
@@ -125,19 +155,23 @@ class SQLAlchemyBackend(AbstractBackend):
125
155
 
126
156
  all_entities_exist = True
127
157
 
158
+ engine = self.get_engine()
159
+
128
160
  # Check if TABLE_STREAM_INFO exists, otherwise create it
129
- if not inspect(self.engine).has_table(TABLE_STREAM_INFO):
161
+ if not inspect(engine).has_table(TABLE_STREAM_INFO):
130
162
  all_entities_exist = False
131
163
  logger.info(f"Table {TABLE_STREAM_INFO} does not exist in the database, we will create it")
132
164
 
133
- if not inspect(self.engine).has_table(TABLE_SOURCE_CURSOR):
165
+ if not inspect(engine).has_table(TABLE_SOURCE_CURSOR):
134
166
  all_entities_exist = False
135
167
  logger.info(f"Table {TABLE_SOURCE_CURSOR} does not exist in the database, we will create it")
136
168
 
137
- if not inspect(self.engine).has_table(TABLE_DESTINATION_CURSOR):
169
+ if not inspect(engine).has_table(TABLE_DESTINATION_CURSOR):
138
170
  all_entities_exist = False
139
171
  logger.info(f"Table {TABLE_DESTINATION_CURSOR} does not exist in the database, we will create it")
140
172
 
173
+ self.handle_dispose_sqlite(engine)
174
+
141
175
  return all_entities_exist
142
176
 
143
177
  def _add_and_commit(self, obj, session: Optional[Session] = None):
@@ -158,7 +192,7 @@ class SQLAlchemyBackend(AbstractBackend):
158
192
  source_name: str,
159
193
  stream_name: str,
160
194
  total_records_to_fetch: Optional[int] = None,
161
- job_status: JobStatus = JobStatus.NOT_STARTED,
195
+ job_status: JobStatus = JobStatus.STARTED,
162
196
  session: Optional[Session] = None,
163
197
  ) -> StreamJob:
164
198
  """Create new StreamJob record in dbt and return its ID"""
@@ -228,7 +262,7 @@ class SQLAlchemyBackend(AbstractBackend):
228
262
  iteration: int,
229
263
  rows_fetched: int,
230
264
  next_pagination: dict,
231
- cursor_status: CursorStatus = CursorStatus.NOT_STARTED,
265
+ cursor_status: CursorStatus = CursorStatus.STARTED,
232
266
  error_message: Optional[str] = None,
233
267
  session: Optional[Session] = None,
234
268
  ) -> str:
@@ -55,14 +55,18 @@ class SQLiteInMemoryConfig(AbstractBackendConfig):
55
55
 
56
56
  ## BIGQUERY ##
57
57
  class BigQueryConfigDetails(SQLAlchemyConfigDetails):
58
+
58
59
  database: str = Field(
59
60
  description="GCP Project name",
60
61
  default=...,
61
62
  )
62
- schema: str = Field(
63
+
64
+ schema_name: str = Field(
63
65
  description="BigQuery Dataset name",
64
66
  default=...,
67
+ alias="schema",
65
68
  )
69
+
66
70
  service_account_key: str = Field(
67
71
  description="Service Account Key JSON string. If empty it will be infered",
68
72
  default="",
@@ -35,7 +35,7 @@ class AbstractBackend(ABC):
35
35
  source_name: str,
36
36
  stream_name: str,
37
37
  total_records_to_fetch: Optional[int] = None,
38
- job_status: JobStatus = JobStatus.NOT_STARTED,
38
+ job_status: JobStatus = JobStatus.STARTED,
39
39
  session: Optional[Session] = None,
40
40
  ) -> StreamJob:
41
41
  """Create new StreamJob record in db and return it"""
@@ -69,7 +69,7 @@ class AbstractBackend(ABC):
69
69
  iteration: int,
70
70
  rows_fetched: int,
71
71
  next_pagination: dict,
72
- cursor_status: CursorStatus = CursorStatus.NOT_STARTED,
72
+ cursor_status: CursorStatus = CursorStatus.STARTED,
73
73
  error_message: Optional[str] = None,
74
74
  session: Optional[Session] = None,
75
75
  ) -> str:
@@ -161,8 +161,10 @@ class BackendFactory:
161
161
 
162
162
  return SQLAlchemyBackend(config=config.config, type=config.type)
163
163
 
164
+ # ONLY FOR UNIT TESTS
164
165
  elif config.type == BackendTypes.SQLITE_IN_MEMORY:
165
166
  from .adapters.sqlalchemy.backend import SQLAlchemyBackend
166
167
 
167
168
  return SQLAlchemyBackend(config=config.config, type=config.type)
169
+
168
170
  raise ValueError(f"Unsupported backend type: {config.type}")
@@ -15,7 +15,7 @@ class BackendTypes(str, Enum):
15
15
 
16
16
  class AbstractBackendConfigDetails(BaseModel, ABC):
17
17
  database: str = Field(..., description="Database name")
18
- schema: str = Field(..., description="Schema name")
18
+ schema_name: str = Field(..., description="Schema name", alias="schema")
19
19
  syncCursorInDBEvery: int = Field(10, description="Number of iterations before syncing the cursor in the database")
20
20
 
21
21
 
@@ -16,7 +16,7 @@ def generate_uuid():
16
16
 
17
17
 
18
18
  class JobStatus(str, Enum):
19
- NOT_STARTED = "not_started"
19
+ STARTED = "started"
20
20
  RUNNING = "running"
21
21
  SUCCESS = "success"
22
22
  FAILED = "failed"
@@ -25,7 +25,7 @@ class JobStatus(str, Enum):
25
25
 
26
26
 
27
27
  class CursorStatus(str, Enum):
28
- NOT_STARTED = "not_started"
28
+ STARTED = "started"
29
29
  PULLING = "pulling"
30
30
  PULLED = "pulled"
31
31
  WRITTEN_IN_DESTINATION = "written_in_destination"
@@ -50,7 +50,7 @@ class StreamJob(Base):
50
50
  DateTime, nullable=False, default=datetime.now(tz=UTC), doc="Timestamp when the job was created"
51
51
  )
52
52
  updated_at = Column(DateTime, nullable=True, default=None, doc="Timestamp when the job was last updated")
53
- status = Column(String(100), default=JobStatus.NOT_STARTED, doc="Status of the job")
53
+ status = Column(String(100), default=JobStatus.STARTED, doc="Status of the job")
54
54
  error_message = Column(String(255), nullable=True, doc="Error message if the job failed", default=None)
55
55
 
56
56
  source_cursor = relationship("SourceCursor", cascade="all, delete")
@@ -71,7 +71,7 @@ class SourceCursor(Base):
71
71
  rows_fetched = Column(Integer, default=0)
72
72
  next_pagination = Column(String, nullable=True)
73
73
  attempt = Column(Integer, default=0, doc="Number of attempts to pull the data for this cursor")
74
- status = Column(String(100), default=CursorStatus.NOT_STARTED, doc="Status of the cursor")
74
+ status = Column(String(100), default=CursorStatus.STARTED, doc="Status of the cursor")
75
75
  error_message = Column(
76
76
  String(500), nullable=True, doc="Error message if pulling failed for this cursor", default=None
77
77
  )
@@ -94,7 +94,7 @@ class DestinationCursor(Base):
94
94
  to_source_iteration = Column(Integer, default=0)
95
95
  rows_written = Column(Integer, default=0)
96
96
  attempt = Column(Integer, default=0, doc="Number of attempts to pull the data for this cursor")
97
- status = Column(String(100), default=CursorStatus.NOT_STARTED, doc="Status of the cursor")
97
+ status = Column(String(100), default=CursorStatus.STARTED, doc="Status of the cursor")
98
98
  error_message = Column(
99
99
  String(500), nullable=True, doc="Error message if pulling failed for this cursor", default=None
100
100
  )
@@ -19,7 +19,7 @@ from .queue.adapters.python_queue.config import (
19
19
  )
20
20
  from .queue.adapters.rabbitmq.config import RabbitMQConfig
21
21
  from .queue.config import QueueTypes
22
- from .runners.config import RunnerConfig, RunnerTypes, ThreadsConfig
22
+ from .runners.config import RunnerConfig, RunnerFuturesConfig, RunnerTypes
23
23
 
24
24
 
25
25
  class EngineConfig(BaseModel):
@@ -34,10 +34,10 @@ class EngineConfig(BaseModel):
34
34
  BigQuerySQLAlchemyConfig,
35
35
  ] = Field(
36
36
  description="Configuration for the backend",
37
- default=SQLiteInMemoryConfig(
38
- type=BackendTypes.SQLITE_IN_MEMORY,
37
+ default=SQLiteSQLAlchemyConfig(
38
+ type=BackendTypes.SQLITE,
39
39
  config=SQLiteConfigDetails(
40
- database="NOT_USED_IN_SQLITE",
40
+ database="bizon",
41
41
  schema="NOT_USED_IN_SQLITE",
42
42
  ),
43
43
  syncCursorInDBEvery=2,
@@ -54,7 +54,7 @@ class EngineConfig(BaseModel):
54
54
  default=PythonQueueConfig(
55
55
  type=QueueTypes.PYTHON_QUEUE,
56
56
  config=PythonQueueConfigDetails(
57
- queue=PythonQueueQueueConfig(max_size=1000),
57
+ queue=PythonQueueQueueConfig(max_size=0),
58
58
  consumer=PythonQueueConsumerConfig(poll_interval=2),
59
59
  ),
60
60
  ),
@@ -64,10 +64,8 @@ class EngineConfig(BaseModel):
64
64
  runner: RunnerConfig = Field(
65
65
  description="Runner to use for the pipeline",
66
66
  default=RunnerConfig(
67
- type=RunnerTypes.THREADS,
68
- config=ThreadsConfig(
69
- consumer_start_delay=2,
70
- ),
67
+ type=RunnerTypes.THREAD,
68
+ config=RunnerFuturesConfig(),
71
69
  log_level="INFO",
72
70
  ),
73
71
  )
@@ -0,0 +1,31 @@
1
+ from loguru import logger
2
+
3
+ from bizon.cli.utils import parse_from_yaml
4
+ from bizon.common.models import BizonConfig
5
+
6
+ from .config import RunnerTypes
7
+ from .runners.runner import AbstractRunner
8
+
9
+
10
+ class RunnerFactory:
11
+ @staticmethod
12
+ def create_from_config_dict(config: dict) -> AbstractRunner:
13
+
14
+ bizon_config = BizonConfig.model_validate(obj=config)
15
+
16
+ if bizon_config.engine.runner.type == RunnerTypes.THREAD:
17
+ from .runners.adapters.thread import ThreadRunner
18
+
19
+ return ThreadRunner(config=config)
20
+
21
+ if bizon_config.engine.runner.type == RunnerTypes.PROCESS:
22
+ from .runners.adapters.process import ProcessRunner
23
+
24
+ return ProcessRunner(config=config)
25
+
26
+ raise ValueError(f"Runner type {bizon_config.engine.runner.type} is not supported")
27
+
28
+ @staticmethod
29
+ def create_from_yaml(filepath: str) -> AbstractRunner:
30
+ config = parse_from_yaml(filepath)
31
+ return RunnerFactory.create_from_config_dict(config)
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+ from enum import Enum
3
+
4
+ from bizon.destinations.destination import AbstractDestination
5
+ from bizon.engine.queue.config import AbstractQueueConfig
6
+
7
+
8
+ class AbstractQueueConsumer(ABC):
9
+ def __init__(self, config: AbstractQueueConfig, destination: AbstractDestination):
10
+ self.config = config
11
+ self.destination = destination
12
+
13
+ @abstractmethod
14
+ def run(self):
15
+ pass
@@ -0,0 +1,10 @@
1
+ from enum import Enum
2
+
3
+
4
+ class PipelineReturnStatus(Enum):
5
+ """Producer error types"""
6
+
7
+ SUCCESS = "success"
8
+ QUEUE_ERROR = "queue_error"
9
+ SOURCE_ERROR = "source_error"
10
+ BACKEND_ERROR = "backend_error"