fabricks 3.0.5.2__py3-none-any.whl → 3.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. fabricks/api/__init__.py +2 -0
  2. fabricks/api/context.py +1 -2
  3. fabricks/api/deploy.py +3 -0
  4. fabricks/api/job_schema.py +2 -2
  5. fabricks/api/masks.py +3 -0
  6. fabricks/api/notebooks/initialize.py +2 -2
  7. fabricks/api/notebooks/process.py +2 -2
  8. fabricks/api/notebooks/run.py +2 -2
  9. fabricks/api/notebooks/schedule.py +75 -0
  10. fabricks/api/notebooks/terminate.py +2 -2
  11. fabricks/api/schedules.py +2 -16
  12. fabricks/cdc/__init__.py +2 -2
  13. fabricks/cdc/base/__init__.py +2 -2
  14. fabricks/cdc/base/_types.py +9 -2
  15. fabricks/cdc/base/configurator.py +86 -41
  16. fabricks/cdc/base/generator.py +44 -35
  17. fabricks/cdc/base/merger.py +16 -14
  18. fabricks/cdc/base/processor.py +232 -144
  19. fabricks/cdc/nocdc.py +8 -7
  20. fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
  21. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  22. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  23. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  24. fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
  25. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  26. fabricks/cdc/templates/filter.sql.jinja +4 -4
  27. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  28. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  29. fabricks/cdc/templates/merge.sql.jinja +3 -2
  30. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  31. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  32. fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
  33. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
  34. fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
  35. fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
  36. fabricks/cdc/templates/query.sql.jinja +15 -11
  37. fabricks/context/__init__.py +18 -4
  38. fabricks/context/_types.py +2 -0
  39. fabricks/context/config/__init__.py +92 -0
  40. fabricks/context/config/utils.py +53 -0
  41. fabricks/context/log.py +8 -2
  42. fabricks/context/runtime.py +87 -263
  43. fabricks/context/secret.py +1 -1
  44. fabricks/context/spark_session.py +1 -1
  45. fabricks/context/utils.py +80 -0
  46. fabricks/core/dags/generator.py +6 -7
  47. fabricks/core/dags/log.py +2 -15
  48. fabricks/core/dags/processor.py +11 -11
  49. fabricks/core/dags/utils.py +15 -1
  50. fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
  51. fabricks/core/jobs/base/_types.py +64 -22
  52. fabricks/core/jobs/base/checker.py +13 -12
  53. fabricks/core/jobs/base/configurator.py +41 -67
  54. fabricks/core/jobs/base/generator.py +55 -24
  55. fabricks/core/jobs/base/invoker.py +54 -30
  56. fabricks/core/jobs/base/processor.py +43 -26
  57. fabricks/core/jobs/bronze.py +45 -38
  58. fabricks/core/jobs/get_jobs.py +2 -2
  59. fabricks/core/jobs/get_schedule.py +10 -0
  60. fabricks/core/jobs/get_schedules.py +32 -0
  61. fabricks/core/jobs/gold.py +61 -48
  62. fabricks/core/jobs/silver.py +39 -40
  63. fabricks/core/masks.py +52 -0
  64. fabricks/core/parsers/base.py +2 -2
  65. fabricks/core/schedules/__init__.py +14 -0
  66. fabricks/core/schedules/diagrams.py +46 -0
  67. fabricks/core/schedules/get_schedule.py +5 -0
  68. fabricks/core/schedules/get_schedules.py +9 -0
  69. fabricks/core/schedules/run.py +3 -0
  70. fabricks/core/schedules/views.py +61 -0
  71. fabricks/core/steps/base.py +110 -72
  72. fabricks/core/udfs.py +12 -23
  73. fabricks/core/views.py +20 -13
  74. fabricks/deploy/__init__.py +97 -0
  75. fabricks/deploy/masks.py +8 -0
  76. fabricks/deploy/notebooks.py +71 -0
  77. fabricks/deploy/schedules.py +8 -0
  78. fabricks/{core/deploy → deploy}/tables.py +16 -13
  79. fabricks/{core/deploy → deploy}/udfs.py +3 -1
  80. fabricks/deploy/utils.py +36 -0
  81. fabricks/{core/deploy → deploy}/views.py +5 -9
  82. fabricks/metastore/database.py +3 -3
  83. fabricks/metastore/dbobject.py +4 -4
  84. fabricks/metastore/table.py +157 -88
  85. fabricks/metastore/view.py +13 -6
  86. fabricks/utils/_types.py +6 -0
  87. fabricks/utils/azure_table.py +4 -3
  88. fabricks/utils/helpers.py +141 -11
  89. fabricks/utils/log.py +29 -18
  90. fabricks/utils/read/_types.py +1 -1
  91. fabricks/utils/schema/get_schema_for_type.py +6 -0
  92. fabricks/utils/write/delta.py +3 -3
  93. {fabricks-3.0.5.2.dist-info → fabricks-3.0.7.dist-info}/METADATA +2 -1
  94. fabricks-3.0.7.dist-info/RECORD +175 -0
  95. fabricks/api/notebooks/add_fabricks.py +0 -13
  96. fabricks/api/notebooks/optimize.py +0 -29
  97. fabricks/api/notebooks/vacuum.py +0 -29
  98. fabricks/cdc/templates/query/context.sql.jinja +0 -101
  99. fabricks/cdc/templates/query/current.sql.jinja +0 -32
  100. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
  101. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
  102. fabricks/cdc/templates/query/hash.sql.jinja +0 -1
  103. fabricks/cdc/templates/query/slice.sql.jinja +0 -14
  104. fabricks/config/__init__.py +0 -0
  105. fabricks/config/base.py +0 -8
  106. fabricks/config/fabricks/__init__.py +0 -26
  107. fabricks/config/fabricks/base.py +0 -90
  108. fabricks/config/fabricks/environment.py +0 -9
  109. fabricks/config/fabricks/pyproject.py +0 -47
  110. fabricks/config/jobs/__init__.py +0 -6
  111. fabricks/config/jobs/base.py +0 -101
  112. fabricks/config/jobs/bronze.py +0 -38
  113. fabricks/config/jobs/gold.py +0 -27
  114. fabricks/config/jobs/silver.py +0 -22
  115. fabricks/config/runtime.py +0 -67
  116. fabricks/config/steps/__init__.py +0 -6
  117. fabricks/config/steps/base.py +0 -50
  118. fabricks/config/steps/bronze.py +0 -7
  119. fabricks/config/steps/gold.py +0 -14
  120. fabricks/config/steps/silver.py +0 -15
  121. fabricks/core/deploy/__init__.py +0 -17
  122. fabricks/core/schedules.py +0 -142
  123. fabricks/core/scripts/__init__.py +0 -9
  124. fabricks/core/scripts/armageddon.py +0 -87
  125. fabricks/core/scripts/stats.py +0 -51
  126. fabricks/core/scripts/steps.py +0 -26
  127. fabricks-3.0.5.2.dist-info/RECORD +0 -177
  128. /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
  129. /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
  130. /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
  131. /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
  132. /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
  133. /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
  134. /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
  135. /fabricks/core/{utils.py → parsers/utils.py} +0 -0
  136. /fabricks/core/{scripts → schedules}/generate.py +0 -0
  137. /fabricks/core/{scripts → schedules}/process.py +0 -0
  138. /fabricks/core/{scripts → schedules}/terminate.py +0 -0
  139. {fabricks-3.0.5.2.dist-info → fabricks-3.0.7.dist-info}/WHEEL +0 -0
@@ -1,29 +0,0 @@
1
- # Databricks notebook source
2
- # MAGIC %run ./add_fabricks
3
-
4
- # COMMAND ----------
5
-
6
- from databricks.sdk.runtime import dbutils
7
- from pyspark.errors.exceptions.base import IllegalArgumentException
8
-
9
- from fabricks.core.scripts import optimize
10
-
11
- # COMMAND ----------
12
-
13
- dbutils.widgets.text("schedule_id", "---")
14
-
15
- # COMMAND ----------
16
-
17
- try:
18
- schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
19
- except (TypeError, IllegalArgumentException, ValueError):
20
- schedule_id = dbutils.widgets.get("schedule_id")
21
- schedule_id = None if schedule_id == "---" else schedule_id
22
-
23
- # COMMAND ----------
24
-
25
- optimize(schedule_id=schedule_id)
26
-
27
- # COMMAND ----------
28
-
29
- dbutils.notebook.exit(value="exit (0)") # type: ignore
@@ -1,29 +0,0 @@
1
- # Databricks notebook source
2
- # MAGIC %run ./add_fabricks
3
-
4
- # COMMAND ----------
5
-
6
- from databricks.sdk.runtime import dbutils
7
- from pyspark.errors.exceptions.base import IllegalArgumentException
8
-
9
- from fabricks.core.scripts import vacuum
10
-
11
- # COMMAND ----------
12
-
13
- dbutils.widgets.text("schedule_id", "---")
14
-
15
- # COMMAND ----------
16
-
17
- try:
18
- schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
19
- except (TypeError, IllegalArgumentException, ValueError):
20
- schedule_id = dbutils.widgets.get("schedule_id")
21
- schedule_id = None if schedule_id == "---" else schedule_id
22
-
23
- # COMMAND ----------
24
-
25
- vacuum(schedule_id=schedule_id)
26
-
27
- # COMMAND ----------
28
-
29
- dbutils.notebook.exit(value="exit (0)") # type: ignore
@@ -1,101 +0,0 @@
1
- /*
2
- ⛷️🧀🍫🏔️
3
-
4
- 👀🏁
5
- {%- if format %}
6
- ☐ format: {{format}}{% endif %}
7
- {%- if tgt %}
8
- ☐ tgt: {{tgt}}{% endif %}
9
- {%- if cdc %}
10
- ☐ cdc: {{cdc}}{% endif %}
11
- {%- if mode %}
12
- ☐ mode: {{mode}}{% endif %}
13
- {%- if slice %}
14
- 🗹 slice: {{slice}}{% endif %}
15
- {%- if slices %}
16
- ☐ slices: {{slices}}{% endif %}
17
- {%- if rectify %}
18
- 🗹 rectify: {{rectify}}{% endif %}
19
- {%- if deduplicate %}
20
- 🗹 deduplicate: {{deduplicate}}{% endif %}
21
- {%- if deduplicate_key %}
22
- 🗹 deduplicate_key: {{deduplicate_key}}{% endif %}
23
- {%- if deduplicate_hash %}
24
- 🗹 deduplicate_hash: {{deduplicate_hash}}{% endif %}
25
- {%- if soft_delete %}
26
- 🗹 soft_delete: {{soft_delete}}{% endif %}
27
- {%- if correct_valid_from %}
28
- 🗹 correct_valid_from: {{correct_valid_from}}{% endif %}
29
- {%- if has_data %}
30
- 🗹 has_data: {{has_data}}{% else %}☒ has_data: {{has_data}}{% endif %}
31
- {%- if has_rows %}
32
- 🗹 has_rows: {{has_rows}}{% endif %}
33
- {%- if has_source %}
34
- 🗹 has_source: {{has_source}}{% endif %}
35
- {%- if sources %}
36
- 🗹 sources: {{sources}}{% endif %}
37
- {%- if has_metadata %}
38
- 🗹 has_metadata: {{has_metadata}}{% endif %}
39
- {%- if has_timestamp %}
40
- 🗹 has_timestamp: {{has_timestamp}}{% endif %}
41
- {%- if has_identity %}
42
- 🗹 has_identity: {{has_identity}}{% endif %}
43
- {%- if has_key %}
44
- 🗹 has_key: {{has_key}}{% endif %}
45
- {%- if has_hash %}
46
- 🗹 has_hash: {{has_hash}}{% endif %}
47
- {%- if has_order_by %}
48
- 🗹 has_order_by: {{has_order_by}}{% endif %}
49
- {%- if has_rescued_data %}
50
- 🗹 has_rescued_data: {{has_rescued_data}}{% endif %}
51
- {%- if add_metadata %}
52
- 🗹 add_metadata: {{add_metadata}}{% endif %}
53
- {%- if add_timestamp %}
54
- 🗹 add_timestamp: {{add_timestamp}}{% endif %}
55
- {%- if add_key %}
56
- 🗹 add_key: {{add_key}}{% endif %}
57
- {%- if add_hash %}
58
- 🗹 add_hash: {{add_hash}}{% endif %}
59
- {%- if add_operation %}
60
- ☐ add_operation: {{add_operation}}{% endif %}
61
- {%- if add_source %}
62
- ☐ add_source: {{add_source}}{% endif %}
63
- {%- if add_calculated_columns %}
64
- ☐ add_calculated_columns: {{add_calculated_columns}}{% endif %}
65
- {%- if order_duplicate_by %}
66
- 🗹 order_duplicate_by: {{order_duplicate_by}}{% endif %}
67
- {%- if all_except %}
68
- ☐ all_except: {{all_except}}{% endif %}
69
- {%- if all_overwrite %}
70
- ☐ all_overwrite: {{all_overwrite}}{% endif %}
71
- {%- if filter_where %}
72
- ☐ filter_where: {{filter_where}}{% endif %}
73
- {%- if update_where %}
74
- ☐ update_where: {{update_where}}{% endif %}
75
- {%- if parent_slice %}
76
- ☐ parent_slice: {{parent_slice}}{% endif %}
77
- {%- if parent_rectify %}
78
- ☐ parent_rectify: {{parent_rectify}}{% endif %}
79
- {%- if parent_deduplicate_key %}
80
- ☐ parent_deduplicate_key: {{parent_deduplicate_key}}{% endif %}
81
- {%- if parent_deduplicate_hash %}
82
- ☐ parent_deduplicate_hash: {{parent_deduplicate_hash}}{% endif %}
83
- {%- if parent_cdc %}
84
- ☐ parent_cdc: {{parent_cdc}}{% endif %}
85
- {%- if parent_final %}
86
- ☐ parent_final: {{parent_final}}{% endif %}
87
- 👀🏳️
88
-
89
- 👁️🏁
90
- {%- if src %}
91
- ☐ src: {{src}}{% endif %}
92
- {%- if fields %}
93
- ☐ fields: {{fields}}{% endif %}
94
- {%- if keys %}
95
- ☐ keys: {{keys}}{% endif %}
96
- {%- if hashes %}
97
- ☐ hashes: {{hashes}}{% endif %}
98
- 👁️🏳️
99
-
100
- */
101
-
@@ -1,32 +0,0 @@
1
- {% import 'query/hash.sql.jinja' as h -%}
2
-
3
- __current as (
4
- select
5
- {% for field in fields %} {{ field }}, {% endfor %}
6
- {% if has_data %} 'current'
7
- {% else %} 'delete'
8
- {% endif %} as __operation,
9
- {% if has_timestamp %}
10
- {% if cdc == "nocdc" %} __timestamp as __timestamp, {% endif %}
11
- {% if cdc == "scd1" %} __timestamp as __timestamp, {% endif %}
12
- {% if cdc == "scd2" %} __valid_from as __timestamp, {% endif %}
13
- {% else %} cast('0001-01-01' as timestamp) as __timestamp,
14
- {% endif %}
15
- {% if has_hash %} __hash,
16
- {% else %} {{ h.hash(fields=hashes) }} as __hash,
17
- {% endif %}
18
- {% if has_identity %} __identity, {% endif %}
19
- {% if has_key %} __key,
20
- {% else %} {{ h.hash(fields=keys) }} as __key,
21
- {% endif %}
22
- {% if has_source %} __source, {% endif %}
23
- {% if has_metadata %} __metadata, {% endif %}
24
- {% if has_rescued_data %} __rescued_data, {% endif %}
25
- from {{ tgt }} t
26
- where
27
- true
28
- {% if cdc == "scd2" %} and __is_current {% endif %}
29
- {% if cdc == "scd1" %} {% if soft_delete %} and __is_current {% endif %} {% endif %}
30
- {% if sources %} and ({{ sources }}) {% endif %}
31
- {% if update_where %} and {{ update_where }} {% endif %}
32
- ),
@@ -1,21 +0,0 @@
1
- __deduplicate_hash as (
2
- select
3
- *,
4
- lag(__hash) over (
5
- partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
6
- ) as __deduplicate_hash_previous__hash,
7
- lag(__operation) over (
8
- partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
9
- ) as __deduplicate_hash_previous_operation
10
- from {{ parent_deduplicate_hash }}
11
- where true
12
- ),
13
- __deduplicated_hash as (
14
- select *
15
- from __deduplicate_hash
16
- where
17
- true
18
- and not (
19
- __hash <=> __deduplicate_hash_previous__hash and __operation <=> __deduplicate_hash_previous_operation
20
- )
21
- ),
@@ -1,14 +0,0 @@
1
- __deduplicate_key as (
2
- select
3
- *,
4
- row_number() over (
5
- partition by {% if has_source %} __source, {% endif %} __key, __timestamp
6
- order by
7
- /* prioritize delete over upsert */
8
- __operation asc,
9
- {% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %} {% endif %}
10
- ) as __deduplicate_key_rn
11
- from {{ parent_deduplicate_key }}
12
- where true
13
- ),
14
- __deduplicated_key as (select *, from __deduplicate_key where __deduplicate_key_rn == 1),
@@ -1 +0,0 @@
1
- {% macro hash(fields) -%} md5(array_join(array({% for f in fields %}{{ f }}, {% endfor %}), '*', '-1')) {%- endmacro %}
@@ -1,14 +0,0 @@
1
- __sliced as (
2
- select
3
- {% for field in fields %} {{ field }}, {% endfor %}
4
- s.__operation,
5
- s.__timestamp,
6
- s.__hash,
7
- s.__key,
8
- {% if has_identity %} s.__identity, {% endif %}
9
- {% if has_source %} s.__source, {% endif %}
10
- {% if has_metadata %} s.__metadata, {% endif %}
11
- {% if has_rescued_data %} __rescued_data, {% endif %}
12
- from {{ parent_slice }} s
13
- where true and ({{ slices }})
14
- ),
File without changes
fabricks/config/base.py DELETED
@@ -1,8 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pydantic import BaseModel, ConfigDict
4
-
5
-
6
- class ModelBase(BaseModel):
7
- # Ignore extra/unknown fields (TypedDict-like strictness)
8
- model_config = ConfigDict(extra="ignore")
@@ -1,26 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pydantic_settings import BaseSettings
4
-
5
- from fabricks.config.fabricks.base import BaseConfig
6
- from fabricks.config.fabricks.environment import EnvironmentConfig
7
- from fabricks.config.fabricks.pyproject import PyprojectConfig
8
-
9
-
10
- class FabricksConfig(BaseConfig, BaseSettings):
11
- @classmethod
12
- def load(cls) -> FabricksConfig:
13
- pyproject = PyprojectConfig.load()
14
- environ = EnvironmentConfig() # type: ignore
15
-
16
- data = {}
17
-
18
- if pyproject:
19
- dump = pyproject.model_dump(exclude_none=True)
20
- data.update(dump)
21
-
22
- # Override with environment settings
23
- dump = environ.model_dump(exclude_none=True)
24
- data.update(dump)
25
-
26
- return cls(**data)
@@ -1,90 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import Path
4
- from typing import Optional
5
-
6
- from pydantic import Field, field_validator
7
-
8
- from fabricks.config.base import ModelBase
9
-
10
-
11
- class BaseConfig(ModelBase):
12
- root: Optional[Path] = Field(None, description="Root directory")
13
-
14
- runtime: Optional[str] = Field(None, description="Runtime path")
15
- notebooks: Optional[str] = Field(None, description="Notebooks path")
16
- is_job_config_from_yaml: Optional[bool] = Field(None, description="Load job config from YAML")
17
- is_debugmode: Optional[bool] = Field(None, description="Enable debug mode")
18
- loglevel: Optional[str] = Field(None, description="Logging level")
19
- config: Optional[str] = Field(None, description="Config file path")
20
-
21
- @field_validator("runtime", "notebooks", "config", mode="before")
22
- @classmethod
23
- def handle_none(cls, v):
24
- if isinstance(v, str) and v.lower() == "none":
25
- return None
26
-
27
- return v
28
-
29
- @field_validator("is_job_config_from_yaml", "is_debugmode", mode="before")
30
- @classmethod
31
- def handle_bool(cls, v):
32
- if v is None:
33
- return None
34
-
35
- if isinstance(v, str):
36
- return v.lower() in ("true", "1", "yes")
37
-
38
- return bool(v)
39
-
40
- @field_validator("loglevel", mode="before")
41
- @classmethod
42
- def handle_loglevel(cls, v):
43
- if v is None:
44
- return None
45
-
46
- if v.upper() not in {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}:
47
- raise ValueError(f"{v.upper()} not allowed. Use DEBUG, INFO, WARNING, ERROR or CRITICAL")
48
-
49
- return v.upper()
50
-
51
- def resolve_runtime_path(self) -> Path:
52
- runtime = self.runtime
53
-
54
- # Use environment/explicit setting if available
55
- if runtime is not None:
56
- return Path(runtime)
57
-
58
- # Fall back to pyproject.toml location
59
- if self.root is not None:
60
- return self.root
61
-
62
- # Final fallback
63
- raise ValueError("No pyproject.toml nor FABRICKS_RUNTIME")
64
-
65
- def resolve_notebooks_path(self) -> Path:
66
- notebooks = self.notebooks
67
- runtime = self.resolve_runtime_path()
68
-
69
- if notebooks is not None:
70
- if self.root is not None and not Path(notebooks).is_absolute():
71
- return self.root.joinpath(notebooks)
72
-
73
- return Path(notebooks)
74
-
75
- # Default to runtime/notebooks
76
- return runtime.joinpath("notebooks")
77
-
78
- def resolve_config_path(self, cluster_id: Optional[str] = None) -> Path:
79
- config = self.config
80
- runtime = self.resolve_runtime_path()
81
-
82
- if config is not None:
83
- if self.root is not None and not Path(config).is_absolute():
84
- return self.root.joinpath(config)
85
-
86
- return Path(config)
87
-
88
- # default to fabricks/conf.yml
89
- assert cluster_id is not None
90
- return runtime.joinpath(f"fabricks/conf.{cluster_id}.yml")
@@ -1,9 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pydantic_settings import BaseSettings, SettingsConfigDict
4
-
5
- from fabricks.config.fabricks.base import BaseConfig
6
-
7
-
8
- class EnvironmentConfig(BaseConfig, BaseSettings):
9
- model_config = SettingsConfigDict(env_prefix="FABRICKS_", case_sensitive=False, extra="ignore")
@@ -1,47 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import tomllib # type: ignore
5
- from pathlib import Path
6
- from typing import Optional
7
-
8
- from pydantic import Field
9
- from pydantic_settings import BaseSettings, SettingsConfigDict
10
-
11
- from fabricks.config.fabricks.base import BaseConfig
12
-
13
-
14
- class PyprojectConfig(BaseConfig, BaseSettings):
15
- model_config = SettingsConfigDict(
16
- env_prefix="",
17
- case_sensitive=False,
18
- extra="ignore",
19
- )
20
-
21
- is_job_config_from_yaml: Optional[bool] = Field(None, alias="job_config_from_yaml")
22
- is_debugmode: Optional[bool] = Field(None, alias="debugmode")
23
-
24
- @classmethod
25
- def _from_pyproject(cls, root: Path) -> PyprojectConfig:
26
- path = root / "pyproject.toml"
27
- if not path.exists():
28
- return cls() # type: ignore
29
-
30
- with open(path, "rb") as f:
31
- pyproject = tomllib.load(f)
32
- config = pyproject.get("tool", {}).get("fabricks", {})
33
-
34
- return cls(**config, root=root)
35
-
36
- @classmethod
37
- def load(cls) -> PyprojectConfig:
38
- path = Path(os.getcwd())
39
-
40
- while path is not None:
41
- if (path / "pyproject.toml").exists():
42
- break
43
- if path == path.parent:
44
- break
45
- path = path.parent
46
-
47
- return cls._from_pyproject(path)
@@ -1,6 +0,0 @@
1
- from fabricks.config.jobs.base import BaseJobConfig
2
- from fabricks.config.jobs.bronze import BronzeJobConfig
3
- from fabricks.config.jobs.gold import GoldJobConfig
4
- from fabricks.config.jobs.silver import SilverJobConfig
5
-
6
- __all__ = ["BaseJobConfig", "BronzeJobConfig", "SilverJobConfig", "GoldJobConfig"]
@@ -1,101 +0,0 @@
1
- from typing import List, Literal, Optional
2
-
3
- from fabricks.config.base import ModelBase
4
-
5
- FileFormats = Literal["json_array", "json", "jsonl", "csv", "parquet", "delta"]
6
- Operations = Literal["upsert", "reload", "delete"]
7
- Types = Literal["manual", "default"]
8
- Origins = Literal["parser", "job"]
9
- ChangeDataCaptures = Literal["none", "nocdc", "scd1", "scd2"]
10
- Modes = Literal[
11
- "memory",
12
- "append",
13
- "complete",
14
- "update",
15
- "invoke",
16
- "memory",
17
- "append",
18
- "register",
19
- "memory",
20
- "append",
21
- "latest",
22
- "update",
23
- "combine",
24
- ]
25
- Steps = Literal["bronze", "silver", "gold"]
26
-
27
-
28
- class SparkOptions(ModelBase):
29
- sql: Optional[dict[str, str]] = None
30
- conf: Optional[dict[str, str]] = None
31
-
32
-
33
- class TableOptions(ModelBase):
34
- identity: Optional[bool] = None
35
- liquid_clustering: Optional[bool] = None
36
- partition_by: Optional[List[str]] = None
37
- zorder_by: Optional[List[str]] = None
38
- cluster_by: Optional[List[str]] = None
39
- powerbi: Optional[bool] = None
40
- bloomfilter_by: Optional[List[str]] = None
41
- constraints: Optional[dict[str, str]] = None
42
- properties: Optional[dict[str, str]] = None
43
- comment: Optional[str] = None
44
- calculated_columns: Optional[dict[str, str]] = None
45
- retention_days: Optional[int] = None
46
-
47
-
48
- class InvokeOptions(ModelBase):
49
- notebook: str
50
- timeout: int
51
- arguments: Optional[dict[str, str]] = None
52
-
53
-
54
- class InvokerOptions(ModelBase):
55
- pre_run: Optional[List[InvokeOptions]] = None
56
- run: Optional[List[InvokeOptions]] = None
57
- post_run: Optional[List[InvokeOptions]] = None
58
-
59
-
60
- class ExtenderOptions(ModelBase):
61
- extender: str
62
- arguments: Optional[dict[str, str]] = None
63
-
64
-
65
- class CheckOptions(ModelBase):
66
- skip: Optional[bool] = None
67
- pre_run: Optional[bool] = None
68
- post_run: Optional[bool] = None
69
- min_rows: Optional[int] = None
70
- max_rows: Optional[int] = None
71
- count_must_equal: Optional[str] = None
72
-
73
-
74
- class DefaultOptions(ModelBase):
75
- type: Optional[Types] = None
76
- mode: Modes
77
- change_data_capture: Optional[ChangeDataCaptures]
78
- # extra
79
- parents: Optional[List[str]] = None
80
- filter_where: Optional[str] = None
81
- timeout: Optional[int] = None
82
-
83
-
84
- class BaseJobConfig(ModelBase):
85
- job_id: str
86
-
87
- extend: Steps
88
- step: Steps
89
-
90
- topic: str
91
- item: str
92
-
93
- options: Optional[DefaultOptions] = None
94
- table_options: Optional[TableOptions] = None
95
- check_options: Optional[CheckOptions] = None
96
- spark_options: Optional[SparkOptions] = None
97
- invoker_options: Optional[InvokerOptions] = None
98
- extender_options: Optional[List[ExtenderOptions]] = None
99
-
100
- tags: Optional[List[str]] = None
101
- comment: Optional[str] = None
@@ -1,38 +0,0 @@
1
- from typing import List, Literal, Optional
2
-
3
- from fabricks.config.base import ModelBase
4
- from fabricks.config.jobs.base import BaseJobConfig, ChangeDataCaptures, DefaultOptions, Operations
5
-
6
- BronzeModes = Literal["memory", "append", "register"]
7
-
8
-
9
- class ParserOptions(ModelBase):
10
- file_format: Optional[str]
11
- read_options: Optional[dict[str, str]]
12
-
13
-
14
- class BronzeOptions(DefaultOptions):
15
- # default
16
- mode: BronzeModes
17
- change_data_capture: ChangeDataCaptures = "none"
18
-
19
- # mandatory
20
- uri: str
21
- parser: str
22
- source: str
23
-
24
- # preferred
25
- keys: Optional[List[str]] = None
26
-
27
- # optional
28
- encrypted_columns: Optional[List[str]] = None
29
- calculated_columns: Optional[dict[str, str]] = None
30
- operation: Optional[Operations] = None
31
-
32
-
33
- class BronzeJobConfig(BaseJobConfig):
34
- extend: Literal["bronze"] = "bronze"
35
- step: Literal["bronze"]
36
-
37
- options: Optional[BronzeOptions] = None
38
- parser_options: Optional[ParserOptions] = None
@@ -1,27 +0,0 @@
1
- from typing import Literal, Optional
2
-
3
- from fabricks.config.jobs.base import BaseJobConfig, DefaultOptions
4
-
5
- GoldModes = Literal["memory", "append", "complete", "update", "invoke"]
6
-
7
-
8
- class GoldOptions(DefaultOptions):
9
- # default
10
- mode: GoldModes
11
-
12
- # optional
13
- update_where: Optional[str] = None
14
- deduplicate: Optional[bool] = None # remove duplicates on the keys and on the hash
15
- rectify_as_upserts: Optional[bool] = None # convert reloads into upserts and deletes
16
- correct_valid_from: Optional[bool] = None
17
- persist_last_timestamp: Optional[bool] = None
18
- table: Optional[str] = None
19
- notebook: Optional[bool] = None
20
- requirements: Optional[bool] = None
21
-
22
-
23
- class GoldJobConfig(BaseJobConfig):
24
- extend: Literal["gold"] = "gold"
25
- step: Literal["gold"]
26
-
27
- options: Optional[GoldOptions] = None
@@ -1,22 +0,0 @@
1
- from typing import Literal, Optional
2
-
3
- from fabricks.config.jobs.base import BaseJobConfig, DefaultOptions
4
-
5
- SilverModes = Literal["memory", "append", "latest", "update", "combine"]
6
-
7
-
8
- class SilverOptions(DefaultOptions):
9
- # default
10
- mode: SilverModes
11
-
12
- # optional
13
- deduplicate: Optional[bool] = None
14
- stream: Optional[bool] = None
15
- order_duplicate_by: Optional[dict[str, str]] = None
16
-
17
-
18
- class SilverJobConfig(BaseJobConfig):
19
- extend: Literal["silver"] = "silver"
20
- step: Literal["silver"]
21
-
22
- options: Optional[SilverOptions] = None