fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/__init__.py +2 -0
- fabricks/api/context.py +1 -2
- fabricks/api/deploy.py +3 -0
- fabricks/api/job_schema.py +2 -2
- fabricks/api/masks.py +3 -0
- fabricks/api/notebooks/initialize.py +2 -2
- fabricks/api/notebooks/process.py +2 -2
- fabricks/api/notebooks/run.py +2 -2
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +2 -2
- fabricks/api/schedules.py +2 -16
- fabricks/cdc/__init__.py +2 -2
- fabricks/cdc/base/__init__.py +2 -2
- fabricks/cdc/base/_types.py +9 -2
- fabricks/cdc/base/configurator.py +86 -41
- fabricks/cdc/base/generator.py +44 -35
- fabricks/cdc/base/merger.py +16 -14
- fabricks/cdc/base/processor.py +232 -144
- fabricks/cdc/nocdc.py +8 -7
- fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -4
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -2
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
- fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
- fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
- fabricks/cdc/templates/query.sql.jinja +15 -11
- fabricks/context/__init__.py +18 -4
- fabricks/context/_types.py +2 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +8 -2
- fabricks/context/runtime.py +87 -263
- fabricks/context/secret.py +1 -1
- fabricks/context/spark_session.py +1 -1
- fabricks/context/utils.py +76 -0
- fabricks/core/dags/generator.py +6 -7
- fabricks/core/dags/log.py +2 -15
- fabricks/core/dags/processor.py +11 -11
- fabricks/core/dags/utils.py +15 -1
- fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
- fabricks/core/jobs/base/_types.py +64 -22
- fabricks/core/jobs/base/checker.py +13 -12
- fabricks/core/jobs/base/configurator.py +41 -67
- fabricks/core/jobs/base/generator.py +55 -24
- fabricks/core/jobs/base/invoker.py +54 -30
- fabricks/core/jobs/base/processor.py +43 -26
- fabricks/core/jobs/bronze.py +45 -38
- fabricks/core/jobs/get_jobs.py +2 -2
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +61 -48
- fabricks/core/jobs/silver.py +39 -40
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/base.py +2 -2
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +46 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/base.py +110 -72
- fabricks/core/udfs.py +12 -23
- fabricks/core/views.py +20 -13
- fabricks/deploy/__init__.py +97 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +8 -0
- fabricks/{core/deploy → deploy}/tables.py +16 -13
- fabricks/{core/deploy → deploy}/udfs.py +3 -1
- fabricks/deploy/utils.py +36 -0
- fabricks/{core/deploy → deploy}/views.py +5 -9
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/dbobject.py +4 -4
- fabricks/metastore/table.py +157 -88
- fabricks/metastore/view.py +13 -6
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_table.py +4 -3
- fabricks/utils/helpers.py +141 -11
- fabricks/utils/log.py +29 -18
- fabricks/utils/read/_types.py +1 -1
- fabricks/utils/schema/get_schema_for_type.py +6 -0
- fabricks/utils/write/delta.py +3 -3
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
- fabricks-3.0.6.dist-info/RECORD +175 -0
- fabricks/api/notebooks/add_fabricks.py +0 -13
- fabricks/api/notebooks/optimize.py +0 -29
- fabricks/api/notebooks/vacuum.py +0 -29
- fabricks/cdc/templates/query/context.sql.jinja +0 -101
- fabricks/cdc/templates/query/current.sql.jinja +0 -32
- fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
- fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
- fabricks/cdc/templates/query/hash.sql.jinja +0 -1
- fabricks/cdc/templates/query/slice.sql.jinja +0 -14
- fabricks/config/__init__.py +0 -0
- fabricks/config/base.py +0 -8
- fabricks/config/fabricks/__init__.py +0 -26
- fabricks/config/fabricks/base.py +0 -90
- fabricks/config/fabricks/environment.py +0 -9
- fabricks/config/fabricks/pyproject.py +0 -47
- fabricks/config/jobs/__init__.py +0 -6
- fabricks/config/jobs/base.py +0 -101
- fabricks/config/jobs/bronze.py +0 -38
- fabricks/config/jobs/gold.py +0 -27
- fabricks/config/jobs/silver.py +0 -22
- fabricks/config/runtime.py +0 -67
- fabricks/config/steps/__init__.py +0 -6
- fabricks/config/steps/base.py +0 -50
- fabricks/config/steps/bronze.py +0 -7
- fabricks/config/steps/gold.py +0 -14
- fabricks/config/steps/silver.py +0 -15
- fabricks/core/deploy/__init__.py +0 -17
- fabricks/core/schedules.py +0 -142
- fabricks/core/scripts/__init__.py +0 -9
- fabricks/core/scripts/armageddon.py +0 -87
- fabricks/core/scripts/stats.py +0 -51
- fabricks/core/scripts/steps.py +0 -26
- fabricks-3.0.5.2.dist-info/RECORD +0 -177
- /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
- /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
- /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
- /fabricks/core/{utils.py → parsers/utils.py} +0 -0
- /fabricks/core/{scripts → schedules}/generate.py +0 -0
- /fabricks/core/{scripts → schedules}/process.py +0 -0
- /fabricks/core/{scripts → schedules}/terminate.py +0 -0
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
# Databricks notebook source
|
|
2
|
-
# MAGIC %run ./add_fabricks
|
|
3
|
-
|
|
4
|
-
# COMMAND ----------
|
|
5
|
-
|
|
6
|
-
from databricks.sdk.runtime import dbutils
|
|
7
|
-
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
8
|
-
|
|
9
|
-
from fabricks.core.scripts import optimize
|
|
10
|
-
|
|
11
|
-
# COMMAND ----------
|
|
12
|
-
|
|
13
|
-
dbutils.widgets.text("schedule_id", "---")
|
|
14
|
-
|
|
15
|
-
# COMMAND ----------
|
|
16
|
-
|
|
17
|
-
try:
|
|
18
|
-
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
19
|
-
except (TypeError, IllegalArgumentException, ValueError):
|
|
20
|
-
schedule_id = dbutils.widgets.get("schedule_id")
|
|
21
|
-
schedule_id = None if schedule_id == "---" else schedule_id
|
|
22
|
-
|
|
23
|
-
# COMMAND ----------
|
|
24
|
-
|
|
25
|
-
optimize(schedule_id=schedule_id)
|
|
26
|
-
|
|
27
|
-
# COMMAND ----------
|
|
28
|
-
|
|
29
|
-
dbutils.notebook.exit(value="exit (0)") # type: ignore
|
fabricks/api/notebooks/vacuum.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
# Databricks notebook source
|
|
2
|
-
# MAGIC %run ./add_fabricks
|
|
3
|
-
|
|
4
|
-
# COMMAND ----------
|
|
5
|
-
|
|
6
|
-
from databricks.sdk.runtime import dbutils
|
|
7
|
-
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
8
|
-
|
|
9
|
-
from fabricks.core.scripts import vacuum
|
|
10
|
-
|
|
11
|
-
# COMMAND ----------
|
|
12
|
-
|
|
13
|
-
dbutils.widgets.text("schedule_id", "---")
|
|
14
|
-
|
|
15
|
-
# COMMAND ----------
|
|
16
|
-
|
|
17
|
-
try:
|
|
18
|
-
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
19
|
-
except (TypeError, IllegalArgumentException, ValueError):
|
|
20
|
-
schedule_id = dbutils.widgets.get("schedule_id")
|
|
21
|
-
schedule_id = None if schedule_id == "---" else schedule_id
|
|
22
|
-
|
|
23
|
-
# COMMAND ----------
|
|
24
|
-
|
|
25
|
-
vacuum(schedule_id=schedule_id)
|
|
26
|
-
|
|
27
|
-
# COMMAND ----------
|
|
28
|
-
|
|
29
|
-
dbutils.notebook.exit(value="exit (0)") # type: ignore
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
⛷️🧀🍫🏔️
|
|
3
|
-
|
|
4
|
-
👀🏁
|
|
5
|
-
{%- if format %}
|
|
6
|
-
☐ format: {{format}}{% endif %}
|
|
7
|
-
{%- if tgt %}
|
|
8
|
-
☐ tgt: {{tgt}}{% endif %}
|
|
9
|
-
{%- if cdc %}
|
|
10
|
-
☐ cdc: {{cdc}}{% endif %}
|
|
11
|
-
{%- if mode %}
|
|
12
|
-
☐ mode: {{mode}}{% endif %}
|
|
13
|
-
{%- if slice %}
|
|
14
|
-
🗹 slice: {{slice}}{% endif %}
|
|
15
|
-
{%- if slices %}
|
|
16
|
-
☐ slices: {{slices}}{% endif %}
|
|
17
|
-
{%- if rectify %}
|
|
18
|
-
🗹 rectify: {{rectify}}{% endif %}
|
|
19
|
-
{%- if deduplicate %}
|
|
20
|
-
🗹 deduplicate: {{deduplicate}}{% endif %}
|
|
21
|
-
{%- if deduplicate_key %}
|
|
22
|
-
🗹 deduplicate_key: {{deduplicate_key}}{% endif %}
|
|
23
|
-
{%- if deduplicate_hash %}
|
|
24
|
-
🗹 deduplicate_hash: {{deduplicate_hash}}{% endif %}
|
|
25
|
-
{%- if soft_delete %}
|
|
26
|
-
🗹 soft_delete: {{soft_delete}}{% endif %}
|
|
27
|
-
{%- if correct_valid_from %}
|
|
28
|
-
🗹 correct_valid_from: {{correct_valid_from}}{% endif %}
|
|
29
|
-
{%- if has_data %}
|
|
30
|
-
🗹 has_data: {{has_data}}{% else %}☒ has_data: {{has_data}}{% endif %}
|
|
31
|
-
{%- if has_rows %}
|
|
32
|
-
🗹 has_rows: {{has_rows}}{% endif %}
|
|
33
|
-
{%- if has_source %}
|
|
34
|
-
🗹 has_source: {{has_source}}{% endif %}
|
|
35
|
-
{%- if sources %}
|
|
36
|
-
🗹 sources: {{sources}}{% endif %}
|
|
37
|
-
{%- if has_metadata %}
|
|
38
|
-
🗹 has_metadata: {{has_metadata}}{% endif %}
|
|
39
|
-
{%- if has_timestamp %}
|
|
40
|
-
🗹 has_timestamp: {{has_timestamp}}{% endif %}
|
|
41
|
-
{%- if has_identity %}
|
|
42
|
-
🗹 has_identity: {{has_identity}}{% endif %}
|
|
43
|
-
{%- if has_key %}
|
|
44
|
-
🗹 has_key: {{has_key}}{% endif %}
|
|
45
|
-
{%- if has_hash %}
|
|
46
|
-
🗹 has_hash: {{has_hash}}{% endif %}
|
|
47
|
-
{%- if has_order_by %}
|
|
48
|
-
🗹 has_order_by: {{has_order_by}}{% endif %}
|
|
49
|
-
{%- if has_rescued_data %}
|
|
50
|
-
🗹 has_rescued_data: {{has_rescued_data}}{% endif %}
|
|
51
|
-
{%- if add_metadata %}
|
|
52
|
-
🗹 add_metadata: {{add_metadata}}{% endif %}
|
|
53
|
-
{%- if add_timestamp %}
|
|
54
|
-
🗹 add_timestamp: {{add_timestamp}}{% endif %}
|
|
55
|
-
{%- if add_key %}
|
|
56
|
-
🗹 add_key: {{add_key}}{% endif %}
|
|
57
|
-
{%- if add_hash %}
|
|
58
|
-
🗹 add_hash: {{add_hash}}{% endif %}
|
|
59
|
-
{%- if add_operation %}
|
|
60
|
-
☐ add_operation: {{add_operation}}{% endif %}
|
|
61
|
-
{%- if add_source %}
|
|
62
|
-
☐ add_source: {{add_source}}{% endif %}
|
|
63
|
-
{%- if add_calculated_columns %}
|
|
64
|
-
☐ add_calculated_columns: {{add_calculated_columns}}{% endif %}
|
|
65
|
-
{%- if order_duplicate_by %}
|
|
66
|
-
🗹 order_duplicate_by: {{order_duplicate_by}}{% endif %}
|
|
67
|
-
{%- if all_except %}
|
|
68
|
-
☐ all_except: {{all_except}}{% endif %}
|
|
69
|
-
{%- if all_overwrite %}
|
|
70
|
-
☐ all_overwrite: {{all_overwrite}}{% endif %}
|
|
71
|
-
{%- if filter_where %}
|
|
72
|
-
☐ filter_where: {{filter_where}}{% endif %}
|
|
73
|
-
{%- if update_where %}
|
|
74
|
-
☐ update_where: {{update_where}}{% endif %}
|
|
75
|
-
{%- if parent_slice %}
|
|
76
|
-
☐ parent_slice: {{parent_slice}}{% endif %}
|
|
77
|
-
{%- if parent_rectify %}
|
|
78
|
-
☐ parent_rectify: {{parent_rectify}}{% endif %}
|
|
79
|
-
{%- if parent_deduplicate_key %}
|
|
80
|
-
☐ parent_deduplicate_key: {{parent_deduplicate_key}}{% endif %}
|
|
81
|
-
{%- if parent_deduplicate_hash %}
|
|
82
|
-
☐ parent_deduplicate_hash: {{parent_deduplicate_hash}}{% endif %}
|
|
83
|
-
{%- if parent_cdc %}
|
|
84
|
-
☐ parent_cdc: {{parent_cdc}}{% endif %}
|
|
85
|
-
{%- if parent_final %}
|
|
86
|
-
☐ parent_final: {{parent_final}}{% endif %}
|
|
87
|
-
👀🏳️
|
|
88
|
-
|
|
89
|
-
👁️🏁
|
|
90
|
-
{%- if src %}
|
|
91
|
-
☐ src: {{src}}{% endif %}
|
|
92
|
-
{%- if fields %}
|
|
93
|
-
☐ fields: {{fields}}{% endif %}
|
|
94
|
-
{%- if keys %}
|
|
95
|
-
☐ keys: {{keys}}{% endif %}
|
|
96
|
-
{%- if hashes %}
|
|
97
|
-
☐ hashes: {{hashes}}{% endif %}
|
|
98
|
-
👁️🏳️
|
|
99
|
-
|
|
100
|
-
*/
|
|
101
|
-
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
{% import 'query/hash.sql.jinja' as h -%}
|
|
2
|
-
|
|
3
|
-
__current as (
|
|
4
|
-
select
|
|
5
|
-
{% for field in fields %} {{ field }}, {% endfor %}
|
|
6
|
-
{% if has_data %} 'current'
|
|
7
|
-
{% else %} 'delete'
|
|
8
|
-
{% endif %} as __operation,
|
|
9
|
-
{% if has_timestamp %}
|
|
10
|
-
{% if cdc == "nocdc" %} __timestamp as __timestamp, {% endif %}
|
|
11
|
-
{% if cdc == "scd1" %} __timestamp as __timestamp, {% endif %}
|
|
12
|
-
{% if cdc == "scd2" %} __valid_from as __timestamp, {% endif %}
|
|
13
|
-
{% else %} cast('0001-01-01' as timestamp) as __timestamp,
|
|
14
|
-
{% endif %}
|
|
15
|
-
{% if has_hash %} __hash,
|
|
16
|
-
{% else %} {{ h.hash(fields=hashes) }} as __hash,
|
|
17
|
-
{% endif %}
|
|
18
|
-
{% if has_identity %} __identity, {% endif %}
|
|
19
|
-
{% if has_key %} __key,
|
|
20
|
-
{% else %} {{ h.hash(fields=keys) }} as __key,
|
|
21
|
-
{% endif %}
|
|
22
|
-
{% if has_source %} __source, {% endif %}
|
|
23
|
-
{% if has_metadata %} __metadata, {% endif %}
|
|
24
|
-
{% if has_rescued_data %} __rescued_data, {% endif %}
|
|
25
|
-
from {{ tgt }} t
|
|
26
|
-
where
|
|
27
|
-
true
|
|
28
|
-
{% if cdc == "scd2" %} and __is_current {% endif %}
|
|
29
|
-
{% if cdc == "scd1" %} {% if soft_delete %} and __is_current {% endif %} {% endif %}
|
|
30
|
-
{% if sources %} and ({{ sources }}) {% endif %}
|
|
31
|
-
{% if update_where %} and {{ update_where }} {% endif %}
|
|
32
|
-
),
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
__deduplicate_hash as (
|
|
2
|
-
select
|
|
3
|
-
*,
|
|
4
|
-
lag(__hash) over (
|
|
5
|
-
partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
|
|
6
|
-
) as __deduplicate_hash_previous__hash,
|
|
7
|
-
lag(__operation) over (
|
|
8
|
-
partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
|
|
9
|
-
) as __deduplicate_hash_previous_operation
|
|
10
|
-
from {{ parent_deduplicate_hash }}
|
|
11
|
-
where true
|
|
12
|
-
),
|
|
13
|
-
__deduplicated_hash as (
|
|
14
|
-
select *
|
|
15
|
-
from __deduplicate_hash
|
|
16
|
-
where
|
|
17
|
-
true
|
|
18
|
-
and not (
|
|
19
|
-
__hash <=> __deduplicate_hash_previous__hash and __operation <=> __deduplicate_hash_previous_operation
|
|
20
|
-
)
|
|
21
|
-
),
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
__deduplicate_key as (
|
|
2
|
-
select
|
|
3
|
-
*,
|
|
4
|
-
row_number() over (
|
|
5
|
-
partition by {% if has_source %} __source, {% endif %} __key, __timestamp
|
|
6
|
-
order by
|
|
7
|
-
/* prioritize delete over upsert */
|
|
8
|
-
__operation asc,
|
|
9
|
-
{% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %} {% endif %}
|
|
10
|
-
) as __deduplicate_key_rn
|
|
11
|
-
from {{ parent_deduplicate_key }}
|
|
12
|
-
where true
|
|
13
|
-
),
|
|
14
|
-
__deduplicated_key as (select *, from __deduplicate_key where __deduplicate_key_rn == 1),
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{% macro hash(fields) -%} md5(array_join(array({% for f in fields %}{{ f }}, {% endfor %}), '*', '-1')) {%- endmacro %}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
__sliced as (
|
|
2
|
-
select
|
|
3
|
-
{% for field in fields %} {{ field }}, {% endfor %}
|
|
4
|
-
s.__operation,
|
|
5
|
-
s.__timestamp,
|
|
6
|
-
s.__hash,
|
|
7
|
-
s.__key,
|
|
8
|
-
{% if has_identity %} s.__identity, {% endif %}
|
|
9
|
-
{% if has_source %} s.__source, {% endif %}
|
|
10
|
-
{% if has_metadata %} s.__metadata, {% endif %}
|
|
11
|
-
{% if has_rescued_data %} __rescued_data, {% endif %}
|
|
12
|
-
from {{ parent_slice }} s
|
|
13
|
-
where true and ({{ slices }})
|
|
14
|
-
),
|
fabricks/config/__init__.py
DELETED
|
File without changes
|
fabricks/config/base.py
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pydantic_settings import BaseSettings
|
|
4
|
-
|
|
5
|
-
from fabricks.config.fabricks.base import BaseConfig
|
|
6
|
-
from fabricks.config.fabricks.environment import EnvironmentConfig
|
|
7
|
-
from fabricks.config.fabricks.pyproject import PyprojectConfig
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class FabricksConfig(BaseConfig, BaseSettings):
|
|
11
|
-
@classmethod
|
|
12
|
-
def load(cls) -> FabricksConfig:
|
|
13
|
-
pyproject = PyprojectConfig.load()
|
|
14
|
-
environ = EnvironmentConfig() # type: ignore
|
|
15
|
-
|
|
16
|
-
data = {}
|
|
17
|
-
|
|
18
|
-
if pyproject:
|
|
19
|
-
dump = pyproject.model_dump(exclude_none=True)
|
|
20
|
-
data.update(dump)
|
|
21
|
-
|
|
22
|
-
# Override with environment settings
|
|
23
|
-
dump = environ.model_dump(exclude_none=True)
|
|
24
|
-
data.update(dump)
|
|
25
|
-
|
|
26
|
-
return cls(**data)
|
fabricks/config/fabricks/base.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
from pydantic import Field, field_validator
|
|
7
|
-
|
|
8
|
-
from fabricks.config.base import ModelBase
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class BaseConfig(ModelBase):
|
|
12
|
-
root: Optional[Path] = Field(None, description="Root directory")
|
|
13
|
-
|
|
14
|
-
runtime: Optional[str] = Field(None, description="Runtime path")
|
|
15
|
-
notebooks: Optional[str] = Field(None, description="Notebooks path")
|
|
16
|
-
is_job_config_from_yaml: Optional[bool] = Field(None, description="Load job config from YAML")
|
|
17
|
-
is_debugmode: Optional[bool] = Field(None, description="Enable debug mode")
|
|
18
|
-
loglevel: Optional[str] = Field(None, description="Logging level")
|
|
19
|
-
config: Optional[str] = Field(None, description="Config file path")
|
|
20
|
-
|
|
21
|
-
@field_validator("runtime", "notebooks", "config", mode="before")
|
|
22
|
-
@classmethod
|
|
23
|
-
def handle_none(cls, v):
|
|
24
|
-
if isinstance(v, str) and v.lower() == "none":
|
|
25
|
-
return None
|
|
26
|
-
|
|
27
|
-
return v
|
|
28
|
-
|
|
29
|
-
@field_validator("is_job_config_from_yaml", "is_debugmode", mode="before")
|
|
30
|
-
@classmethod
|
|
31
|
-
def handle_bool(cls, v):
|
|
32
|
-
if v is None:
|
|
33
|
-
return None
|
|
34
|
-
|
|
35
|
-
if isinstance(v, str):
|
|
36
|
-
return v.lower() in ("true", "1", "yes")
|
|
37
|
-
|
|
38
|
-
return bool(v)
|
|
39
|
-
|
|
40
|
-
@field_validator("loglevel", mode="before")
|
|
41
|
-
@classmethod
|
|
42
|
-
def handle_loglevel(cls, v):
|
|
43
|
-
if v is None:
|
|
44
|
-
return None
|
|
45
|
-
|
|
46
|
-
if v.upper() not in {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}:
|
|
47
|
-
raise ValueError(f"{v.upper()} not allowed. Use DEBUG, INFO, WARNING, ERROR or CRITICAL")
|
|
48
|
-
|
|
49
|
-
return v.upper()
|
|
50
|
-
|
|
51
|
-
def resolve_runtime_path(self) -> Path:
|
|
52
|
-
runtime = self.runtime
|
|
53
|
-
|
|
54
|
-
# Use environment/explicit setting if available
|
|
55
|
-
if runtime is not None:
|
|
56
|
-
return Path(runtime)
|
|
57
|
-
|
|
58
|
-
# Fall back to pyproject.toml location
|
|
59
|
-
if self.root is not None:
|
|
60
|
-
return self.root
|
|
61
|
-
|
|
62
|
-
# Final fallback
|
|
63
|
-
raise ValueError("No pyproject.toml nor FABRICKS_RUNTIME")
|
|
64
|
-
|
|
65
|
-
def resolve_notebooks_path(self) -> Path:
|
|
66
|
-
notebooks = self.notebooks
|
|
67
|
-
runtime = self.resolve_runtime_path()
|
|
68
|
-
|
|
69
|
-
if notebooks is not None:
|
|
70
|
-
if self.root is not None and not Path(notebooks).is_absolute():
|
|
71
|
-
return self.root.joinpath(notebooks)
|
|
72
|
-
|
|
73
|
-
return Path(notebooks)
|
|
74
|
-
|
|
75
|
-
# Default to runtime/notebooks
|
|
76
|
-
return runtime.joinpath("notebooks")
|
|
77
|
-
|
|
78
|
-
def resolve_config_path(self, cluster_id: Optional[str] = None) -> Path:
|
|
79
|
-
config = self.config
|
|
80
|
-
runtime = self.resolve_runtime_path()
|
|
81
|
-
|
|
82
|
-
if config is not None:
|
|
83
|
-
if self.root is not None and not Path(config).is_absolute():
|
|
84
|
-
return self.root.joinpath(config)
|
|
85
|
-
|
|
86
|
-
return Path(config)
|
|
87
|
-
|
|
88
|
-
# default to fabricks/conf.yml
|
|
89
|
-
assert cluster_id is not None
|
|
90
|
-
return runtime.joinpath(f"fabricks/conf.{cluster_id}.yml")
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
-
|
|
5
|
-
from fabricks.config.fabricks.base import BaseConfig
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class EnvironmentConfig(BaseConfig, BaseSettings):
|
|
9
|
-
model_config = SettingsConfigDict(env_prefix="FABRICKS_", case_sensitive=False, extra="ignore")
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import tomllib # type: ignore
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Optional
|
|
7
|
-
|
|
8
|
-
from pydantic import Field
|
|
9
|
-
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
10
|
-
|
|
11
|
-
from fabricks.config.fabricks.base import BaseConfig
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class PyprojectConfig(BaseConfig, BaseSettings):
|
|
15
|
-
model_config = SettingsConfigDict(
|
|
16
|
-
env_prefix="",
|
|
17
|
-
case_sensitive=False,
|
|
18
|
-
extra="ignore",
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
is_job_config_from_yaml: Optional[bool] = Field(None, alias="job_config_from_yaml")
|
|
22
|
-
is_debugmode: Optional[bool] = Field(None, alias="debugmode")
|
|
23
|
-
|
|
24
|
-
@classmethod
|
|
25
|
-
def _from_pyproject(cls, root: Path) -> PyprojectConfig:
|
|
26
|
-
path = root / "pyproject.toml"
|
|
27
|
-
if not path.exists():
|
|
28
|
-
return cls() # type: ignore
|
|
29
|
-
|
|
30
|
-
with open(path, "rb") as f:
|
|
31
|
-
pyproject = tomllib.load(f)
|
|
32
|
-
config = pyproject.get("tool", {}).get("fabricks", {})
|
|
33
|
-
|
|
34
|
-
return cls(**config, root=root)
|
|
35
|
-
|
|
36
|
-
@classmethod
|
|
37
|
-
def load(cls) -> PyprojectConfig:
|
|
38
|
-
path = Path(os.getcwd())
|
|
39
|
-
|
|
40
|
-
while path is not None:
|
|
41
|
-
if (path / "pyproject.toml").exists():
|
|
42
|
-
break
|
|
43
|
-
if path == path.parent:
|
|
44
|
-
break
|
|
45
|
-
path = path.parent
|
|
46
|
-
|
|
47
|
-
return cls._from_pyproject(path)
|
fabricks/config/jobs/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
from fabricks.config.jobs.base import BaseJobConfig
|
|
2
|
-
from fabricks.config.jobs.bronze import BronzeJobConfig
|
|
3
|
-
from fabricks.config.jobs.gold import GoldJobConfig
|
|
4
|
-
from fabricks.config.jobs.silver import SilverJobConfig
|
|
5
|
-
|
|
6
|
-
__all__ = ["BaseJobConfig", "BronzeJobConfig", "SilverJobConfig", "GoldJobConfig"]
|
fabricks/config/jobs/base.py
DELETED
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
from typing import List, Literal, Optional
|
|
2
|
-
|
|
3
|
-
from fabricks.config.base import ModelBase
|
|
4
|
-
|
|
5
|
-
FileFormats = Literal["json_array", "json", "jsonl", "csv", "parquet", "delta"]
|
|
6
|
-
Operations = Literal["upsert", "reload", "delete"]
|
|
7
|
-
Types = Literal["manual", "default"]
|
|
8
|
-
Origins = Literal["parser", "job"]
|
|
9
|
-
ChangeDataCaptures = Literal["none", "nocdc", "scd1", "scd2"]
|
|
10
|
-
Modes = Literal[
|
|
11
|
-
"memory",
|
|
12
|
-
"append",
|
|
13
|
-
"complete",
|
|
14
|
-
"update",
|
|
15
|
-
"invoke",
|
|
16
|
-
"memory",
|
|
17
|
-
"append",
|
|
18
|
-
"register",
|
|
19
|
-
"memory",
|
|
20
|
-
"append",
|
|
21
|
-
"latest",
|
|
22
|
-
"update",
|
|
23
|
-
"combine",
|
|
24
|
-
]
|
|
25
|
-
Steps = Literal["bronze", "silver", "gold"]
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class SparkOptions(ModelBase):
|
|
29
|
-
sql: Optional[dict[str, str]] = None
|
|
30
|
-
conf: Optional[dict[str, str]] = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class TableOptions(ModelBase):
|
|
34
|
-
identity: Optional[bool] = None
|
|
35
|
-
liquid_clustering: Optional[bool] = None
|
|
36
|
-
partition_by: Optional[List[str]] = None
|
|
37
|
-
zorder_by: Optional[List[str]] = None
|
|
38
|
-
cluster_by: Optional[List[str]] = None
|
|
39
|
-
powerbi: Optional[bool] = None
|
|
40
|
-
bloomfilter_by: Optional[List[str]] = None
|
|
41
|
-
constraints: Optional[dict[str, str]] = None
|
|
42
|
-
properties: Optional[dict[str, str]] = None
|
|
43
|
-
comment: Optional[str] = None
|
|
44
|
-
calculated_columns: Optional[dict[str, str]] = None
|
|
45
|
-
retention_days: Optional[int] = None
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class InvokeOptions(ModelBase):
|
|
49
|
-
notebook: str
|
|
50
|
-
timeout: int
|
|
51
|
-
arguments: Optional[dict[str, str]] = None
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class InvokerOptions(ModelBase):
|
|
55
|
-
pre_run: Optional[List[InvokeOptions]] = None
|
|
56
|
-
run: Optional[List[InvokeOptions]] = None
|
|
57
|
-
post_run: Optional[List[InvokeOptions]] = None
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class ExtenderOptions(ModelBase):
|
|
61
|
-
extender: str
|
|
62
|
-
arguments: Optional[dict[str, str]] = None
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class CheckOptions(ModelBase):
|
|
66
|
-
skip: Optional[bool] = None
|
|
67
|
-
pre_run: Optional[bool] = None
|
|
68
|
-
post_run: Optional[bool] = None
|
|
69
|
-
min_rows: Optional[int] = None
|
|
70
|
-
max_rows: Optional[int] = None
|
|
71
|
-
count_must_equal: Optional[str] = None
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class DefaultOptions(ModelBase):
|
|
75
|
-
type: Optional[Types] = None
|
|
76
|
-
mode: Modes
|
|
77
|
-
change_data_capture: Optional[ChangeDataCaptures]
|
|
78
|
-
# extra
|
|
79
|
-
parents: Optional[List[str]] = None
|
|
80
|
-
filter_where: Optional[str] = None
|
|
81
|
-
timeout: Optional[int] = None
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class BaseJobConfig(ModelBase):
|
|
85
|
-
job_id: str
|
|
86
|
-
|
|
87
|
-
extend: Steps
|
|
88
|
-
step: Steps
|
|
89
|
-
|
|
90
|
-
topic: str
|
|
91
|
-
item: str
|
|
92
|
-
|
|
93
|
-
options: Optional[DefaultOptions] = None
|
|
94
|
-
table_options: Optional[TableOptions] = None
|
|
95
|
-
check_options: Optional[CheckOptions] = None
|
|
96
|
-
spark_options: Optional[SparkOptions] = None
|
|
97
|
-
invoker_options: Optional[InvokerOptions] = None
|
|
98
|
-
extender_options: Optional[List[ExtenderOptions]] = None
|
|
99
|
-
|
|
100
|
-
tags: Optional[List[str]] = None
|
|
101
|
-
comment: Optional[str] = None
|
fabricks/config/jobs/bronze.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
from typing import List, Literal, Optional
|
|
2
|
-
|
|
3
|
-
from fabricks.config.base import ModelBase
|
|
4
|
-
from fabricks.config.jobs.base import BaseJobConfig, ChangeDataCaptures, DefaultOptions, Operations
|
|
5
|
-
|
|
6
|
-
BronzeModes = Literal["memory", "append", "register"]
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class ParserOptions(ModelBase):
|
|
10
|
-
file_format: Optional[str]
|
|
11
|
-
read_options: Optional[dict[str, str]]
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class BronzeOptions(DefaultOptions):
|
|
15
|
-
# default
|
|
16
|
-
mode: BronzeModes
|
|
17
|
-
change_data_capture: ChangeDataCaptures = "none"
|
|
18
|
-
|
|
19
|
-
# mandatory
|
|
20
|
-
uri: str
|
|
21
|
-
parser: str
|
|
22
|
-
source: str
|
|
23
|
-
|
|
24
|
-
# preferred
|
|
25
|
-
keys: Optional[List[str]] = None
|
|
26
|
-
|
|
27
|
-
# optional
|
|
28
|
-
encrypted_columns: Optional[List[str]] = None
|
|
29
|
-
calculated_columns: Optional[dict[str, str]] = None
|
|
30
|
-
operation: Optional[Operations] = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class BronzeJobConfig(BaseJobConfig):
|
|
34
|
-
extend: Literal["bronze"] = "bronze"
|
|
35
|
-
step: Literal["bronze"]
|
|
36
|
-
|
|
37
|
-
options: Optional[BronzeOptions] = None
|
|
38
|
-
parser_options: Optional[ParserOptions] = None
|
fabricks/config/jobs/gold.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from typing import Literal, Optional
|
|
2
|
-
|
|
3
|
-
from fabricks.config.jobs.base import BaseJobConfig, DefaultOptions
|
|
4
|
-
|
|
5
|
-
GoldModes = Literal["memory", "append", "complete", "update", "invoke"]
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class GoldOptions(DefaultOptions):
|
|
9
|
-
# default
|
|
10
|
-
mode: GoldModes
|
|
11
|
-
|
|
12
|
-
# optional
|
|
13
|
-
update_where: Optional[str] = None
|
|
14
|
-
deduplicate: Optional[bool] = None # remove duplicates on the keys and on the hash
|
|
15
|
-
rectify_as_upserts: Optional[bool] = None # convert reloads into upserts and deletes
|
|
16
|
-
correct_valid_from: Optional[bool] = None
|
|
17
|
-
persist_last_timestamp: Optional[bool] = None
|
|
18
|
-
table: Optional[str] = None
|
|
19
|
-
notebook: Optional[bool] = None
|
|
20
|
-
requirements: Optional[bool] = None
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class GoldJobConfig(BaseJobConfig):
|
|
24
|
-
extend: Literal["gold"] = "gold"
|
|
25
|
-
step: Literal["gold"]
|
|
26
|
-
|
|
27
|
-
options: Optional[GoldOptions] = None
|
fabricks/config/jobs/silver.py
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from typing import Literal, Optional
|
|
2
|
-
|
|
3
|
-
from fabricks.config.jobs.base import BaseJobConfig, DefaultOptions
|
|
4
|
-
|
|
5
|
-
SilverModes = Literal["memory", "append", "latest", "update", "combine"]
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class SilverOptions(DefaultOptions):
|
|
9
|
-
# default
|
|
10
|
-
mode: SilverModes
|
|
11
|
-
|
|
12
|
-
# optional
|
|
13
|
-
deduplicate: Optional[bool] = None
|
|
14
|
-
stream: Optional[bool] = None
|
|
15
|
-
order_duplicate_by: Optional[dict[str, str]] = None
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class SilverJobConfig(BaseJobConfig):
|
|
19
|
-
extend: Literal["silver"] = "silver"
|
|
20
|
-
step: Literal["silver"]
|
|
21
|
-
|
|
22
|
-
options: Optional[SilverOptions] = None
|