fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/__init__.py +2 -0
- fabricks/api/context.py +1 -2
- fabricks/api/deploy.py +3 -0
- fabricks/api/job_schema.py +2 -2
- fabricks/api/masks.py +3 -0
- fabricks/api/notebooks/initialize.py +2 -2
- fabricks/api/notebooks/process.py +2 -2
- fabricks/api/notebooks/run.py +2 -2
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +2 -2
- fabricks/api/schedules.py +2 -16
- fabricks/cdc/__init__.py +2 -2
- fabricks/cdc/base/__init__.py +2 -2
- fabricks/cdc/base/_types.py +9 -2
- fabricks/cdc/base/configurator.py +86 -41
- fabricks/cdc/base/generator.py +44 -35
- fabricks/cdc/base/merger.py +16 -14
- fabricks/cdc/base/processor.py +232 -144
- fabricks/cdc/nocdc.py +8 -7
- fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -4
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -2
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
- fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
- fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
- fabricks/cdc/templates/query.sql.jinja +15 -11
- fabricks/context/__init__.py +18 -4
- fabricks/context/_types.py +2 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +8 -2
- fabricks/context/runtime.py +87 -263
- fabricks/context/secret.py +1 -1
- fabricks/context/spark_session.py +1 -1
- fabricks/context/utils.py +76 -0
- fabricks/core/dags/generator.py +6 -7
- fabricks/core/dags/log.py +2 -15
- fabricks/core/dags/processor.py +11 -11
- fabricks/core/dags/utils.py +15 -1
- fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
- fabricks/core/jobs/base/_types.py +64 -22
- fabricks/core/jobs/base/checker.py +13 -12
- fabricks/core/jobs/base/configurator.py +41 -67
- fabricks/core/jobs/base/generator.py +55 -24
- fabricks/core/jobs/base/invoker.py +54 -30
- fabricks/core/jobs/base/processor.py +43 -26
- fabricks/core/jobs/bronze.py +45 -38
- fabricks/core/jobs/get_jobs.py +2 -2
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +61 -48
- fabricks/core/jobs/silver.py +39 -40
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/base.py +2 -2
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +46 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/base.py +110 -72
- fabricks/core/udfs.py +12 -23
- fabricks/core/views.py +20 -13
- fabricks/deploy/__init__.py +97 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +8 -0
- fabricks/{core/deploy → deploy}/tables.py +16 -13
- fabricks/{core/deploy → deploy}/udfs.py +3 -1
- fabricks/deploy/utils.py +36 -0
- fabricks/{core/deploy → deploy}/views.py +5 -9
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/dbobject.py +4 -4
- fabricks/metastore/table.py +157 -88
- fabricks/metastore/view.py +13 -6
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_table.py +4 -3
- fabricks/utils/helpers.py +141 -11
- fabricks/utils/log.py +29 -18
- fabricks/utils/read/_types.py +1 -1
- fabricks/utils/schema/get_schema_for_type.py +6 -0
- fabricks/utils/write/delta.py +3 -3
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
- fabricks-3.0.6.dist-info/RECORD +175 -0
- fabricks/api/notebooks/add_fabricks.py +0 -13
- fabricks/api/notebooks/optimize.py +0 -29
- fabricks/api/notebooks/vacuum.py +0 -29
- fabricks/cdc/templates/query/context.sql.jinja +0 -101
- fabricks/cdc/templates/query/current.sql.jinja +0 -32
- fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
- fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
- fabricks/cdc/templates/query/hash.sql.jinja +0 -1
- fabricks/cdc/templates/query/slice.sql.jinja +0 -14
- fabricks/config/__init__.py +0 -0
- fabricks/config/base.py +0 -8
- fabricks/config/fabricks/__init__.py +0 -26
- fabricks/config/fabricks/base.py +0 -90
- fabricks/config/fabricks/environment.py +0 -9
- fabricks/config/fabricks/pyproject.py +0 -47
- fabricks/config/jobs/__init__.py +0 -6
- fabricks/config/jobs/base.py +0 -101
- fabricks/config/jobs/bronze.py +0 -38
- fabricks/config/jobs/gold.py +0 -27
- fabricks/config/jobs/silver.py +0 -22
- fabricks/config/runtime.py +0 -67
- fabricks/config/steps/__init__.py +0 -6
- fabricks/config/steps/base.py +0 -50
- fabricks/config/steps/bronze.py +0 -7
- fabricks/config/steps/gold.py +0 -14
- fabricks/config/steps/silver.py +0 -15
- fabricks/core/deploy/__init__.py +0 -17
- fabricks/core/schedules.py +0 -142
- fabricks/core/scripts/__init__.py +0 -9
- fabricks/core/scripts/armageddon.py +0 -87
- fabricks/core/scripts/stats.py +0 -51
- fabricks/core/scripts/steps.py +0 -26
- fabricks-3.0.5.2.dist-info/RECORD +0 -177
- /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
- /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
- /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
- /fabricks/core/{utils.py → parsers/utils.py} +0 -0
- /fabricks/core/{scripts → schedules}/generate.py +0 -0
- /fabricks/core/{scripts → schedules}/process.py +0 -0
- /fabricks/core/{scripts → schedules}/terminate.py +0 -0
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
{% import '
|
|
1
|
+
{% import 'macros/hash.sql.jinja' as h -%}
|
|
2
2
|
|
|
3
3
|
with
|
|
4
4
|
{% if format == "query" %} __query as ({{ src }}), {% endif %}
|
|
5
5
|
__base as (
|
|
6
6
|
select
|
|
7
7
|
*
|
|
8
|
-
{% if
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
{% if overwrite %}
|
|
9
|
+
-- will be overwritten below
|
|
10
|
+
except ({% for o in overwrite %}{{ o }}, {% endfor %})
|
|
11
|
+
{% endif %},
|
|
11
12
|
{% if add_calculated_columns %} {% for c in add_calculated_columns %} {{ c }}, {% endfor %} {% endif %}
|
|
12
13
|
{% if add_timestamp %} cast(current_date() as timestamp) as __timestamp, {% endif %}
|
|
13
14
|
{% if add_operation %} cast('{{ add_operation }}' as string) as __operation, {% endif %}
|
|
14
15
|
{% if add_source %} cast('{{ add_source }}' as string) as __source, {% endif %}
|
|
15
|
-
{% if add_hash %} {{ h.
|
|
16
|
-
{% if add_key %} {{ h.
|
|
16
|
+
{% if add_hash %} {{ h.add_hash(fields=hashes) }} as __hash, {% endif %}
|
|
17
|
+
{% if add_key %} {{ h.add_hash(fields=keys) }} as __key, {% endif %}
|
|
17
18
|
{% if add_metadata %}
|
|
18
19
|
struct(
|
|
19
20
|
{% if cdc == "nocdc" %}current_timestamp() as inserted,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{% import 'macros/hash.sql.jinja' as h -%}
|
|
2
|
+
|
|
3
|
+
__current as (
|
|
4
|
+
select
|
|
5
|
+
{% for i in intermediates %}
|
|
6
|
+
{% if i == "__timestamp" %}
|
|
7
|
+
{% if add_timestamp %} cast('0001-01-01' as timestamp) as __timestamp,
|
|
8
|
+
{% elif cdc == "nocdc" %} __timestamp,
|
|
9
|
+
{% elif cdc == "scd1" %} __timestamp,
|
|
10
|
+
{% elif cdc == "scd2" %} __valid_from as __timestamp,
|
|
11
|
+
{% endif %}
|
|
12
|
+
{% elif i == "__operation" %}
|
|
13
|
+
{% if has_no_data %} 'delete' as __operation, {% else %} 'current' as __operation, {% endif %}
|
|
14
|
+
{% elif i == "__hash" %}
|
|
15
|
+
{% if add_hash %} {{ h.add_hash(fields=hashes) }} as __hash, {% else %} __hash, {% endif %}
|
|
16
|
+
{% elif i == "__key" %}
|
|
17
|
+
{% if add_key %} {{ h.add_key(fields=keys) }} as __key, {% else %} __key, {% endif %}
|
|
18
|
+
{% else %} `{{ i }}`,
|
|
19
|
+
{% endif %}
|
|
20
|
+
{% endfor %}
|
|
21
|
+
from {{ tgt }} t
|
|
22
|
+
where
|
|
23
|
+
true
|
|
24
|
+
{% if cdc == "scd2" %} and __is_current {% endif %}
|
|
25
|
+
{% if cdc == "scd1" %} {% if soft_delete %} and __is_current {% endif %} {% endif %}
|
|
26
|
+
{% if sources %} and ({{ sources }}) {% endif %}
|
|
27
|
+
{% if update_where %} and {{ update_where }} {% endif %}
|
|
28
|
+
),
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{% if advanced_deduplication %}
|
|
2
|
+
__deduplicate_hash as (
|
|
3
|
+
select
|
|
4
|
+
*,
|
|
5
|
+
lag(__hash) over (
|
|
6
|
+
partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
|
|
7
|
+
) as __deduplicate_hash_previous__hash,
|
|
8
|
+
lag(__operation) over (
|
|
9
|
+
partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
|
|
10
|
+
) as __deduplicate_hash_previous_operation
|
|
11
|
+
from {{ parent_deduplicate_hash }}
|
|
12
|
+
where true
|
|
13
|
+
),
|
|
14
|
+
__deduplicated_hash as (
|
|
15
|
+
select *
|
|
16
|
+
from __deduplicate_hash
|
|
17
|
+
where
|
|
18
|
+
true
|
|
19
|
+
and not (
|
|
20
|
+
__hash <=> __deduplicate_hash_previous__hash and __operation <=> __deduplicate_hash_previous_operation
|
|
21
|
+
)
|
|
22
|
+
),
|
|
23
|
+
{% else %}
|
|
24
|
+
__deduplicated_hash as (
|
|
25
|
+
select *
|
|
26
|
+
from {{ parent_deduplicate_hash }}
|
|
27
|
+
where true
|
|
28
|
+
qualify
|
|
29
|
+
not lag(__hash) over (partition by {% if has_source %} __source, {% endif %} __key order by null)
|
|
30
|
+
<=> __hash
|
|
31
|
+
),
|
|
32
|
+
{% endif %}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{% if advanced_deduplication %}
|
|
2
|
+
__deduplicate_key as (
|
|
3
|
+
select
|
|
4
|
+
*,
|
|
5
|
+
row_number() over (
|
|
6
|
+
partition by {% if has_source %} __source, {% endif %} __key, __timestamp
|
|
7
|
+
order by
|
|
8
|
+
/* prioritize delete over upsert */
|
|
9
|
+
__operation asc,
|
|
10
|
+
{% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %} {% endif %}
|
|
11
|
+
) as __deduplicate_key_rn
|
|
12
|
+
from {{ parent_deduplicate_key }}
|
|
13
|
+
where true
|
|
14
|
+
),
|
|
15
|
+
__deduplicated_key as (select *, from __deduplicate_key where __deduplicate_key_rn == 1),
|
|
16
|
+
{% else %}
|
|
17
|
+
__deduplicated_key as (
|
|
18
|
+
select *
|
|
19
|
+
from {{ parent_deduplicate_key }}
|
|
20
|
+
where true
|
|
21
|
+
qualify
|
|
22
|
+
row_number() over (
|
|
23
|
+
partition by {% if has_source %} __source, {% endif %} __key
|
|
24
|
+
order by
|
|
25
|
+
{% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %}
|
|
26
|
+
{% else %} null
|
|
27
|
+
{% endif %}
|
|
28
|
+
)
|
|
29
|
+
== 1
|
|
30
|
+
),
|
|
31
|
+
{% endif %}
|
|
@@ -1,34 +1,16 @@
|
|
|
1
|
-
{% import 'query/hash.sql.jinja' as h -%}
|
|
2
|
-
|
|
3
1
|
{% if mode == "update" %}
|
|
4
2
|
__rectified_base as (
|
|
5
3
|
select
|
|
6
|
-
{% for
|
|
7
|
-
__operation,
|
|
8
|
-
__operation as __original_operation,
|
|
9
|
-
__timestamp,
|
|
4
|
+
{% for i in intermediates %} `{{ i }}`, {% endfor %}
|
|
10
5
|
__timestamp as __original_timestamp,
|
|
11
|
-
|
|
12
|
-
__key,
|
|
13
|
-
{% if has_identity %} __identity, {% endif %}
|
|
14
|
-
{% if has_source %} __source, {% endif %}
|
|
15
|
-
{% if has_metadata %} __metadata, {% endif %}
|
|
16
|
-
{% if has_rescued_data %} __rescued_data, {% endif %}
|
|
6
|
+
__operation as __original_operation,
|
|
17
7
|
from {{ parent_rectify }}
|
|
18
8
|
{% if has_rows %}
|
|
19
9
|
union all
|
|
20
10
|
select
|
|
21
|
-
{% for
|
|
22
|
-
__operation,
|
|
23
|
-
__operation as __original_operation,
|
|
24
|
-
__timestamp,
|
|
11
|
+
{% for i in intermediates %} `{{ i }}`, {% endfor %}
|
|
25
12
|
__timestamp as __original_timestamp,
|
|
26
|
-
|
|
27
|
-
__key,
|
|
28
|
-
{% if has_identity %} __identity, {% endif %}
|
|
29
|
-
{% if has_source %} __source, {% endif %}
|
|
30
|
-
{% if has_metadata %} __metadata, {% endif %}
|
|
31
|
-
{% if has_rescued_data %} __rescued_data, {% endif %}
|
|
13
|
+
__operation as __original_operation,
|
|
32
14
|
from __current
|
|
33
15
|
{% endif %}
|
|
34
16
|
),
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__sliced as (select * from {{ parent_slice }} s where true and ({{ slices }})),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
{% include '
|
|
2
|
-
{% if slice == "update" %} {% include '
|
|
3
|
-
{% if slice == "latest" %} {% include '
|
|
4
|
-
{% include '
|
|
1
|
+
{% include 'ctes/base.sql.jinja' %}
|
|
2
|
+
{% if slice == "update" %} {% include 'filters/update.sql.jinja' %} {% endif %}
|
|
3
|
+
{% if slice == "latest" %} {% include 'filters/latest.sql.jinja' %} {% endif %}
|
|
4
|
+
{% include 'filters/final.sql.jinja' %}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{% macro backtick(field) -%} `{{ field }}` {%- endmacro %}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{% macro add_hash(fields) -%}
|
|
2
|
+
md5(
|
|
3
|
+
array_join(
|
|
4
|
+
array(
|
|
5
|
+
{% for f in fields %}
|
|
6
|
+
{% if f == "__operation" %}cast(`{{ f }}` <=> 'delete' as string) -- reloads and upserts should have the same hash, not deletes,
|
|
7
|
+
{% else %}`{{ f }}`::string,
|
|
8
|
+
{% endif %}
|
|
9
|
+
{% endfor %}
|
|
10
|
+
),
|
|
11
|
+
'*',
|
|
12
|
+
'-1'
|
|
13
|
+
)
|
|
14
|
+
)
|
|
15
|
+
{%- endmacro %}
|
|
16
|
+
{% macro add_key(fields) -%}
|
|
17
|
+
md5(array_join(array({% for f in fields %}`{{ f }}`::string, {% endfor %}), '*', '-1'))
|
|
18
|
+
{%- endmacro %}
|
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
{% if cdc == "scd1" %} {% include '
|
|
2
|
-
{% if cdc == "scd2" %} {% include '
|
|
1
|
+
{% if cdc == "scd1" %} {% include 'merges/scd1.sql.jinja' %} {% endif %}
|
|
2
|
+
{% if cdc == "scd2" %} {% include 'merges/scd2.sql.jinja' %} {% endif %}
|
|
3
|
+
{% if cdc == "nocdc" %} {% include 'merges/nocdc.sql.jinja' %} {% endif %}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{% if format == "dataframe" %}
|
|
2
|
+
merge into {{ tgt }} t using {{ "{src}" }} s
|
|
3
|
+
{% endif %}
|
|
4
|
+
{% if format == "view" %}
|
|
5
|
+
merge into {{ tgt }} t using {{ src }} s
|
|
6
|
+
{% endif %}
|
|
7
|
+
{% if has_key %}
|
|
8
|
+
on t.__key == s.__merge_key
|
|
9
|
+
{% else %}
|
|
10
|
+
on
|
|
11
|
+
{% for k in keys %}
|
|
12
|
+
t.{{ k }} <=> s.{{ k }}
|
|
13
|
+
{% endfor %}
|
|
14
|
+
{% endif %}
|
|
15
|
+
{% if has_source %}
|
|
16
|
+
and t.__source == s.__source
|
|
17
|
+
{% endif %}
|
|
18
|
+
{% if update_where %} {{ update_where }} {% endif %}
|
|
19
|
+
when matched
|
|
20
|
+
and __merge_condition == 'upsert' then
|
|
21
|
+
update
|
|
22
|
+
set
|
|
23
|
+
{% for c in columns %}
|
|
24
|
+
{{ c }} = s.{{c}},
|
|
25
|
+
{% endfor %}
|
|
26
|
+
-- delete
|
|
27
|
+
when matched
|
|
28
|
+
and __merge_condition == 'delete' then
|
|
29
|
+
delete
|
|
30
|
+
when not matched
|
|
31
|
+
and __merge_condition == 'upsert' then
|
|
32
|
+
insert (
|
|
33
|
+
{% for c in columns %}
|
|
34
|
+
{{ c }},
|
|
35
|
+
{% endfor %}
|
|
36
|
+
)
|
|
37
|
+
values (
|
|
38
|
+
{% for c in columns %}
|
|
39
|
+
s.{{ c }},
|
|
40
|
+
{% endfor %}
|
|
41
|
+
)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/*
|
|
2
|
+
|
|
3
|
+
⚙️ BASE
|
|
4
|
+
{%- if cdc %}
|
|
5
|
+
☐ cdc: {{ cdc }}
|
|
6
|
+
{%- endif %}
|
|
7
|
+
{%- if mode %}
|
|
8
|
+
☐ mode: {{ mode }}
|
|
9
|
+
{%- endif %}
|
|
10
|
+
|
|
11
|
+
🎯 SOURCE & TARGET
|
|
12
|
+
{%- if format %}
|
|
13
|
+
☐ format: {{ format }}
|
|
14
|
+
{%- endif %}
|
|
15
|
+
{%- if src %}
|
|
16
|
+
☐ src: {{ src | truncate(100, killwords=True) }}
|
|
17
|
+
{%- endif %}
|
|
18
|
+
{%- if tgt %}
|
|
19
|
+
☐ tgt: {{ tgt }}
|
|
20
|
+
{%- endif %}
|
|
21
|
+
|
|
22
|
+
📊 CTEs
|
|
23
|
+
{%- if slice %}
|
|
24
|
+
🗹 slice?
|
|
25
|
+
{%- endif %}
|
|
26
|
+
{%- if deduplicate %}
|
|
27
|
+
🗹 deduplicate?
|
|
28
|
+
{%- endif %}
|
|
29
|
+
{%- if advanced_deduplication %}
|
|
30
|
+
🗹 advanced deduplication?
|
|
31
|
+
{%- endif %}
|
|
32
|
+
{%- if deduplicate_key %}
|
|
33
|
+
🗹 deduplicate key?
|
|
34
|
+
{%- endif %}
|
|
35
|
+
{%- if deduplicate_hash %}
|
|
36
|
+
🗹 deduplicate hash?
|
|
37
|
+
{%- endif %}
|
|
38
|
+
{%- if order_duplicate_by %}
|
|
39
|
+
🗹 order duplicate by?
|
|
40
|
+
{%- endif %}
|
|
41
|
+
{%- if rectify %}
|
|
42
|
+
🗹 rectify?
|
|
43
|
+
{%- endif %}
|
|
44
|
+
{%- if correct_valid_from %}
|
|
45
|
+
🗹 correct valid from?
|
|
46
|
+
{%- endif %}
|
|
47
|
+
|
|
48
|
+
🔪 FILTERING
|
|
49
|
+
{%- if filter_where %}
|
|
50
|
+
☐ filter where: {{ filter_where }}
|
|
51
|
+
{%- endif %}
|
|
52
|
+
{%- if update_where %}
|
|
53
|
+
☐ update where: {{ update_where }}
|
|
54
|
+
{%- endif %}
|
|
55
|
+
{%- if slices %}
|
|
56
|
+
☐ slices: {{ slices }}
|
|
57
|
+
{%- endif %}
|
|
58
|
+
{%- if sources %}
|
|
59
|
+
☐ sources: {{ sources }}
|
|
60
|
+
{%- endif %}
|
|
61
|
+
|
|
62
|
+
🗑️ DELETES
|
|
63
|
+
{%- if delete_missing %}
|
|
64
|
+
🗹 delete missing?
|
|
65
|
+
{%- endif %}
|
|
66
|
+
{%- if soft_delete %}
|
|
67
|
+
🗹 soft delete?
|
|
68
|
+
{%- endif %}
|
|
69
|
+
|
|
70
|
+
✅ DATA VALIDATION
|
|
71
|
+
{%- if has_no_data %}
|
|
72
|
+
☒ has data?
|
|
73
|
+
{%- else %}
|
|
74
|
+
🗹 has data?
|
|
75
|
+
{%- endif %}
|
|
76
|
+
{%- if has_rows %}
|
|
77
|
+
🗹 has rows?
|
|
78
|
+
{%- else %}
|
|
79
|
+
☒ has rows?
|
|
80
|
+
{%- endif %}
|
|
81
|
+
{%- if has_source %}
|
|
82
|
+
🗹 has source?
|
|
83
|
+
{%- endif %}
|
|
84
|
+
|
|
85
|
+
🏷️ HAS FIELDS
|
|
86
|
+
{%- if has_metadata %}
|
|
87
|
+
🗹 has metadata?
|
|
88
|
+
{%- endif %}
|
|
89
|
+
{%- if has_timestamp %}
|
|
90
|
+
🗹 has timestamp?
|
|
91
|
+
{%- endif %}
|
|
92
|
+
{%- if has_identity %}
|
|
93
|
+
🗹 has identity?
|
|
94
|
+
{%- endif %}
|
|
95
|
+
{%- if has_key %}
|
|
96
|
+
🗹 has key?
|
|
97
|
+
{%- endif %}
|
|
98
|
+
{%- if has_hash %}
|
|
99
|
+
🗹 has hash?
|
|
100
|
+
{%- endif %}
|
|
101
|
+
{%- if has_operation %}
|
|
102
|
+
🗹 has operation?
|
|
103
|
+
{%- endif %}
|
|
104
|
+
{%- if has_order_by %}
|
|
105
|
+
🗹 has order by?
|
|
106
|
+
{%- endif %}
|
|
107
|
+
{%- if has_rescued_data %}
|
|
108
|
+
🗹 has rescued data?
|
|
109
|
+
{%- endif %}
|
|
110
|
+
|
|
111
|
+
➕ ADD COLUMNS
|
|
112
|
+
{%- if add_metadata %}
|
|
113
|
+
🗹 add metadata?
|
|
114
|
+
{%- endif %}
|
|
115
|
+
{%- if add_timestamp %}
|
|
116
|
+
🗹 add timestamp?
|
|
117
|
+
{%- endif %}
|
|
118
|
+
{%- if add_key %}
|
|
119
|
+
🗹 add key?
|
|
120
|
+
{%- endif %}
|
|
121
|
+
{%- if add_hash %}
|
|
122
|
+
🗹 add hash?
|
|
123
|
+
{%- endif %}
|
|
124
|
+
{%- if add_operation %}
|
|
125
|
+
☐ add_operation: {{ add_operation }}
|
|
126
|
+
{%- endif %}
|
|
127
|
+
{%- if add_source %}
|
|
128
|
+
☐ add_source: {{ add_source }}
|
|
129
|
+
{%- endif %}
|
|
130
|
+
{%- if add_calculated_columns %}
|
|
131
|
+
☐ add_calculated_columns: {{ add_calculated_columns }}
|
|
132
|
+
{%- endif %}
|
|
133
|
+
|
|
134
|
+
🔄 EXTRA COLUMN OPERATIONS
|
|
135
|
+
{%- if all_except %}
|
|
136
|
+
☐ all_except: {{ all_except | join(", ") | truncate(100, killwords=True) }}
|
|
137
|
+
{%- endif %}
|
|
138
|
+
{%- if all_overwrite %}
|
|
139
|
+
☐ all_overwrite: {{ all_overwrite | join(", ") | truncate(100, killwords=True) }}
|
|
140
|
+
{%- endif %}
|
|
141
|
+
{%- if overwrite %}
|
|
142
|
+
☐ overwrite: {{ overwrite | join(", ") | truncate(100, killwords=True) }}
|
|
143
|
+
{%- endif %}
|
|
144
|
+
|
|
145
|
+
👨👩👧 PARENTS
|
|
146
|
+
{%- if parent_slice %}
|
|
147
|
+
☐ parent_slice: {{ parent_slice }}
|
|
148
|
+
{%- endif %}
|
|
149
|
+
{%- if parent_rectify %}
|
|
150
|
+
☐ parent_rectify: {{ parent_rectify }}
|
|
151
|
+
{%- endif %}
|
|
152
|
+
{%- if parent_deduplicate_key %}
|
|
153
|
+
☐ parent_deduplicate_key: {{ parent_deduplicate_key }}
|
|
154
|
+
{%- endif %}
|
|
155
|
+
{%- if parent_deduplicate_hash %}
|
|
156
|
+
☐ parent_deduplicate_hash: {{ parent_deduplicate_hash }}
|
|
157
|
+
{%- endif %}
|
|
158
|
+
{%- if parent_cdc %}
|
|
159
|
+
☐ parent_cdc: {{ parent_cdc }}
|
|
160
|
+
{%- endif %}
|
|
161
|
+
{%- if parent_final %}
|
|
162
|
+
☐ parent_final: {{ parent_final }}
|
|
163
|
+
{%- endif %}
|
|
164
|
+
|
|
165
|
+
📦 LAYOUT
|
|
166
|
+
{%- if columns %}
|
|
167
|
+
☐ columns: {{ columns | join(", ") | truncate(100, killwords=True) }}
|
|
168
|
+
{%- endif %}
|
|
169
|
+
{%- if inputs %}
|
|
170
|
+
☐ inputs: {{ inputs | join(", ") | truncate(100, killwords=True) }}
|
|
171
|
+
{%- endif %}
|
|
172
|
+
{%- if intermediates %}
|
|
173
|
+
☐ intermediates: {{ intermediates | join(", ") | truncate(100, killwords=True) }}
|
|
174
|
+
{%- endif %}
|
|
175
|
+
{%- if outputs %}
|
|
176
|
+
☐ outputs: {{ outputs | join(", ") | truncate(100, killwords=True) }}
|
|
177
|
+
{%- endif %}
|
|
178
|
+
{%- if keys %}
|
|
179
|
+
☐ keys: {{ keys | join(", ") | truncate(100, killwords=True) }}
|
|
180
|
+
{%- endif %}
|
|
181
|
+
{%- if hashes %}
|
|
182
|
+
☐ hashes: {{ hashes | join(", ") | truncate(100, killwords=True) }}
|
|
183
|
+
{%- endif %}
|
|
184
|
+
|
|
185
|
+
*/
|
|
186
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{% if has_rows %}
|
|
2
|
+
__merge_condition as (
|
|
3
|
+
select
|
|
4
|
+
s.__key as __merge_key,
|
|
5
|
+
'upsert' as __merge_condition,
|
|
6
|
+
{% for output in outputs %} s.`{{ output }}`, {% endfor %}
|
|
7
|
+
from {{ parent_cdc }} s
|
|
8
|
+
left anti join
|
|
9
|
+
__current c on s.__key == c.__key and s.__hash == c.__hash
|
|
10
|
+
{% if has_source %} and s.__source = c.__source {% endif %}
|
|
11
|
+
{% if delete_missing %}
|
|
12
|
+
union all
|
|
13
|
+
select
|
|
14
|
+
c.__key as __merge_key,
|
|
15
|
+
'delete' as __merge_condition,
|
|
16
|
+
{% for output in outputs %} c.`{{ output }}`, {% endfor %}
|
|
17
|
+
from __current c
|
|
18
|
+
left anti join
|
|
19
|
+
{{ parent_cdc }} s on s.__key == c.__key and s.__hash == c.__hash
|
|
20
|
+
{% if has_source %} and s.__source = c.__source {% endif %}
|
|
21
|
+
{% endif %}
|
|
22
|
+
),
|
|
23
|
+
{% else %}
|
|
24
|
+
__merge_condition as (select s.__key as __merge_key, 'upsert' as __merge_condition, s.* from {{ parent_cdc }} s),
|
|
25
|
+
{% endif %}
|
|
26
|
+
__final as (
|
|
27
|
+
select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
28
|
+
from __merge_condition
|
|
29
|
+
{% if filter %}
|
|
30
|
+
where
|
|
31
|
+
true
|
|
32
|
+
-- operation current added by filter
|
|
33
|
+
and __operation <> 'current'
|
|
34
|
+
{% endif %}
|
|
35
|
+
)
|
|
@@ -49,16 +49,7 @@ __scd1 as (
|
|
|
49
49
|
),
|
|
50
50
|
{% if mode == "complete" %}
|
|
51
51
|
__final as (
|
|
52
|
-
select
|
|
53
|
-
{% for field in fields %} s.{{ field }}, {% endfor %},
|
|
54
|
-
{% if has_identity %} __identity, {% endif %}
|
|
55
|
-
s.__key,
|
|
56
|
-
s.__timestamp,
|
|
57
|
-
{% if soft_delete %} s.__is_current, s.__is_deleted, {% endif %}
|
|
58
|
-
s.__hash,
|
|
59
|
-
{% if has_source %} s.__source, {% endif %}
|
|
60
|
-
{% if has_metadata %} s.__metadata, {% endif %}
|
|
61
|
-
{% if has_rescued_data %} s.__rescued_data, {% endif %}
|
|
52
|
+
select {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
62
53
|
from __scd1 s
|
|
63
54
|
where true {% if not soft_delete %} and s.__is_current {% endif %}
|
|
64
55
|
)
|
|
@@ -86,24 +77,7 @@ __scd1 as (
|
|
|
86
77
|
),
|
|
87
78
|
{% endif %}
|
|
88
79
|
__final as (
|
|
89
|
-
select
|
|
90
|
-
__merge_key,
|
|
91
|
-
__merge_condition,
|
|
92
|
-
{% for field in fields %} {{ field }},
|
|
93
|
-
{% endfor %},
|
|
94
|
-
{% if has_identity %} __identity,
|
|
95
|
-
{% endif %}
|
|
96
|
-
__key,
|
|
97
|
-
__timestamp,
|
|
98
|
-
{% if soft_delete %} __is_current, __is_deleted,
|
|
99
|
-
{% endif %}
|
|
100
|
-
__hash,
|
|
101
|
-
{% if has_source %} __source,
|
|
102
|
-
{% endif %}
|
|
103
|
-
{% if has_metadata %} __metadata,
|
|
104
|
-
{% endif %}
|
|
105
|
-
{% if has_rescued_data %} __rescued_data,
|
|
106
|
-
{% endif %}
|
|
80
|
+
select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
107
81
|
{% if has_rows %} from __scd1_no_fake_update m
|
|
108
82
|
{% else %} from __merge_condition m
|
|
109
83
|
{% endif %}
|
|
@@ -26,27 +26,23 @@ __scd2 as (
|
|
|
26
26
|
),
|
|
27
27
|
{% if mode == "complete" %}
|
|
28
28
|
__complete as (select s.* from __scd2 s where true and not __operation <=> 'delete'),
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
__key,
|
|
34
|
-
{% if correct_valid_from %}
|
|
29
|
+
{% if correct_valid_from %}
|
|
30
|
+
__correct_valid_from as (
|
|
31
|
+
select
|
|
32
|
+
* except (__valid_from),
|
|
35
33
|
if(
|
|
36
34
|
__valid_from == min(__valid_from) over (partition by null),
|
|
37
35
|
cast('1900-01-01' as timestamp),
|
|
38
36
|
__valid_from
|
|
39
|
-
) as __valid_from
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
{% if has_rescued_data %} __rescued_data, {% endif %}
|
|
49
|
-
from __complete
|
|
37
|
+
) as __valid_from
|
|
38
|
+
from __complete
|
|
39
|
+
),
|
|
40
|
+
{% endif %}
|
|
41
|
+
__final as (
|
|
42
|
+
select {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
43
|
+
{% if correct_valid_from %} from __correct_valid_from
|
|
44
|
+
{% else %} from __complete
|
|
45
|
+
{% endif %}
|
|
50
46
|
)
|
|
51
47
|
{% else %}
|
|
52
48
|
{% if has_rows %}
|
|
@@ -81,37 +77,22 @@ __scd2 as (
|
|
|
81
77
|
-- only the first record can be an update or a delete
|
|
82
78
|
where (s.__scd2_rn == 1 and o.__merge_condition in ('update', 'delete')) or o.__merge_condition == 'insert'
|
|
83
79
|
),
|
|
80
|
+
{% if correct_valid_from %}
|
|
81
|
+
__correct_valid_from as (
|
|
82
|
+
select
|
|
83
|
+
* except (__valid_from),
|
|
84
|
+
if(
|
|
85
|
+
__valid_from == min(__valid_from) over (partition by null),
|
|
86
|
+
cast('1900-01-01' as timestamp),
|
|
87
|
+
__valid_from
|
|
88
|
+
) as __valid_from
|
|
89
|
+
from __merge_condition
|
|
90
|
+
),
|
|
91
|
+
{% endif %}
|
|
84
92
|
__final as (
|
|
85
|
-
select
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
{% endfor %},
|
|
90
|
-
{% if has_identity %} __identity,
|
|
91
|
-
{% endif %}
|
|
92
|
-
__key,
|
|
93
|
-
{% if correct_valid_from %}
|
|
94
|
-
{% if not has_rows %}
|
|
95
|
-
if(
|
|
96
|
-
__valid_from == min(__valid_from) over (partition by null),
|
|
97
|
-
cast('1900-01-01' as timestamp),
|
|
98
|
-
__valid_from
|
|
99
|
-
) as __valid_from,
|
|
100
|
-
{% else %} __valid_from,
|
|
101
|
-
{% endif %}
|
|
102
|
-
{% else %} __valid_from,
|
|
103
|
-
{% endif %}
|
|
104
|
-
__valid_to,
|
|
105
|
-
__is_current and __merge_condition == 'insert' as __is_current,
|
|
106
|
-
{% if soft_delete %} __is_deleted,
|
|
107
|
-
{% endif %}
|
|
108
|
-
__hash,
|
|
109
|
-
{% if has_source %} __source,
|
|
110
|
-
{% endif %}
|
|
111
|
-
{% if has_metadata %} __metadata,
|
|
112
|
-
{% endif %}
|
|
113
|
-
{% if has_rescued_data %} __rescued_data,
|
|
114
|
-
{% endif %}
|
|
115
|
-
from __merge_condition m
|
|
93
|
+
select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
94
|
+
{% if correct_valid_from %} from __correct_valid_from
|
|
95
|
+
{% else %} from __merge_condition
|
|
96
|
+
{% endif %}
|
|
116
97
|
)
|
|
117
98
|
{% endif %}
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
{% include '
|
|
2
|
-
{% include '
|
|
3
|
-
{% if slice %} {% include '
|
|
4
|
-
{% if deduplicate_key %} {% include '
|
|
5
|
-
{% if mode == "update" %} {% if has_rows %} {% include '
|
|
6
|
-
{% if rectify %} {% include '
|
|
7
|
-
{% if deduplicate_hash %} {% include '
|
|
8
|
-
{% if cdc == "nocdc" %}
|
|
9
|
-
{% if
|
|
10
|
-
{%
|
|
11
|
-
{%
|
|
1
|
+
{% include 'queries/context.sql.jinja' %}
|
|
2
|
+
{% include 'ctes/base.sql.jinja' %}
|
|
3
|
+
{% if slice %} {% include 'ctes/slice.sql.jinja' %} {% endif %}
|
|
4
|
+
{% if deduplicate_key %} {% include 'ctes/deduplicate_key.sql.jinja' %} {% endif %}
|
|
5
|
+
{% if mode == "update" %} {% if has_rows %} {% include 'ctes/current.sql.jinja' %} {% endif %} {% endif %}
|
|
6
|
+
{% if rectify %} {% include 'ctes/rectify.sql.jinja' %} {% endif %}
|
|
7
|
+
{% if deduplicate_hash %} {% include 'ctes/deduplicate_hash.sql.jinja' %} {% endif %}
|
|
8
|
+
{% if cdc == "nocdc" %}
|
|
9
|
+
{% if mode == "update" %} {% include 'queries/nocdc/update.sql.jinja' %}
|
|
10
|
+
{% else %} {% include 'queries/nocdc/complete.sql.jinja' %}
|
|
11
|
+
{% endif %}
|
|
12
|
+
{% endif %}
|
|
13
|
+
{% if cdc == "scd1" %} {% include 'queries/scd1.sql.jinja' %} {% endif %}
|
|
14
|
+
{% if cdc == "scd2" %} {% include 'queries/scd2.sql.jinja' %} {% endif %}
|
|
15
|
+
{% include 'queries/final.sql.jinja' %}
|