fabricks 3.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +11 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +27 -0
- fabricks/api/core.py +4 -0
- fabricks/api/deploy.py +3 -0
- fabricks/api/exceptions.py +19 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/job_schema.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/masks.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/initialize.py +42 -0
- fabricks/api/notebooks/process.py +54 -0
- fabricks/api/notebooks/run.py +59 -0
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +31 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/schedules.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/api/version.py +3 -0
- fabricks/api/views.py +6 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/_types.py +10 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +223 -0
- fabricks/cdc/base/generator.py +177 -0
- fabricks/cdc/base/merger.py +110 -0
- fabricks/cdc/base/processor.py +471 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +20 -0
- fabricks/cdc/scd.py +22 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -0
- fabricks/cdc/templates/filters/final.sql.jinja +4 -0
- fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
- fabricks/cdc/templates/filters/update.sql.jinja +30 -0
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -0
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/queries/__init__.py +0 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/queries/final.sql.jinja +1 -0
- fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
- fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
- fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
- fabricks/cdc/templates/query.sql.jinja +15 -0
- fabricks/context/__init__.py +72 -0
- fabricks/context/_types.py +133 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +77 -0
- fabricks/context/runtime.py +117 -0
- fabricks/context/secret.py +103 -0
- fabricks/context/spark_session.py +82 -0
- fabricks/context/utils.py +80 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +99 -0
- fabricks/core/dags/generator.py +157 -0
- fabricks/core/dags/log.py +12 -0
- fabricks/core/dags/processor.py +228 -0
- fabricks/core/dags/run.py +39 -0
- fabricks/core/dags/terminator.py +25 -0
- fabricks/core/dags/utils.py +54 -0
- fabricks/core/extenders.py +33 -0
- fabricks/core/job_schema.py +32 -0
- fabricks/core/jobs/__init__.py +21 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/_types.py +284 -0
- fabricks/core/jobs/base/checker.py +139 -0
- fabricks/core/jobs/base/configurator.py +306 -0
- fabricks/core/jobs/base/exception.py +85 -0
- fabricks/core/jobs/base/generator.py +447 -0
- fabricks/core/jobs/base/invoker.py +206 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +249 -0
- fabricks/core/jobs/bronze.py +395 -0
- fabricks/core/jobs/get_job.py +127 -0
- fabricks/core/jobs/get_job_conf.py +152 -0
- fabricks/core/jobs/get_job_id.py +31 -0
- fabricks/core/jobs/get_jobs.py +107 -0
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +415 -0
- fabricks/core/jobs/silver.py +373 -0
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/_types.py +6 -0
- fabricks/core/parsers/base.py +95 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +26 -0
- fabricks/core/parsers/utils.py +69 -0
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +21 -0
- fabricks/core/schedules/generate.py +20 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/process.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/terminate.py +6 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/_types.py +7 -0
- fabricks/core/steps/base.py +423 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +26 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/views.py +41 -0
- fabricks/deploy/__init__.py +92 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +10 -0
- fabricks/deploy/tables.py +82 -0
- fabricks/deploy/udfs.py +19 -0
- fabricks/deploy/utils.py +36 -0
- fabricks/deploy/views.py +509 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/_types.py +65 -0
- fabricks/metastore/database.py +65 -0
- fabricks/metastore/dbobject.py +66 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/table.py +768 -0
- fabricks/metastore/utils.py +51 -0
- fabricks/metastore/view.py +53 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_queue.py +93 -0
- fabricks/utils/azure_table.py +154 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/fdict.py +240 -0
- fabricks/utils/helpers.py +228 -0
- fabricks/utils/log.py +236 -0
- fabricks/utils/mermaid.py +32 -0
- fabricks/utils/path.py +242 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +94 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/_types.py +3 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +33 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +99 -0
- fabricks/utils/spark.py +76 -0
- fabricks/utils/sqlglot.py +56 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-3.0.11.dist-info/METADATA +23 -0
- fabricks-3.0.11.dist-info/RECORD +176 -0
- fabricks-3.0.11.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
{% if mode == "update" %}
|
|
2
|
+
__rectified_base as (
|
|
3
|
+
select
|
|
4
|
+
{% for i in intermediates %} `{{ i }}`, {% endfor %}
|
|
5
|
+
__timestamp as __original_timestamp,
|
|
6
|
+
__operation as __original_operation,
|
|
7
|
+
from {{ parent_rectify }}
|
|
8
|
+
{% if has_rows %}
|
|
9
|
+
union all
|
|
10
|
+
select
|
|
11
|
+
{% for i in intermediates %} `{{ i }}`, {% endfor %}
|
|
12
|
+
__timestamp as __original_timestamp,
|
|
13
|
+
__operation as __original_operation,
|
|
14
|
+
from __current
|
|
15
|
+
{% endif %}
|
|
16
|
+
),
|
|
17
|
+
{% endif %}
|
|
18
|
+
__rectified_next_operation as (
|
|
19
|
+
select
|
|
20
|
+
*,
|
|
21
|
+
lead(__operation) over (
|
|
22
|
+
partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
|
|
23
|
+
) as __rectified_next_operation
|
|
24
|
+
{% if mode == "update" %} from __rectified_base p
|
|
25
|
+
{% else %} from {{ parent_rectify }}
|
|
26
|
+
{% endif %}
|
|
27
|
+
),
|
|
28
|
+
__rectified_timestamps as (
|
|
29
|
+
select
|
|
30
|
+
{% if has_source %} __source, {% endif %}
|
|
31
|
+
__timestamp,
|
|
32
|
+
lead(__timestamp) over (
|
|
33
|
+
{% if has_source %}partition by __source {% endif %} order by __timestamp asc
|
|
34
|
+
) as __rectified_next_timestamp,
|
|
35
|
+
lead(if(max(__operation) == 'reload', __timestamp, null)) ignore nulls over (
|
|
36
|
+
{% if has_source %}partition by __source {% endif %} order by __timestamp asc
|
|
37
|
+
) as __rectified_next_timestamp_reload,
|
|
38
|
+
from __rectified_next_operation
|
|
39
|
+
group by {% if has_source %} __source, {% endif %} __timestamp
|
|
40
|
+
),
|
|
41
|
+
__rectified_is_deleted_next as (
|
|
42
|
+
select
|
|
43
|
+
cur.*,
|
|
44
|
+
t.* except ({% if has_source %}t.__source, {% endif %} t.__timestamp),
|
|
45
|
+
-- there are more reloads
|
|
46
|
+
__rectified_next_timestamp_reload is not null as __rectify_more_reload,
|
|
47
|
+
-- the next operation is before the next reload
|
|
48
|
+
if(
|
|
49
|
+
__rectify_more_reload, t.__rectified_next_timestamp < t.__rectified_next_timestamp_reload, true
|
|
50
|
+
) as __rectify_key_next_operation_before_next_reload,
|
|
51
|
+
-- the record is deleted
|
|
52
|
+
cur.__operation == 'delete' as __rectified_key_is_deleted,
|
|
53
|
+
-- the record is not found in next reload
|
|
54
|
+
__rectify_more_reload and nxt.__timestamp is null as __rectified_key_not_found_in_next_reload,
|
|
55
|
+
-- there is no more operation
|
|
56
|
+
t.__rectified_next_timestamp is null as __rectified_no_more_operation,
|
|
57
|
+
-- the record is deleted before next reload
|
|
58
|
+
__rectify_key_next_operation_before_next_reload
|
|
59
|
+
and cur.__rectified_next_operation <=> 'delete' as __rectified_key_is_deleted_next,
|
|
60
|
+
-- the record is updated before next reload
|
|
61
|
+
__rectify_key_next_operation_before_next_reload
|
|
62
|
+
and cur.__rectified_next_operation <=> 'upsert' as __rectified_key_is_updated_next,
|
|
63
|
+
case
|
|
64
|
+
when __rectified_key_is_deleted
|
|
65
|
+
then false
|
|
66
|
+
when __rectified_key_is_updated_next
|
|
67
|
+
then false
|
|
68
|
+
when __rectified_key_is_deleted_next
|
|
69
|
+
then false
|
|
70
|
+
when __rectified_no_more_operation
|
|
71
|
+
then false
|
|
72
|
+
when __rectified_key_not_found_in_next_reload
|
|
73
|
+
then true
|
|
74
|
+
else false
|
|
75
|
+
end as __rectified_is_deleted_next
|
|
76
|
+
from __rectified_next_operation cur
|
|
77
|
+
left join
|
|
78
|
+
__rectified_timestamps t on cur.__timestamp = t.__timestamp
|
|
79
|
+
{% if has_source %} and cur.__source == t.__source {% endif %}
|
|
80
|
+
left join
|
|
81
|
+
__rectified_next_operation nxt
|
|
82
|
+
on t.__rectified_next_timestamp_reload = nxt.__timestamp
|
|
83
|
+
and cur.__key == nxt.__key
|
|
84
|
+
{% if has_source %} and cur.__source == nxt.__source {% endif %}
|
|
85
|
+
),
|
|
86
|
+
{% if mode == "complete" %} __rectified as ( {% else %} __rectified_operation as (
|
|
87
|
+
{% endif %}
|
|
88
|
+
select
|
|
89
|
+
c.* except (c.__operation, c.__timestamp),
|
|
90
|
+
if(c.__operation == 'delete', 'delete', d2.__rectified_operation) as __operation,
|
|
91
|
+
case
|
|
92
|
+
when c.__operation == 'delete'
|
|
93
|
+
then c.__timestamp
|
|
94
|
+
when d2.__rectified_operation == 'upsert'
|
|
95
|
+
then c.__timestamp
|
|
96
|
+
else c.__rectified_next_timestamp_reload
|
|
97
|
+
end as __timestamp
|
|
98
|
+
from __rectified_is_deleted_next c
|
|
99
|
+
cross join
|
|
100
|
+
(
|
|
101
|
+
select false as __is_deleted, 'upsert' as __rectified_operation
|
|
102
|
+
union all
|
|
103
|
+
select true as __is_deleted, 'delete' as __rectified_operation
|
|
104
|
+
union all
|
|
105
|
+
select true as __is_deleted, 'upsert' as __rectified_operation
|
|
106
|
+
) d2
|
|
107
|
+
on c.__rectified_is_deleted_next = d2.__is_deleted
|
|
108
|
+
),
|
|
109
|
+
{% if mode == "update" %}
|
|
110
|
+
__rectified as (
|
|
111
|
+
select * from __rectified_operation where not (__original_operation == 'current' and __operation == 'upsert')
|
|
112
|
+
),
|
|
113
|
+
{% endif %}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{# CTE __sliced: rows of parent_slice (aliased s) that satisfy the rendered `slices` predicate. -#}
__sliced as (select * from {{ parent_slice }} s where true and ({{ slices }})),
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
__latest as (
|
|
2
|
+
select {% if has_source %} __source, {% endif %} max(__timestamp) as __max_timestamp
|
|
3
|
+
from {{ parent_slice }}
|
|
4
|
+
{% if has_source %} group by __source {% endif %}
|
|
5
|
+
),
|
|
6
|
+
__final as (
|
|
7
|
+
select
|
|
8
|
+
{% if has_source %} concat('t.__source == \'', `__source`, '\'') as sources, {% endif %}
|
|
9
|
+
concat_ws(
|
|
10
|
+
' ',
|
|
11
|
+
' (',
|
|
12
|
+
concat('s.__timestamp == \'', `__max_timestamp`, '\''),
|
|
13
|
+
{% if has_source %} concat('and s.__source == \'', `__source`, '\''), {% endif %}
|
|
14
|
+
' )'
|
|
15
|
+
) as `slices`
|
|
16
|
+
from __latest
|
|
17
|
+
)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{% if has_source %} __update_source as (select __source from {{ parent_slice }} group by __source), {% endif %}
|
|
2
|
+
__update as (
|
|
3
|
+
select
|
|
4
|
+
{% if has_source %} s.__source, {% endif %}
|
|
5
|
+
{% if cdc == "nocdc" %}
|
|
6
|
+
coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
|
|
7
|
+
{% endif %}
|
|
8
|
+
{% if cdc == "scd1" %}
|
|
9
|
+
coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
|
|
10
|
+
{% endif %}
|
|
11
|
+
{% if cdc == "scd2" %}
|
|
12
|
+
coalesce(max(t.__valid_from), cast('0001-01-01' as timestamp)) as __max_timestamp
|
|
13
|
+
{% endif %}
|
|
14
|
+
from {{ tgt }} t
|
|
15
|
+
{% if has_source %} right join __update_source s on s.__source == t.__source {% endif %}
|
|
16
|
+
where true
|
|
17
|
+
{% if has_source %} group by s.__source {% endif %}
|
|
18
|
+
),
|
|
19
|
+
__final as (
|
|
20
|
+
select
|
|
21
|
+
{% if has_source %} concat('t.__source == \'', `__source`, '\'') as sources, {% endif %}
|
|
22
|
+
concat_ws(
|
|
23
|
+
' ',
|
|
24
|
+
' (',
|
|
25
|
+
concat('s.__timestamp > \'', `__max_timestamp`, '\''),
|
|
26
|
+
{% if has_source %} concat('and s.__source == \'', `__source`, '\''), {% endif %}
|
|
27
|
+
' )'
|
|
28
|
+
) as `slices`
|
|
29
|
+
from __update
|
|
30
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{# backtick(field): emit the field name wrapped in backticks; the "-" markers trim the whitespace around the macro body. -#}
{% macro backtick(field) -%} `{{ field }}` {%- endmacro %}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{% macro add_hash(fields) -%}
|
|
2
|
+
md5(
|
|
3
|
+
array_join(
|
|
4
|
+
array(
|
|
5
|
+
{% for f in fields %}
|
|
6
|
+
{% if f == "__operation" %}cast(`{{ f }}` <=> 'delete' as string), -- reloads and upserts should have the same hash, not deletes
|
|
7
|
+
{% else %}`{{ f }}`::string,
|
|
8
|
+
{% endif %}
|
|
9
|
+
{% endfor %}
|
|
10
|
+
),
|
|
11
|
+
'*',
|
|
12
|
+
'-1'
|
|
13
|
+
)
|
|
14
|
+
)
|
|
15
|
+
{%- endmacro %}
|
|
16
|
+
{% macro add_key(fields) -%}
|
|
17
|
+
md5(array_join(array({% for f in fields %}`{{ f }}`::string, {% endfor %}), '*', '-1'))
|
|
18
|
+
{%- endmacro %}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{% if format == "dataframe" %}
|
|
2
|
+
merge into {{ tgt }} t using {{ "{src}" }} s
|
|
3
|
+
{% endif %}
|
|
4
|
+
{% if format == "view" %}
|
|
5
|
+
merge into {{ tgt }} t using {{ src }} s
|
|
6
|
+
{% endif %}
|
|
7
|
+
{% if has_key %}
|
|
8
|
+
on t.__key == s.__merge_key
|
|
9
|
+
{% else %}
|
|
10
|
+
on
|
|
11
|
+
{% for k in keys %}
|
|
12
|
+
{# null-safe equality per key; predicates are joined with "and" so composite keys render a valid ON clause -#}
t.{{ k }} <=> s.{{ k }}{% if not loop.last %} and{% endif %}
|
|
13
|
+
{% endfor %}
|
|
14
|
+
{% endif %}
|
|
15
|
+
{% if has_source %}
|
|
16
|
+
and t.__source == s.__source
|
|
17
|
+
{% endif %}
|
|
18
|
+
{% if update_where %} {{ update_where }} {% endif %}
|
|
19
|
+
when matched
|
|
20
|
+
and __merge_condition == 'upsert' then
|
|
21
|
+
update
|
|
22
|
+
set
|
|
23
|
+
{% for c in columns %}
|
|
24
|
+
{{ c }} = s.{{c}},
|
|
25
|
+
{% endfor %}
|
|
26
|
+
-- delete
|
|
27
|
+
when matched
|
|
28
|
+
and __merge_condition == 'delete' then
|
|
29
|
+
delete
|
|
30
|
+
when not matched
|
|
31
|
+
and __merge_condition == 'upsert' then
|
|
32
|
+
insert (
|
|
33
|
+
{% for c in columns %}
|
|
34
|
+
{{ c }},
|
|
35
|
+
{% endfor %}
|
|
36
|
+
)
|
|
37
|
+
values (
|
|
38
|
+
{% for c in columns %}
|
|
39
|
+
s.{{ c }},
|
|
40
|
+
{% endfor %}
|
|
41
|
+
)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
{% if format == "dataframe" %}
|
|
2
|
+
merge into {{ tgt }} t using {{ "{src}" }} s
|
|
3
|
+
{% endif %}
|
|
4
|
+
{% if format == "view" %}
|
|
5
|
+
merge into {{ tgt }} t using {{ src }} s
|
|
6
|
+
{% endif %}
|
|
7
|
+
{% if has_key %}
|
|
8
|
+
on t.__key == s.__merge_key
|
|
9
|
+
{% else %}
|
|
10
|
+
on
|
|
11
|
+
{% for k in keys %}
|
|
12
|
+
{# null-safe equality per key; predicates are joined with "and" so composite keys render a valid ON clause -#}
t.{{ k }} <=> s.{{ k }}{% if not loop.last %} and{% endif %}
|
|
13
|
+
{% endfor %}
|
|
14
|
+
{% endif %}
|
|
15
|
+
{% if has_source %}
|
|
16
|
+
and t.__source == s.__source
|
|
17
|
+
{% endif %}
|
|
18
|
+
{% if update_where %} {{ update_where }} {% endif %}
|
|
19
|
+
when matched
|
|
20
|
+
and __merge_condition == 'upsert' then
|
|
21
|
+
update
|
|
22
|
+
set
|
|
23
|
+
{% for f in fields %}
|
|
24
|
+
{{ f }} = s.{{f}},
|
|
25
|
+
{% endfor %}
|
|
26
|
+
{% if has_timestamp %}
|
|
27
|
+
__timestamp = s.__timestamp,
|
|
28
|
+
{% endif %}
|
|
29
|
+
{% if has_metadata %}
|
|
30
|
+
__metadata.updated = cast(current_timestamp() as timestamp),
|
|
31
|
+
{% endif %}
|
|
32
|
+
{% if has_hash %}
|
|
33
|
+
__hash = s.__hash,
|
|
34
|
+
{% endif %}
|
|
35
|
+
{% if has_rescued_data %}
|
|
36
|
+
__rescued_data = s.__rescued_data,
|
|
37
|
+
{% endif %}
|
|
38
|
+
{% if soft_delete %}
|
|
39
|
+
__is_current = s.__is_current,
|
|
40
|
+
__is_deleted = s.__is_deleted,
|
|
41
|
+
{% endif %}
|
|
42
|
+
{% if soft_delete %}
|
|
43
|
+
-- soft delete
|
|
44
|
+
when matched
|
|
45
|
+
and __merge_condition == 'delete' then
|
|
46
|
+
update
|
|
47
|
+
set
|
|
48
|
+
__is_current = False,
|
|
49
|
+
__is_deleted = True,
|
|
50
|
+
{% if has_timestamp %}
|
|
51
|
+
__timestamp = s.__timestamp,
|
|
52
|
+
{% endif %}
|
|
53
|
+
{% if has_metadata %}
|
|
54
|
+
__metadata.updated = cast(current_timestamp() as timestamp),
|
|
55
|
+
{% endif %}
|
|
56
|
+
{% else %}
|
|
57
|
+
-- delete
|
|
58
|
+
when matched
|
|
59
|
+
and __merge_condition == 'delete' then
|
|
60
|
+
delete
|
|
61
|
+
{% endif %}
|
|
62
|
+
when not matched
|
|
63
|
+
and __merge_condition == 'upsert' then
|
|
64
|
+
insert (
|
|
65
|
+
{% for c in columns %}
|
|
66
|
+
{{ c }},
|
|
67
|
+
{% endfor %}
|
|
68
|
+
)
|
|
69
|
+
values (
|
|
70
|
+
{% for c in columns %}
|
|
71
|
+
s.{{ c }},
|
|
72
|
+
{% endfor %}
|
|
73
|
+
)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{% if format == "dataframe" %}
|
|
2
|
+
merge into {{ tgt }} t using {{ "{src}" }} s
|
|
3
|
+
{% endif %}
|
|
4
|
+
{% if format == "view" %}
|
|
5
|
+
merge into {{ tgt }} t using {{ src }} s
|
|
6
|
+
{% endif %}
|
|
7
|
+
{% if has_key %}
|
|
8
|
+
on t.__key == s.__merge_key
|
|
9
|
+
{% else %}
|
|
10
|
+
on
|
|
11
|
+
{% for k in keys %}
|
|
12
|
+
{# null-safe equality per key; "and" is emitted only between predicates because the line after the loop already starts with "and t.__is_current" -#}
t.{{ k }} <=> s.{{ k }}{% if not loop.last %} and{% endif %}
|
|
13
|
+
{% endfor %}
|
|
14
|
+
{% endif %}
|
|
15
|
+
and t.__is_current
|
|
16
|
+
{% if has_source %}
|
|
17
|
+
and t.__source == s.__source
|
|
18
|
+
{% endif %}
|
|
19
|
+
when matched
|
|
20
|
+
and __merge_condition == 'update' then
|
|
21
|
+
update
|
|
22
|
+
set
|
|
23
|
+
__valid_to = s.__valid_from - interval 1 seconds,
|
|
24
|
+
__is_current = False,
|
|
25
|
+
{% if soft_delete %}
|
|
26
|
+
__is_deleted = False,
|
|
27
|
+
{% endif %}
|
|
28
|
+
{% if has_metadata %}
|
|
29
|
+
__metadata.updated = cast(current_timestamp() as timestamp),
|
|
30
|
+
{% endif %}
|
|
31
|
+
when matched
|
|
32
|
+
and __merge_condition == 'delete' then
|
|
33
|
+
update
|
|
34
|
+
set
|
|
35
|
+
__valid_to = s.__valid_from - interval 1 seconds,
|
|
36
|
+
__is_current = False,
|
|
37
|
+
{% if soft_delete %}
|
|
38
|
+
__is_deleted = True,
|
|
39
|
+
{% endif %}
|
|
40
|
+
{% if has_metadata %}
|
|
41
|
+
__metadata.updated = cast(current_timestamp() as timestamp),
|
|
42
|
+
{% endif %}
|
|
43
|
+
when not matched
|
|
44
|
+
and __merge_condition == 'insert' then
|
|
45
|
+
insert (
|
|
46
|
+
{% for c in columns %}
|
|
47
|
+
{{ c }},
|
|
48
|
+
{% endfor %}
|
|
49
|
+
)
|
|
50
|
+
values (
|
|
51
|
+
{% for c in columns %}
|
|
52
|
+
s.{{ c }},
|
|
53
|
+
{% endfor %}
|
|
54
|
+
)
|
|
File without changes
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/*
|
|
2
|
+
|
|
3
|
+
⚙️ BASE
|
|
4
|
+
{%- if cdc %}
|
|
5
|
+
☐ cdc: {{ cdc }}
|
|
6
|
+
{%- endif %}
|
|
7
|
+
{%- if mode %}
|
|
8
|
+
☐ mode: {{ mode }}
|
|
9
|
+
{%- endif %}
|
|
10
|
+
|
|
11
|
+
🎯 SOURCE & TARGET
|
|
12
|
+
{%- if format %}
|
|
13
|
+
☐ format: {{ format }}
|
|
14
|
+
{%- endif %}
|
|
15
|
+
{%- if src %}
|
|
16
|
+
☐ src: {{ src | truncate(100, killwords=True) }}
|
|
17
|
+
{%- endif %}
|
|
18
|
+
{%- if tgt %}
|
|
19
|
+
☐ tgt: {{ tgt }}
|
|
20
|
+
{%- endif %}
|
|
21
|
+
|
|
22
|
+
📊 CTE's
|
|
23
|
+
{%- if slice %}
|
|
24
|
+
🗹 slice?
|
|
25
|
+
{%- endif %}
|
|
26
|
+
{%- if deduplicate %}
|
|
27
|
+
🗹 deduplicate?
|
|
28
|
+
{%- endif %}
|
|
29
|
+
{%- if advanced_deduplication %}
|
|
30
|
+
🗹 advanced deduplication?
|
|
31
|
+
{%- endif %}
|
|
32
|
+
{%- if deduplicate_key %}
|
|
33
|
+
🗹 deduplicate key?
|
|
34
|
+
{%- endif %}
|
|
35
|
+
{%- if deduplicate_hash %}
|
|
36
|
+
🗹 deduplicate hash?
|
|
37
|
+
{%- endif %}
|
|
38
|
+
{%- if order_duplicate_by %}
|
|
39
|
+
🗹 order duplicate by?
|
|
40
|
+
{%- endif %}
|
|
41
|
+
{%- if rectify %}
|
|
42
|
+
🗹 rectify?
|
|
43
|
+
{%- endif %}
|
|
44
|
+
{%- if correct_valid_from %}
|
|
45
|
+
🗹 correct valid from?
|
|
46
|
+
{%- endif %}
|
|
47
|
+
|
|
48
|
+
🔪 FILTERING
|
|
49
|
+
{%- if filter_where %}
|
|
50
|
+
☐ filter where: {{ filter_where }}
|
|
51
|
+
{%- endif %}
|
|
52
|
+
{%- if update_where %}
|
|
53
|
+
☐ update where: {{ update_where }}
|
|
54
|
+
{%- endif %}
|
|
55
|
+
{%- if slices %}
|
|
56
|
+
☐ slices: {{ slices }}
|
|
57
|
+
{%- endif %}
|
|
58
|
+
{%- if sources %}
|
|
59
|
+
☐ sources: {{ sources }}
|
|
60
|
+
{%- endif %}
|
|
61
|
+
|
|
62
|
+
🗑️ DELETES
|
|
63
|
+
{%- if delete_missing %}
|
|
64
|
+
🗹 delete missing?
|
|
65
|
+
{%- endif %}
|
|
66
|
+
{%- if soft_delete %}
|
|
67
|
+
🗹 soft delete?
|
|
68
|
+
{%- endif %}
|
|
69
|
+
|
|
70
|
+
✅ DATA VALIDATION
|
|
71
|
+
{%- if has_no_data %}
|
|
72
|
+
☒ has data?
|
|
73
|
+
{%- else %}
|
|
74
|
+
🗹 has data?
|
|
75
|
+
{%- endif %}
|
|
76
|
+
{%- if has_rows %}
|
|
77
|
+
🗹 has rows?
|
|
78
|
+
{%- else %}
|
|
79
|
+
☒ has rows?
|
|
80
|
+
{%- endif %}
|
|
81
|
+
{%- if has_source %}
|
|
82
|
+
🗹 has source?
|
|
83
|
+
{%- endif %}
|
|
84
|
+
|
|
85
|
+
🏷️ HAS FIELDS
|
|
86
|
+
{%- if has_metadata %}
|
|
87
|
+
🗹 has metadata?
|
|
88
|
+
{%- endif %}
|
|
89
|
+
{%- if has_timestamp %}
|
|
90
|
+
🗹 has timestamp?
|
|
91
|
+
{%- endif %}
|
|
92
|
+
{%- if has_identity %}
|
|
93
|
+
🗹 has identity?
|
|
94
|
+
{%- endif %}
|
|
95
|
+
{%- if has_key %}
|
|
96
|
+
🗹 has key?
|
|
97
|
+
{%- endif %}
|
|
98
|
+
{%- if has_hash %}
|
|
99
|
+
🗹 has hash?
|
|
100
|
+
{%- endif %}
|
|
101
|
+
{%- if has_operation %}
|
|
102
|
+
🗹 has operation?
|
|
103
|
+
{%- endif %}
|
|
104
|
+
{%- if has_order_by %}
|
|
105
|
+
🗹 has order by?
|
|
106
|
+
{%- endif %}
|
|
107
|
+
{%- if has_rescued_data %}
|
|
108
|
+
🗹 has rescued data?
|
|
109
|
+
{%- endif %}
|
|
110
|
+
|
|
111
|
+
➕ ADD COLUMNS
|
|
112
|
+
{%- if add_metadata %}
|
|
113
|
+
🗹 add metadata?
|
|
114
|
+
{%- endif %}
|
|
115
|
+
{%- if add_timestamp %}
|
|
116
|
+
🗹 add timestamp?
|
|
117
|
+
{%- endif %}
|
|
118
|
+
{%- if add_key %}
|
|
119
|
+
🗹 add key?
|
|
120
|
+
{%- endif %}
|
|
121
|
+
{%- if add_hash %}
|
|
122
|
+
🗹 add hash?
|
|
123
|
+
{%- endif %}
|
|
124
|
+
{%- if add_operation %}
|
|
125
|
+
☐ add_operation: {{ add_operation }}
|
|
126
|
+
{%- endif %}
|
|
127
|
+
{%- if add_source %}
|
|
128
|
+
☐ add_source: {{ add_source }}
|
|
129
|
+
{%- endif %}
|
|
130
|
+
{%- if add_calculated_columns %}
|
|
131
|
+
☐ add_calculated_columns: {{ add_calculated_columns }}
|
|
132
|
+
{%- endif %}
|
|
133
|
+
|
|
134
|
+
🔄 EXTRA COLUMN OPERATIONs
|
|
135
|
+
{%- if all_except %}
|
|
136
|
+
☐ all_except: {{ all_except | join(", ") | truncate(100, killwords=True) }}
|
|
137
|
+
{%- endif %}
|
|
138
|
+
{%- if all_overwrite %}
|
|
139
|
+
☐ all_overwrite: {{ all_overwrite | join(", ") | truncate(100, killwords=True) }}
|
|
140
|
+
{%- endif %}
|
|
141
|
+
{%- if overwrite %}
|
|
142
|
+
☐ overwrite: {{ overwrite | join(", ") | truncate(100, killwords=True) }}
|
|
143
|
+
{%- endif %}
|
|
144
|
+
|
|
145
|
+
👨👩👧 PARENTS
|
|
146
|
+
{%- if parent_slice %}
|
|
147
|
+
☐ parent_slice: {{ parent_slice }}
|
|
148
|
+
{%- endif %}
|
|
149
|
+
{%- if parent_rectify %}
|
|
150
|
+
☐ parent_rectify: {{ parent_rectify }}
|
|
151
|
+
{%- endif %}
|
|
152
|
+
{%- if parent_deduplicate_key %}
|
|
153
|
+
☐ parent_deduplicate_key: {{ parent_deduplicate_key }}
|
|
154
|
+
{%- endif %}
|
|
155
|
+
{%- if parent_deduplicate_hash %}
|
|
156
|
+
☐ parent_deduplicate_hash: {{ parent_deduplicate_hash }}
|
|
157
|
+
{%- endif %}
|
|
158
|
+
{%- if parent_cdc %}
|
|
159
|
+
☐ parent_cdc: {{ parent_cdc }}
|
|
160
|
+
{%- endif %}
|
|
161
|
+
{%- if parent_final %}
|
|
162
|
+
☐ parent_final: {{ parent_final }}
|
|
163
|
+
{%- endif %}
|
|
164
|
+
|
|
165
|
+
📦 LAYOUT
|
|
166
|
+
{%- if columns %}
|
|
167
|
+
☐ columns: {{ columns | join(", ") | truncate(100, killwords=True) }}
|
|
168
|
+
{%- endif %}
|
|
169
|
+
{%- if inputs %}
|
|
170
|
+
☐ inputs: {{ inputs | join(", ") | truncate(100, killwords=True) }}
|
|
171
|
+
{%- endif %}
|
|
172
|
+
{%- if intermediates %}
|
|
173
|
+
☐ intermediates: {{ intermediates | join(", ") | truncate(100, killwords=True) }}
|
|
174
|
+
{%- endif %}
|
|
175
|
+
{%- if outputs %}
|
|
176
|
+
☐ outputs: {{ outputs | join(", ") | truncate(100, killwords=True) }}
|
|
177
|
+
{%- endif %}
|
|
178
|
+
{%- if keys %}
|
|
179
|
+
☐ keys: {{ keys | join(", ") | truncate(100, killwords=True) }}
|
|
180
|
+
{%- endif %}
|
|
181
|
+
{%- if hashes %}
|
|
182
|
+
☐ hashes: {{ hashes | join(", ") | truncate(100, killwords=True) }}
|
|
183
|
+
{%- endif %}
|
|
184
|
+
|
|
185
|
+
*/
|
|
186
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{# Final projection over __final; when all_except is set, the listed columns are removed via `except (...)`. -#}
select * {% if all_except %} except ({% for e in all_except %}{{ e }}, {% endfor %}), {% endif %} from __final
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{% if has_rows %}
|
|
2
|
+
__merge_condition as (
|
|
3
|
+
select
|
|
4
|
+
s.__key as __merge_key,
|
|
5
|
+
'upsert' as __merge_condition,
|
|
6
|
+
{% for output in outputs %} s.`{{ output }}`, {% endfor %}
|
|
7
|
+
from {{ parent_cdc }} s
|
|
8
|
+
left anti join
|
|
9
|
+
__current c on s.__key == c.__key and s.__hash == c.__hash
|
|
10
|
+
{% if has_source %} and s.__source = c.__source {% endif %}
|
|
11
|
+
{% if delete_missing %}
|
|
12
|
+
union all
|
|
13
|
+
select
|
|
14
|
+
c.__key as __merge_key,
|
|
15
|
+
'delete' as __merge_condition,
|
|
16
|
+
{% for output in outputs %} c.`{{ output }}`, {% endfor %}
|
|
17
|
+
from __current c
|
|
18
|
+
left anti join
|
|
19
|
+
{{ parent_cdc }} s on s.__key == c.__key {% if has_source %} and s.__source = c.__source {% endif %}
|
|
20
|
+
{% endif %}
|
|
21
|
+
),
|
|
22
|
+
{% else %}
|
|
23
|
+
__merge_condition as (select s.__key as __merge_key, 'upsert' as __merge_condition, s.* from {{ parent_cdc }} s),
|
|
24
|
+
{% endif %}
|
|
25
|
+
__final as (
|
|
26
|
+
select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
|
|
27
|
+
from __merge_condition
|
|
28
|
+
{% if filter %}
|
|
29
|
+
where
|
|
30
|
+
true
|
|
31
|
+
-- operation current added by filter
|
|
32
|
+
and __operation <> 'current'
|
|
33
|
+
{% endif %}
|
|
34
|
+
)
|