fabricks 3.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
@@ -0,0 +1,113 @@
1
+ {% if mode == "update" %} {# update mode only: __rectified_base stacks the rows of parent_rectify and, when has_rows is set, the rows of __current #}
2
+ __rectified_base as (
3
+ select
4
+ {% for i in intermediates %} `{{ i }}`, {% endfor %}
5
+ __timestamp as __original_timestamp,
6
+ __operation as __original_operation,
7
+ from {{ parent_rectify }}
8
+ {% if has_rows %} {# same projection applied to __current so both branches of the union line up #}
9
+ union all
10
+ select
11
+ {% for i in intermediates %} `{{ i }}`, {% endfor %}
12
+ __timestamp as __original_timestamp,
13
+ __operation as __original_operation,
14
+ from __current
15
+ {% endif %}
16
+ ),
17
+ {% endif %}
18
+ __rectified_next_operation as ( {# adds, per (__source,) __key ordered by __timestamp, the operation of the following row #}
19
+ select
20
+ *,
21
+ lead(__operation) over (
22
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
23
+ ) as __rectified_next_operation
24
+ {% if mode == "update" %} from __rectified_base p
25
+ {% else %} from {{ parent_rectify }}
26
+ {% endif %} {# update mode reads the stacked base CTE, any other mode reads parent_rectify directly #}
27
+ ),
28
+ __rectified_timestamps as ( {# one row per (__source,) __timestamp with the next timestamp and the next timestamp flagged as a reload #}
29
+ select
30
+ {% if has_source %} __source, {% endif %}
31
+ __timestamp,
32
+ lead(__timestamp) over (
33
+ {% if has_source %}partition by __source {% endif %} order by __timestamp asc
34
+ ) as __rectified_next_timestamp,
35
+ lead(if(max(__operation) == 'reload', __timestamp, null)) ignore nulls over ( {# a timestamp counts as a reload when max(__operation) of its group equals 'reload'; ignore nulls skips the other timestamps #}
36
+ {% if has_source %}partition by __source {% endif %} order by __timestamp asc
37
+ ) as __rectified_next_timestamp_reload,
38
+ from __rectified_next_operation
39
+ group by {% if has_source %} __source, {% endif %} __timestamp
40
+ ),
41
+ __rectified_is_deleted_next as ( {# every row of __rectified_next_operation plus the boolean flags that feed __rectified_is_deleted_next #}
42
+ select
43
+ cur.*,
44
+ t.* except ({% if has_source %}t.__source, {% endif %} t.__timestamp),
45
+ -- there is at least one later reload
46
+ __rectified_next_timestamp_reload is not null as __rectify_more_reload,
47
+ -- the next operation is before the next reload
48
+ if(
49
+ __rectify_more_reload, t.__rectified_next_timestamp < t.__rectified_next_timestamp_reload, true
50
+ ) as __rectify_key_next_operation_before_next_reload,
51
+ -- the record is deleted
52
+ cur.__operation == 'delete' as __rectified_key_is_deleted,
53
+ -- the record is not found in next reload
54
+ __rectify_more_reload and nxt.__timestamp is null as __rectified_key_not_found_in_next_reload,
55
+ -- there is no more operation
56
+ t.__rectified_next_timestamp is null as __rectified_no_more_operation,
57
+ -- the record is deleted before next reload
58
+ __rectify_key_next_operation_before_next_reload
59
+ and cur.__rectified_next_operation <=> 'delete' as __rectified_key_is_deleted_next,
60
+ -- the record is updated before next reload
61
+ __rectify_key_next_operation_before_next_reload
62
+ and cur.__rectified_next_operation <=> 'upsert' as __rectified_key_is_updated_next,
63
+ case {# true only when the key is missing from the next reload and none of the earlier branches matched #}
64
+ when __rectified_key_is_deleted
65
+ then false
66
+ when __rectified_key_is_updated_next
67
+ then false
68
+ when __rectified_key_is_deleted_next
69
+ then false
70
+ when __rectified_no_more_operation
71
+ then false
72
+ when __rectified_key_not_found_in_next_reload
73
+ then true
74
+ else false
75
+ end as __rectified_is_deleted_next
76
+ from __rectified_next_operation cur
77
+ left join
78
+ __rectified_timestamps t on cur.__timestamp = t.__timestamp
79
+ {% if has_source %} and cur.__source == t.__source {% endif %}
80
+ left join
81
+ __rectified_next_operation nxt
82
+ on t.__rectified_next_timestamp_reload = nxt.__timestamp
83
+ and cur.__key == nxt.__key
84
+ {% if has_source %} and cur.__source == nxt.__source {% endif %}
85
+ ),
86
+ {% if mode == "complete" %} __rectified as ( {% else %} __rectified_operation as (
87
+ {% endif %} {# the CTE is named __rectified in complete mode and __rectified_operation otherwise #}
88
+ select
89
+ c.* except (c.__operation, c.__timestamp),
90
+ if(c.__operation == 'delete', 'delete', d2.__rectified_operation) as __operation,
91
+ case {# deletes and upserts keep their own timestamp; an injected delete takes the timestamp of the next reload #}
92
+ when c.__operation == 'delete'
93
+ then c.__timestamp
94
+ when d2.__rectified_operation == 'upsert'
95
+ then c.__timestamp
96
+ else c.__rectified_next_timestamp_reload
97
+ end as __timestamp
98
+ from __rectified_is_deleted_next c
99
+ cross join
100
+ ( {# lookup table: flag false matches one 'upsert' row; flag true matches a 'delete' row and an 'upsert' row #}
101
+ select false as __is_deleted, 'upsert' as __rectified_operation
102
+ union all
103
+ select true as __is_deleted, 'delete' as __rectified_operation
104
+ union all
105
+ select true as __is_deleted, 'upsert' as __rectified_operation
106
+ ) d2
107
+ on c.__rectified_is_deleted_next = d2.__is_deleted
108
+ ),
109
+ {% if mode == "update" %} {# update mode only: drop rows whose original operation was 'current' and whose rectified operation is 'upsert' #}
110
+ __rectified as (
111
+ select * from __rectified_operation where not (__original_operation == 'current' and __operation == 'upsert')
112
+ ),
113
+ {% endif %}
@@ -0,0 +1 @@
1
+ __sliced as (select * from {{ parent_slice }} s where true and ({{ slices }})), {# keeps the rows of parent_slice (aliased s) that satisfy the rendered slices predicate #}
@@ -0,0 +1,4 @@
1
+ {# Filter query: base CTE include, then the filter matching `slice`, then the final select. #} {% include 'ctes/base.sql.jinja' %}
2
+ {% if slice == "update" %} {% include 'filters/update.sql.jinja' %}
3
+ {% elif slice == "latest" %} {% include 'filters/latest.sql.jinja' %} {% endif %}
4
+ {% include 'filters/final.sql.jinja' %}
@@ -0,0 +1,4 @@
1
+ select {# collapses every row of __final into a single row whose predicates are joined with ' or ' #}
2
+ array_join(collect_list(`slices`), ' or ') as `slices`,
3
+ {% if has_source %} array_join(collect_list(`sources`), ' or ') as `sources` {% endif %} {# NOTE(review): without has_source the line above ends in a trailing comma; presumably normalised downstream, confirm #}
4
+ from __final
@@ -0,0 +1,17 @@
1
+ __latest as ( {# highest __timestamp found in parent_slice, per __source when has_source is set #}
2
+ select {% if has_source %} __source, {% endif %} max(__timestamp) as __max_timestamp
3
+ from {{ parent_slice }}
4
+ {% if has_source %} group by __source {% endif %}
5
+ ),
6
+ __final as ( {# turns each __latest row into predicate text on alias s (and a sources predicate on alias t) #}
7
+ select
8
+ {% if has_source %} concat('t.__source == \'', `__source`, '\'') as sources, {% endif %}
9
+ concat_ws(
10
+ ' ',
11
+ ' (',
12
+ concat('s.__timestamp == \'', `__max_timestamp`, '\''),
13
+ {% if has_source %} concat('and s.__source == \'', `__source`, '\''), {% endif %}
14
+ ' )'
15
+ ) as `slices`
16
+ from __latest
17
+ )
@@ -0,0 +1,30 @@
1
+ {% if has_source %} __update_source as (select __source from {{ parent_slice }} group by __source), {% endif %} {# distinct sources present in parent_slice #}
2
+ __update as ( {# highest timestamp already stored in tgt, defaulting to 0001-01-01 when tgt has no matching row #}
3
+ select
4
+ {% if has_source %} s.__source, {% endif %}
5
+ {% if cdc == "nocdc" %}
6
+ coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
7
+ {% endif %}
8
+ {% if cdc == "scd1" %}
9
+ coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
10
+ {% endif %}
11
+ {% if cdc == "scd2" %} {# scd2 compares against __valid_from instead of __timestamp #}
12
+ coalesce(max(t.__valid_from), cast('0001-01-01' as timestamp)) as __max_timestamp
13
+ {% endif %}
14
+ from {{ tgt }} t
15
+ {% if has_source %} right join __update_source s on s.__source == t.__source {% endif %} {# right join keeps sources that are not yet in tgt #}
16
+ where true
17
+ {% if has_source %} group by s.__source {% endif %}
18
+ ),
19
+ __final as ( {# turns each __update row into predicate text selecting rows of alias s newer than __max_timestamp #}
20
+ select
21
+ {% if has_source %} concat('t.__source == \'', `__source`, '\'') as sources, {% endif %}
22
+ concat_ws(
23
+ ' ',
24
+ ' (',
25
+ concat('s.__timestamp > \'', `__max_timestamp`, '\''),
26
+ {% if has_source %} concat('and s.__source == \'', `__source`, '\''), {% endif %}
27
+ ' )'
28
+ ) as `slices`
29
+ from __update
30
+ )
@@ -0,0 +1 @@
1
+ {# backtick(field): renders field wrapped in backticks; the whitespace-control markers trim the surrounding blanks. #} {% macro backtick(field) -%} `{{ field }}` {%- endmacro %}
@@ -0,0 +1,18 @@
1
+ {# add_hash(fields): renders an md5 over the string-cast fields joined with '*' (nulls replaced by '-1'); __operation contributes only whether it equals 'delete'. #} {% macro add_hash(fields) -%}
2
+ md5(
3
+ array_join(
4
+ array(
5
+ {% for f in fields %}
6
+ {% if f == "__operation" %}cast(`{{ f }}` <=> 'delete' as string), {# reloads and upserts should have the same hash, not deletes; the comma is kept outside any SQL comment so the next element stays separated #}
7
+ {% else %}`{{ f }}`::string,
8
+ {% endif %}
9
+ {% endfor %}
10
+ ),
11
+ '*',
12
+ '-1'
13
+ )
14
+ )
15
+ {%- endmacro %}
16
+ {# add_key(fields): renders an md5 over the string-cast fields joined with '*' (nulls replaced by '-1'). #} {% macro add_key(fields) -%}
17
+ md5(array_join(array({% for f in fields %}`{{ f }}`::string, {% endfor %}), '*', '-1'))
18
+ {%- endmacro %}
@@ -0,0 +1,3 @@
1
+ {# Includes the merge template that matches `cdc`. #} {% if cdc == "scd1" %} {% include 'merges/scd1.sql.jinja' %}
2
+ {% elif cdc == "scd2" %} {% include 'merges/scd2.sql.jinja' %}
3
+ {% elif cdc == "nocdc" %} {% include 'merges/nocdc.sql.jinja' %} {% endif %}
@@ -0,0 +1,41 @@
1
+ {# nocdc merge: upsert rows update or insert every column, delete rows remove the matched target row. The dataframe format emits the literal placeholder {src} instead of a source name. #} {% if format == "dataframe" %}
2
+ merge into {{ tgt }} t using {{ "{src}" }} s
3
+ {% endif %}
4
+ {% if format == "view" %}
5
+ merge into {{ tgt }} t using {{ src }} s
6
+ {% endif %}
7
+ {% if has_key %}
8
+ on t.__key == s.__merge_key
9
+ {% else %}
10
+ on
11
+ {% for k in keys %}
12
+ {% if not loop.first %}and {% endif %}t.{{ k }} <=> s.{{ k }} {# null-safe match per key; predicates after the first are chained with and #}
13
+ {% endfor %}
14
+ {% endif %}
15
+ {% if has_source %}
16
+ and t.__source == s.__source
17
+ {% endif %}
18
+ {% if update_where %} {{ update_where }} {% endif %}
19
+ when matched
20
+ and __merge_condition == 'upsert' then
21
+ update
22
+ set
23
+ {% for c in columns %}
24
+ {{ c }} = s.{{c}},
25
+ {% endfor %}
26
+ -- delete
27
+ when matched
28
+ and __merge_condition == 'delete' then
29
+ delete
30
+ when not matched
31
+ and __merge_condition == 'upsert' then
32
+ insert (
33
+ {% for c in columns %}
34
+ {{ c }},
35
+ {% endfor %}
36
+ )
37
+ values (
38
+ {% for c in columns %}
39
+ s.{{ c }},
40
+ {% endfor %}
41
+ )
@@ -0,0 +1,73 @@
1
+ {# scd1 merge: upsert rows overwrite the matched target row or insert a new one; delete rows are flagged when soft_delete is set and removed otherwise. The dataframe format emits the literal placeholder {src}. #} {% if format == "dataframe" %}
2
+ merge into {{ tgt }} t using {{ "{src}" }} s
3
+ {% endif %}
4
+ {% if format == "view" %}
5
+ merge into {{ tgt }} t using {{ src }} s
6
+ {% endif %}
7
+ {% if has_key %}
8
+ on t.__key == s.__merge_key
9
+ {% else %}
10
+ on
11
+ {% for k in keys %}
12
+ {% if not loop.first %}and {% endif %}t.{{ k }} <=> s.{{ k }} {# null-safe match per key; predicates after the first are chained with and #}
13
+ {% endfor %}
14
+ {% endif %}
15
+ {% if has_source %}
16
+ and t.__source == s.__source
17
+ {% endif %}
18
+ {% if update_where %} {{ update_where }} {% endif %}
19
+ when matched
20
+ and __merge_condition == 'upsert' then
21
+ update
22
+ set
23
+ {% for f in fields %}
24
+ {{ f }} = s.{{f}},
25
+ {% endfor %}
26
+ {% if has_timestamp %}
27
+ __timestamp = s.__timestamp,
28
+ {% endif %}
29
+ {% if has_metadata %}
30
+ __metadata.updated = cast(current_timestamp() as timestamp),
31
+ {% endif %}
32
+ {% if has_hash %}
33
+ __hash = s.__hash,
34
+ {% endif %}
35
+ {% if has_rescued_data %}
36
+ __rescued_data = s.__rescued_data,
37
+ {% endif %}
38
+ {% if soft_delete %}
39
+ __is_current = s.__is_current,
40
+ __is_deleted = s.__is_deleted,
41
+ {% endif %}
42
+ {% if soft_delete %}
43
+ -- soft delete
44
+ when matched
45
+ and __merge_condition == 'delete' then
46
+ update
47
+ set
48
+ __is_current = False,
49
+ __is_deleted = True,
50
+ {% if has_timestamp %}
51
+ __timestamp = s.__timestamp,
52
+ {% endif %}
53
+ {% if has_metadata %}
54
+ __metadata.updated = cast(current_timestamp() as timestamp),
55
+ {% endif %}
56
+ {% else %}
57
+ -- delete
58
+ when matched
59
+ and __merge_condition == 'delete' then
60
+ delete
61
+ {% endif %}
62
+ when not matched
63
+ and __merge_condition == 'upsert' then
64
+ insert (
65
+ {% for c in columns %}
66
+ {{ c }},
67
+ {% endfor %}
68
+ )
69
+ values (
70
+ {% for c in columns %}
71
+ s.{{ c }},
72
+ {% endfor %}
73
+ )
@@ -0,0 +1,54 @@
1
+ {# scd2 merge: update and delete rows close the matched current target row (__valid_to, __is_current), insert rows add a new row. The dataframe format emits the literal placeholder {src}. #} {% if format == "dataframe" %}
2
+ merge into {{ tgt }} t using {{ "{src}" }} s
3
+ {% endif %}
4
+ {% if format == "view" %}
5
+ merge into {{ tgt }} t using {{ src }} s
6
+ {% endif %}
7
+ {% if has_key %}
8
+ on t.__key == s.__merge_key
9
+ {% else %}
10
+ on
11
+ {% for k in keys %}
12
+ {% if not loop.first %}and {% endif %}t.{{ k }} <=> s.{{ k }} {# no trailing and here: the next clause already starts with and #}
13
+ {% endfor %}
14
+ {% endif %}
15
+ and t.__is_current
16
+ {% if has_source %}
17
+ and t.__source == s.__source
18
+ {% endif %}
19
+ when matched
20
+ and __merge_condition == 'update' then
21
+ update
22
+ set
23
+ __valid_to = s.__valid_from - interval 1 seconds,
24
+ __is_current = False,
25
+ {% if soft_delete %}
26
+ __is_deleted = False,
27
+ {% endif %}
28
+ {% if has_metadata %}
29
+ __metadata.updated = cast(current_timestamp() as timestamp),
30
+ {% endif %}
31
+ when matched
32
+ and __merge_condition == 'delete' then
33
+ update
34
+ set
35
+ __valid_to = s.__valid_from - interval 1 seconds,
36
+ __is_current = False,
37
+ {% if soft_delete %}
38
+ __is_deleted = True,
39
+ {% endif %}
40
+ {% if has_metadata %}
41
+ __metadata.updated = cast(current_timestamp() as timestamp),
42
+ {% endif %}
43
+ when not matched
44
+ and __merge_condition == 'insert' then
45
+ insert (
46
+ {% for c in columns %}
47
+ {{ c }},
48
+ {% endfor %}
49
+ )
50
+ values (
51
+ {% for c in columns %}
52
+ s.{{ c }},
53
+ {% endfor %}
54
+ )
File without changes
@@ -0,0 +1,186 @@
1
+ /*
2
+
3
+ ⚙️ BASE
4
+ {%- if cdc %}
5
+ ☐ cdc: {{ cdc }}
6
+ {%- endif %}
7
+ {%- if mode %}
8
+ ☐ mode: {{ mode }}
9
+ {%- endif %}
10
+
11
+ 🎯 SOURCE & TARGET
12
+ {%- if format %}
13
+ ☐ format: {{ format }}
14
+ {%- endif %}
15
+ {%- if src %}
16
+ ☐ src: {{ src | truncate(100, killwords=True) }}
17
+ {%- endif %}
18
+ {%- if tgt %}
19
+ ☐ tgt: {{ tgt }}
20
+ {%- endif %}
21
+
22
+ 📊 CTE's
23
+ {%- if slice %}
24
+ 🗹 slice?
25
+ {%- endif %}
26
+ {%- if deduplicate %}
27
+ 🗹 deduplicate?
28
+ {%- endif %}
29
+ {%- if advanced_deduplication %}
30
+ 🗹 advanced deduplication?
31
+ {%- endif %}
32
+ {%- if deduplicate_key %}
33
+ 🗹 deduplicate key?
34
+ {%- endif %}
35
+ {%- if deduplicate_hash %}
36
+ 🗹 deduplicate hash?
37
+ {%- endif %}
38
+ {%- if order_duplicate_by %}
39
+ 🗹 order duplicate by?
40
+ {%- endif %}
41
+ {%- if rectify %}
42
+ 🗹 rectify?
43
+ {%- endif %}
44
+ {%- if correct_valid_from %}
45
+ 🗹 correct valid from?
46
+ {%- endif %}
47
+
48
+ 🔪 FILTERING
49
+ {%- if filter_where %}
50
+ ☐ filter where: {{ filter_where }}
51
+ {%- endif %}
52
+ {%- if update_where %}
53
+ ☐ update where: {{ update_where }}
54
+ {%- endif %}
55
+ {%- if slices %}
56
+ ☐ slices: {{ slices }}
57
+ {%- endif %}
58
+ {%- if sources %}
59
+ ☐ sources: {{ sources }}
60
+ {%- endif %}
61
+
62
+ 🗑️ DELETES
63
+ {%- if delete_missing %}
64
+ 🗹 delete missing?
65
+ {%- endif %}
66
+ {%- if soft_delete %}
67
+ 🗹 soft delete?
68
+ {%- endif %}
69
+
70
+ ✅ DATA VALIDATION
71
+ {%- if has_no_data %}
72
+ ☒ has data?
73
+ {%- else %}
74
+ 🗹 has data?
75
+ {%- endif %}
76
+ {%- if has_rows %}
77
+ 🗹 has rows?
78
+ {%- else %}
79
+ ☒ has rows?
80
+ {%- endif %}
81
+ {%- if has_source %}
82
+ 🗹 has source?
83
+ {%- endif %}
84
+
85
+ 🏷️ HAS FIELDS
86
+ {%- if has_metadata %}
87
+ 🗹 has metadata?
88
+ {%- endif %}
89
+ {%- if has_timestamp %}
90
+ 🗹 has timestamp?
91
+ {%- endif %}
92
+ {%- if has_identity %}
93
+ 🗹 has identity?
94
+ {%- endif %}
95
+ {%- if has_key %}
96
+ 🗹 has key?
97
+ {%- endif %}
98
+ {%- if has_hash %}
99
+ 🗹 has hash?
100
+ {%- endif %}
101
+ {%- if has_operation %}
102
+ 🗹 has operation?
103
+ {%- endif %}
104
+ {%- if has_order_by %}
105
+ 🗹 has order by?
106
+ {%- endif %}
107
+ {%- if has_rescued_data %}
108
+ 🗹 has rescued data?
109
+ {%- endif %}
110
+
111
+ ➕ ADD COLUMNS
112
+ {%- if add_metadata %}
113
+ 🗹 add metadata?
114
+ {%- endif %}
115
+ {%- if add_timestamp %}
116
+ 🗹 add timestamp?
117
+ {%- endif %}
118
+ {%- if add_key %}
119
+ 🗹 add key?
120
+ {%- endif %}
121
+ {%- if add_hash %}
122
+ 🗹 add hash?
123
+ {%- endif %}
124
+ {%- if add_operation %}
125
+ ☐ add_operation: {{ add_operation }}
126
+ {%- endif %}
127
+ {%- if add_source %}
128
+ ☐ add_source: {{ add_source }}
129
+ {%- endif %}
130
+ {%- if add_calculated_columns %}
131
+ ☐ add_calculated_columns: {{ add_calculated_columns }}
132
+ {%- endif %}
133
+
134
+ 🔄 EXTRA COLUMN OPERATIONS
135
+ {%- if all_except %}
136
+ ☐ all_except: {{ all_except | join(", ") | truncate(100, killwords=True) }}
137
+ {%- endif %}
138
+ {%- if all_overwrite %}
139
+ ☐ all_overwrite: {{ all_overwrite | join(", ") | truncate(100, killwords=True) }}
140
+ {%- endif %}
141
+ {%- if overwrite %}
142
+ ☐ overwrite: {{ overwrite | join(", ") | truncate(100, killwords=True) }}
143
+ {%- endif %}
144
+
145
+ 👨‍👩‍👧 PARENTS
146
+ {%- if parent_slice %}
147
+ ☐ parent_slice: {{ parent_slice }}
148
+ {%- endif %}
149
+ {%- if parent_rectify %}
150
+ ☐ parent_rectify: {{ parent_rectify }}
151
+ {%- endif %}
152
+ {%- if parent_deduplicate_key %}
153
+ ☐ parent_deduplicate_key: {{ parent_deduplicate_key }}
154
+ {%- endif %}
155
+ {%- if parent_deduplicate_hash %}
156
+ ☐ parent_deduplicate_hash: {{ parent_deduplicate_hash }}
157
+ {%- endif %}
158
+ {%- if parent_cdc %}
159
+ ☐ parent_cdc: {{ parent_cdc }}
160
+ {%- endif %}
161
+ {%- if parent_final %}
162
+ ☐ parent_final: {{ parent_final }}
163
+ {%- endif %}
164
+
165
+ 📦 LAYOUT
166
+ {%- if columns %}
167
+ ☐ columns: {{ columns | join(", ") | truncate(100, killwords=True) }}
168
+ {%- endif %}
169
+ {%- if inputs %}
170
+ ☐ inputs: {{ inputs | join(", ") | truncate(100, killwords=True) }}
171
+ {%- endif %}
172
+ {%- if intermediates %}
173
+ ☐ intermediates: {{ intermediates | join(", ") | truncate(100, killwords=True) }}
174
+ {%- endif %}
175
+ {%- if outputs %}
176
+ ☐ outputs: {{ outputs | join(", ") | truncate(100, killwords=True) }}
177
+ {%- endif %}
178
+ {%- if keys %}
179
+ ☐ keys: {{ keys | join(", ") | truncate(100, killwords=True) }}
180
+ {%- endif %}
181
+ {%- if hashes %}
182
+ ☐ hashes: {{ hashes | join(", ") | truncate(100, killwords=True) }}
183
+ {%- endif %}
184
+
185
+ */
186
+
@@ -0,0 +1 @@
1
+ select * {% if all_except %} except ({% for e in all_except %}{{ e }}, {% endfor %}), {% endif %} from __final {# returns every column of __final, minus the all_except columns when that list is non-empty #}
@@ -0,0 +1,10 @@
1
+ __final as ( {# projects the output columns of parent_cdc; when filter is set, rows with __operation 'current' are excluded #}
2
+ select {% for output in outputs %} `{{ output }}`, {% endfor %}
3
+ from {{ parent_cdc }}
4
+ {% if filter %}
5
+ where
6
+ true
7
+ -- operation current added by filter
8
+ and __operation <> 'current'
9
+ {% endif %}
10
+ )
@@ -0,0 +1,34 @@
1
+ {% if has_rows %} {# with has_rows: upsert only rows of parent_cdc that have no __current row with the same key and hash #}
2
+ __merge_condition as (
3
+ select
4
+ s.__key as __merge_key,
5
+ 'upsert' as __merge_condition,
6
+ {% for output in outputs %} s.`{{ output }}`, {% endfor %}
7
+ from {{ parent_cdc }} s
8
+ left anti join
9
+ __current c on s.__key == c.__key and s.__hash == c.__hash
10
+ {% if has_source %} and s.__source = c.__source {% endif %}
11
+ {% if delete_missing %} {# delete_missing: rows of __current whose key is absent from parent_cdc become deletes #}
12
+ union all
13
+ select
14
+ c.__key as __merge_key,
15
+ 'delete' as __merge_condition,
16
+ {% for output in outputs %} c.`{{ output }}`, {% endfor %}
17
+ from __current c
18
+ left anti join
19
+ {{ parent_cdc }} s on s.__key == c.__key {% if has_source %} and s.__source = c.__source {% endif %}
20
+ {% endif %}
21
+ ),
22
+ {% else %} {# without has_rows every row of parent_cdc is an upsert #}
23
+ __merge_condition as (select s.__key as __merge_key, 'upsert' as __merge_condition, s.* from {{ parent_cdc }} s),
24
+ {% endif %}
25
+ __final as ( {# merge key, merge condition and output columns of __merge_condition; when filter is set, rows with __operation 'current' are excluded #}
26
+ select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
27
+ from __merge_condition
28
+ {% if filter %}
29
+ where
30
+ true
31
+ -- operation current added by filter
32
+ and __operation <> 'current'
33
+ {% endif %}
34
+ )