fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. fabricks/api/__init__.py +2 -0
  2. fabricks/api/context.py +1 -2
  3. fabricks/api/deploy.py +3 -0
  4. fabricks/api/job_schema.py +2 -2
  5. fabricks/api/masks.py +3 -0
  6. fabricks/api/notebooks/initialize.py +2 -2
  7. fabricks/api/notebooks/process.py +2 -2
  8. fabricks/api/notebooks/run.py +2 -2
  9. fabricks/api/notebooks/schedule.py +75 -0
  10. fabricks/api/notebooks/terminate.py +2 -2
  11. fabricks/api/schedules.py +2 -16
  12. fabricks/cdc/__init__.py +2 -2
  13. fabricks/cdc/base/__init__.py +2 -2
  14. fabricks/cdc/base/_types.py +9 -2
  15. fabricks/cdc/base/configurator.py +86 -41
  16. fabricks/cdc/base/generator.py +44 -35
  17. fabricks/cdc/base/merger.py +16 -14
  18. fabricks/cdc/base/processor.py +232 -144
  19. fabricks/cdc/nocdc.py +8 -7
  20. fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
  21. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  22. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  23. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  24. fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
  25. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  26. fabricks/cdc/templates/filter.sql.jinja +4 -4
  27. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  28. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  29. fabricks/cdc/templates/merge.sql.jinja +3 -2
  30. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  31. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  32. fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
  33. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
  34. fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
  35. fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
  36. fabricks/cdc/templates/query.sql.jinja +15 -11
  37. fabricks/context/__init__.py +18 -4
  38. fabricks/context/_types.py +2 -0
  39. fabricks/context/config/__init__.py +92 -0
  40. fabricks/context/config/utils.py +53 -0
  41. fabricks/context/log.py +8 -2
  42. fabricks/context/runtime.py +87 -263
  43. fabricks/context/secret.py +1 -1
  44. fabricks/context/spark_session.py +1 -1
  45. fabricks/context/utils.py +76 -0
  46. fabricks/core/dags/generator.py +6 -7
  47. fabricks/core/dags/log.py +2 -15
  48. fabricks/core/dags/processor.py +11 -11
  49. fabricks/core/dags/utils.py +15 -1
  50. fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
  51. fabricks/core/jobs/base/_types.py +64 -22
  52. fabricks/core/jobs/base/checker.py +13 -12
  53. fabricks/core/jobs/base/configurator.py +41 -67
  54. fabricks/core/jobs/base/generator.py +55 -24
  55. fabricks/core/jobs/base/invoker.py +54 -30
  56. fabricks/core/jobs/base/processor.py +43 -26
  57. fabricks/core/jobs/bronze.py +45 -38
  58. fabricks/core/jobs/get_jobs.py +2 -2
  59. fabricks/core/jobs/get_schedule.py +10 -0
  60. fabricks/core/jobs/get_schedules.py +32 -0
  61. fabricks/core/jobs/gold.py +61 -48
  62. fabricks/core/jobs/silver.py +39 -40
  63. fabricks/core/masks.py +52 -0
  64. fabricks/core/parsers/base.py +2 -2
  65. fabricks/core/schedules/__init__.py +14 -0
  66. fabricks/core/schedules/diagrams.py +46 -0
  67. fabricks/core/schedules/get_schedule.py +5 -0
  68. fabricks/core/schedules/get_schedules.py +9 -0
  69. fabricks/core/schedules/run.py +3 -0
  70. fabricks/core/schedules/views.py +61 -0
  71. fabricks/core/steps/base.py +110 -72
  72. fabricks/core/udfs.py +12 -23
  73. fabricks/core/views.py +20 -13
  74. fabricks/deploy/__init__.py +97 -0
  75. fabricks/deploy/masks.py +8 -0
  76. fabricks/deploy/notebooks.py +71 -0
  77. fabricks/deploy/schedules.py +8 -0
  78. fabricks/{core/deploy → deploy}/tables.py +16 -13
  79. fabricks/{core/deploy → deploy}/udfs.py +3 -1
  80. fabricks/deploy/utils.py +36 -0
  81. fabricks/{core/deploy → deploy}/views.py +5 -9
  82. fabricks/metastore/database.py +3 -3
  83. fabricks/metastore/dbobject.py +4 -4
  84. fabricks/metastore/table.py +157 -88
  85. fabricks/metastore/view.py +13 -6
  86. fabricks/utils/_types.py +6 -0
  87. fabricks/utils/azure_table.py +4 -3
  88. fabricks/utils/helpers.py +141 -11
  89. fabricks/utils/log.py +29 -18
  90. fabricks/utils/read/_types.py +1 -1
  91. fabricks/utils/schema/get_schema_for_type.py +6 -0
  92. fabricks/utils/write/delta.py +3 -3
  93. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
  94. fabricks-3.0.6.dist-info/RECORD +175 -0
  95. fabricks/api/notebooks/add_fabricks.py +0 -13
  96. fabricks/api/notebooks/optimize.py +0 -29
  97. fabricks/api/notebooks/vacuum.py +0 -29
  98. fabricks/cdc/templates/query/context.sql.jinja +0 -101
  99. fabricks/cdc/templates/query/current.sql.jinja +0 -32
  100. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
  101. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
  102. fabricks/cdc/templates/query/hash.sql.jinja +0 -1
  103. fabricks/cdc/templates/query/slice.sql.jinja +0 -14
  104. fabricks/config/__init__.py +0 -0
  105. fabricks/config/base.py +0 -8
  106. fabricks/config/fabricks/__init__.py +0 -26
  107. fabricks/config/fabricks/base.py +0 -90
  108. fabricks/config/fabricks/environment.py +0 -9
  109. fabricks/config/fabricks/pyproject.py +0 -47
  110. fabricks/config/jobs/__init__.py +0 -6
  111. fabricks/config/jobs/base.py +0 -101
  112. fabricks/config/jobs/bronze.py +0 -38
  113. fabricks/config/jobs/gold.py +0 -27
  114. fabricks/config/jobs/silver.py +0 -22
  115. fabricks/config/runtime.py +0 -67
  116. fabricks/config/steps/__init__.py +0 -6
  117. fabricks/config/steps/base.py +0 -50
  118. fabricks/config/steps/bronze.py +0 -7
  119. fabricks/config/steps/gold.py +0 -14
  120. fabricks/config/steps/silver.py +0 -15
  121. fabricks/core/deploy/__init__.py +0 -17
  122. fabricks/core/schedules.py +0 -142
  123. fabricks/core/scripts/__init__.py +0 -9
  124. fabricks/core/scripts/armageddon.py +0 -87
  125. fabricks/core/scripts/stats.py +0 -51
  126. fabricks/core/scripts/steps.py +0 -26
  127. fabricks-3.0.5.2.dist-info/RECORD +0 -177
  128. /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
  129. /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
  130. /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
  131. /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
  132. /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
  133. /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
  134. /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
  135. /fabricks/core/{utils.py → parsers/utils.py} +0 -0
  136. /fabricks/core/{scripts → schedules}/generate.py +0 -0
  137. /fabricks/core/{scripts → schedules}/process.py +0 -0
  138. /fabricks/core/{scripts → schedules}/terminate.py +0 -0
  139. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0
@@ -1,19 +1,20 @@
1
- {% import 'query/hash.sql.jinja' as h -%}
1
+ {% import 'macros/hash.sql.jinja' as h -%}
2
2
 
3
3
  with
4
4
  {% if format == "query" %} __query as ({{ src }}), {% endif %}
5
5
  __base as (
6
6
  select
7
7
  *
8
- {% if all_overwrite %} except ({% for o in all_overwrite %}{{ o }}, {% endfor %}),
9
- {% else %},
10
- {% endif %}
8
+ {% if overwrite %}
9
+ -- will be overwritten below
10
+ except ({% for o in overwrite %}{{ o }}, {% endfor %})
11
+ {% endif %},
11
12
  {% if add_calculated_columns %} {% for c in add_calculated_columns %} {{ c }}, {% endfor %} {% endif %}
12
13
  {% if add_timestamp %} cast(current_date() as timestamp) as __timestamp, {% endif %}
13
14
  {% if add_operation %} cast('{{ add_operation }}' as string) as __operation, {% endif %}
14
15
  {% if add_source %} cast('{{ add_source }}' as string) as __source, {% endif %}
15
- {% if add_hash %} {{ h.hash(fields=hashes) }} as __hash, {% endif %}
16
- {% if add_key %} {{ h.hash(fields=keys) }} as __key, {% endif %}
16
+ {% if add_hash %} {{ h.add_hash(fields=hashes) }} as __hash, {% endif %}
17
+ {% if add_key %} {{ h.add_hash(fields=keys) }} as __key, {% endif %}
17
18
  {% if add_metadata %}
18
19
  struct(
19
20
  {% if cdc == "nocdc" %}current_timestamp() as inserted,
@@ -0,0 +1,28 @@
1
+ {% import 'macros/hash.sql.jinja' as h -%}
2
+
3
+ __current as (
4
+ select
5
+ {% for i in intermediates %}
6
+ {% if i == "__timestamp" %}
7
+ {% if add_timestamp %} cast('0001-01-01' as timestamp) as __timestamp,
8
+ {% elif cdc == "nocdc" %} __timestamp,
9
+ {% elif cdc == "scd1" %} __timestamp,
10
+ {% elif cdc == "scd2" %} __valid_from as __timestamp,
11
+ {% endif %}
12
+ {% elif i == "__operation" %}
13
+ {% if has_no_data %} 'delete' as __operation, {% else %} 'current' as __operation, {% endif %}
14
+ {% elif i == "__hash" %}
15
+ {% if add_hash %} {{ h.add_hash(fields=hashes) }} as __hash, {% else %} __hash, {% endif %}
16
+ {% elif i == "__key" %}
17
+ {% if add_key %} {{ h.add_key(fields=keys) }} as __key, {% else %} __key, {% endif %}
18
+ {% else %} `{{ i }}`,
19
+ {% endif %}
20
+ {% endfor %}
21
+ from {{ tgt }} t
22
+ where
23
+ true
24
+ {% if cdc == "scd2" %} and __is_current {% endif %}
25
+ {% if cdc == "scd1" %} {% if soft_delete %} and __is_current {% endif %} {% endif %}
26
+ {% if sources %} and ({{ sources }}) {% endif %}
27
+ {% if update_where %} and {{ update_where }} {% endif %}
28
+ ),
@@ -0,0 +1,32 @@
1
+ {% if advanced_deduplication %}
2
+ __deduplicate_hash as (
3
+ select
4
+ *,
5
+ lag(__hash) over (
6
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
7
+ ) as __deduplicate_hash_previous__hash,
8
+ lag(__operation) over (
9
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
10
+ ) as __deduplicate_hash_previous_operation
11
+ from {{ parent_deduplicate_hash }}
12
+ where true
13
+ ),
14
+ __deduplicated_hash as (
15
+ select *
16
+ from __deduplicate_hash
17
+ where
18
+ true
19
+ and not (
20
+ __hash <=> __deduplicate_hash_previous__hash and __operation <=> __deduplicate_hash_previous_operation
21
+ )
22
+ ),
23
+ {% else %}
24
+ __deduplicated_hash as (
25
+ select *
26
+ from {{ parent_deduplicate_hash }}
27
+ where true
28
+ qualify
29
+ not lag(__hash) over (partition by {% if has_source %} __source, {% endif %} __key order by null)
30
+ <=> __hash
31
+ ),
32
+ {% endif %}
@@ -0,0 +1,31 @@
1
+ {% if advanced_deduplication %}
2
+ __deduplicate_key as (
3
+ select
4
+ *,
5
+ row_number() over (
6
+ partition by {% if has_source %} __source, {% endif %} __key, __timestamp
7
+ order by
8
+ /* prioritize delete over upsert */
9
+ __operation asc,
10
+ {% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %} {% endif %}
11
+ ) as __deduplicate_key_rn
12
+ from {{ parent_deduplicate_key }}
13
+ where true
14
+ ),
15
+ __deduplicated_key as (select *, from __deduplicate_key where __deduplicate_key_rn == 1),
16
+ {% else %}
17
+ __deduplicated_key as (
18
+ select *
19
+ from {{ parent_deduplicate_key }}
20
+ where true
21
+ qualify
22
+ row_number() over (
23
+ partition by {% if has_source %} __source, {% endif %} __key
24
+ order by
25
+ {% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %}
26
+ {% else %} null
27
+ {% endif %}
28
+ )
29
+ == 1
30
+ ),
31
+ {% endif %}
@@ -1,34 +1,16 @@
1
- {% import 'query/hash.sql.jinja' as h -%}
2
-
3
1
  {% if mode == "update" %}
4
2
  __rectified_base as (
5
3
  select
6
- {% for field in fields %} {{ field }}, {% endfor %}
7
- __operation,
8
- __operation as __original_operation,
9
- __timestamp,
4
+ {% for i in intermediates %} `{{ i }}`, {% endfor %}
10
5
  __timestamp as __original_timestamp,
11
- __hash,
12
- __key,
13
- {% if has_identity %} __identity, {% endif %}
14
- {% if has_source %} __source, {% endif %}
15
- {% if has_metadata %} __metadata, {% endif %}
16
- {% if has_rescued_data %} __rescued_data, {% endif %}
6
+ __operation as __original_operation,
17
7
  from {{ parent_rectify }}
18
8
  {% if has_rows %}
19
9
  union all
20
10
  select
21
- {% for field in fields %} {{ field }}, {% endfor %}
22
- __operation,
23
- __operation as __original_operation,
24
- __timestamp,
11
+ {% for i in intermediates %} `{{ i }}`, {% endfor %}
25
12
  __timestamp as __original_timestamp,
26
- __hash,
27
- __key,
28
- {% if has_identity %} __identity, {% endif %}
29
- {% if has_source %} __source, {% endif %}
30
- {% if has_metadata %} __metadata, {% endif %}
31
- {% if has_rescued_data %} __rescued_data, {% endif %}
13
+ __operation as __original_operation,
32
14
  from __current
33
15
  {% endif %}
34
16
  ),
@@ -0,0 +1 @@
1
+ __sliced as (select * from {{ parent_slice }} s where true and ({{ slices }})),
@@ -1,4 +1,4 @@
1
- {% include 'query/base.sql.jinja' %}
2
- {% if slice == "update" %} {% include 'filter/update.sql.jinja' %} {% endif %}
3
- {% if slice == "latest" %} {% include 'filter/latest.sql.jinja' %} {% endif %}
4
- {% include 'filter/final.sql.jinja' %}
1
+ {% include 'ctes/base.sql.jinja' %}
2
+ {% if slice == "update" %} {% include 'filters/update.sql.jinja' %} {% endif %}
3
+ {% if slice == "latest" %} {% include 'filters/latest.sql.jinja' %} {% endif %}
4
+ {% include 'filters/final.sql.jinja' %}
@@ -0,0 +1 @@
1
+ {% macro backtick(field) -%} `{{ field }}` {%- endmacro %}
@@ -0,0 +1,18 @@
1
+ {% macro add_hash(fields) -%}
2
+ md5(
3
+ array_join(
4
+ array(
5
+ {% for f in fields %}
6
+ {% if f == "__operation" %}cast(`{{ f }}` <=> 'delete' as string) -- reloads and upserts should have the same hash, not deletes,
7
+ {% else %}`{{ f }}`::string,
8
+ {% endif %}
9
+ {% endfor %}
10
+ ),
11
+ '*',
12
+ '-1'
13
+ )
14
+ )
15
+ {%- endmacro %}
16
+ {% macro add_key(fields) -%}
17
+ md5(array_join(array({% for f in fields %}`{{ f }}`::string, {% endfor %}), '*', '-1'))
18
+ {%- endmacro %}
@@ -1,2 +1,3 @@
1
- {% if cdc == "scd1" %} {% include 'merge/scd1.sql.jinja' %} {% endif %}
2
- {% if cdc == "scd2" %} {% include 'merge/scd2.sql.jinja' %} {% endif %}
1
+ {% if cdc == "scd1" %} {% include 'merges/scd1.sql.jinja' %} {% endif %}
2
+ {% if cdc == "scd2" %} {% include 'merges/scd2.sql.jinja' %} {% endif %}
3
+ {% if cdc == "nocdc" %} {% include 'merges/nocdc.sql.jinja' %} {% endif %}
@@ -0,0 +1,41 @@
1
+ {% if format == "dataframe" %}
2
+ merge into {{ tgt }} t using {{ "{src}" }} s
3
+ {% endif %}
4
+ {% if format == "view" %}
5
+ merge into {{ tgt }} t using {{ src }} s
6
+ {% endif %}
7
+ {% if has_key %}
8
+ on t.__key == s.__merge_key
9
+ {% else %}
10
+ on
11
+ {% for k in keys %}
12
+ t.{{ k }} <=> s.{{ k }}
13
+ {% endfor %}
14
+ {% endif %}
15
+ {% if has_source %}
16
+ and t.__source == s.__source
17
+ {% endif %}
18
+ {% if update_where %} {{ update_where }} {% endif %}
19
+ when matched
20
+ and __merge_condition == 'upsert' then
21
+ update
22
+ set
23
+ {% for c in columns %}
24
+ {{ c }} = s.{{c}},
25
+ {% endfor %}
26
+ -- delete
27
+ when matched
28
+ and __merge_condition == 'delete' then
29
+ delete
30
+ when not matched
31
+ and __merge_condition == 'upsert' then
32
+ insert (
33
+ {% for c in columns %}
34
+ {{ c }},
35
+ {% endfor %}
36
+ )
37
+ values (
38
+ {% for c in columns %}
39
+ s.{{ c }},
40
+ {% endfor %}
41
+ )
@@ -0,0 +1,186 @@
1
+ /*
2
+
3
+ ⚙️ BASE
4
+ {%- if cdc %}
5
+ ☐ cdc: {{ cdc }}
6
+ {%- endif %}
7
+ {%- if mode %}
8
+ ☐ mode: {{ mode }}
9
+ {%- endif %}
10
+
11
+ 🎯 SOURCE & TARGET
12
+ {%- if format %}
13
+ ☐ format: {{ format }}
14
+ {%- endif %}
15
+ {%- if src %}
16
+ ☐ src: {{ src | truncate(100, killwords=True) }}
17
+ {%- endif %}
18
+ {%- if tgt %}
19
+ ☐ tgt: {{ tgt }}
20
+ {%- endif %}
21
+
22
+ 📊 CTE's
23
+ {%- if slice %}
24
+ 🗹 slice?
25
+ {%- endif %}
26
+ {%- if deduplicate %}
27
+ 🗹 deduplicate?
28
+ {%- endif %}
29
+ {%- if advanced_deduplication %}
30
+ 🗹 advanced deduplication?
31
+ {%- endif %}
32
+ {%- if deduplicate_key %}
33
+ 🗹 deduplicate key?
34
+ {%- endif %}
35
+ {%- if deduplicate_hash %}
36
+ 🗹 deduplicate hash?
37
+ {%- endif %}
38
+ {%- if order_duplicate_by %}
39
+ 🗹 order duplicate by?
40
+ {%- endif %}
41
+ {%- if rectify %}
42
+ 🗹 rectify?
43
+ {%- endif %}
44
+ {%- if correct_valid_from %}
45
+ 🗹 correct valid from?
46
+ {%- endif %}
47
+
48
+ 🔪 FILTERING
49
+ {%- if filter_where %}
50
+ ☐ filter where: {{ filter_where }}
51
+ {%- endif %}
52
+ {%- if update_where %}
53
+ ☐ update where: {{ update_where }}
54
+ {%- endif %}
55
+ {%- if slices %}
56
+ ☐ slices: {{ slices }}
57
+ {%- endif %}
58
+ {%- if sources %}
59
+ ☐ sources: {{ sources }}
60
+ {%- endif %}
61
+
62
+ 🗑️ DELETES
63
+ {%- if delete_missing %}
64
+ 🗹 delete missing?
65
+ {%- endif %}
66
+ {%- if soft_delete %}
67
+ 🗹 soft delete?
68
+ {%- endif %}
69
+
70
+ ✅ DATA VALIDATION
71
+ {%- if has_no_data %}
72
+ ☒ has data?
73
+ {%- else %}
74
+ 🗹 has data?
75
+ {%- endif %}
76
+ {%- if has_rows %}
77
+ 🗹 has rows?
78
+ {%- else %}
79
+ ☒ has rows?
80
+ {%- endif %}
81
+ {%- if has_source %}
82
+ 🗹 has source?
83
+ {%- endif %}
84
+
85
+ 🏷️ HAS FIELDS
86
+ {%- if has_metadata %}
87
+ 🗹 has metadata?
88
+ {%- endif %}
89
+ {%- if has_timestamp %}
90
+ 🗹 has timestamp?
91
+ {%- endif %}
92
+ {%- if has_identity %}
93
+ 🗹 has identity?
94
+ {%- endif %}
95
+ {%- if has_key %}
96
+ 🗹 has key?
97
+ {%- endif %}
98
+ {%- if has_hash %}
99
+ 🗹 has hash?
100
+ {%- endif %}
101
+ {%- if has_operation %}
102
+ 🗹 has operation?
103
+ {%- endif %}
104
+ {%- if has_order_by %}
105
+ 🗹 has order by?
106
+ {%- endif %}
107
+ {%- if has_rescued_data %}
108
+ 🗹 has rescued data?
109
+ {%- endif %}
110
+
111
+ ➕ ADD COLUMNS
112
+ {%- if add_metadata %}
113
+ 🗹 add metadata?
114
+ {%- endif %}
115
+ {%- if add_timestamp %}
116
+ 🗹 add timestamp?
117
+ {%- endif %}
118
+ {%- if add_key %}
119
+ 🗹 add key?
120
+ {%- endif %}
121
+ {%- if add_hash %}
122
+ 🗹 add hash?
123
+ {%- endif %}
124
+ {%- if add_operation %}
125
+ ☐ add_operation: {{ add_operation }}
126
+ {%- endif %}
127
+ {%- if add_source %}
128
+ ☐ add_source: {{ add_source }}
129
+ {%- endif %}
130
+ {%- if add_calculated_columns %}
131
+ ☐ add_calculated_columns: {{ add_calculated_columns }}
132
+ {%- endif %}
133
+
134
+ 🔄 EXTRA COLUMN OPERATIONs
135
+ {%- if all_except %}
136
+ ☐ all_except: {{ all_except | join(", ") | truncate(100, killwords=True) }}
137
+ {%- endif %}
138
+ {%- if all_overwrite %}
139
+ ☐ all_overwrite: {{ all_overwrite | join(", ") | truncate(100, killwords=True) }}
140
+ {%- endif %}
141
+ {%- if overwrite %}
142
+ ☐ overwrite: {{ overwrite | join(", ") | truncate(100, killwords=True) }}
143
+ {%- endif %}
144
+
145
+ 👨‍👩‍👧 PARENTS
146
+ {%- if parent_slice %}
147
+ ☐ parent_slice: {{ parent_slice }}
148
+ {%- endif %}
149
+ {%- if parent_rectify %}
150
+ ☐ parent_rectify: {{ parent_rectify }}
151
+ {%- endif %}
152
+ {%- if parent_deduplicate_key %}
153
+ ☐ parent_deduplicate_key: {{ parent_deduplicate_key }}
154
+ {%- endif %}
155
+ {%- if parent_deduplicate_hash %}
156
+ ☐ parent_deduplicate_hash: {{ parent_deduplicate_hash }}
157
+ {%- endif %}
158
+ {%- if parent_cdc %}
159
+ ☐ parent_cdc: {{ parent_cdc }}
160
+ {%- endif %}
161
+ {%- if parent_final %}
162
+ ☐ parent_final: {{ parent_final }}
163
+ {%- endif %}
164
+
165
+ 📦 LAYOUT
166
+ {%- if columns %}
167
+ ☐ columns: {{ columns | join(", ") | truncate(100, killwords=True) }}
168
+ {%- endif %}
169
+ {%- if inputs %}
170
+ ☐ inputs: {{ inputs | join(", ") | truncate(100, killwords=True) }}
171
+ {%- endif %}
172
+ {%- if intermediates %}
173
+ ☐ intermediates: {{ intermediates | join(", ") | truncate(100, killwords=True) }}
174
+ {%- endif %}
175
+ {%- if outputs %}
176
+ ☐ outputs: {{ outputs | join(", ") | truncate(100, killwords=True) }}
177
+ {%- endif %}
178
+ {%- if keys %}
179
+ ☐ keys: {{ keys | join(", ") | truncate(100, killwords=True) }}
180
+ {%- endif %}
181
+ {%- if hashes %}
182
+ ☐ hashes: {{ hashes | join(", ") | truncate(100, killwords=True) }}
183
+ {%- endif %}
184
+
185
+ */
186
+
@@ -1,5 +1,5 @@
1
1
  __final as (
2
- select *
2
+ select {% for output in outputs %} `{{ output }}`, {% endfor %},
3
3
  from {{ parent_cdc }}
4
4
  {% if filter %}
5
5
  where
@@ -0,0 +1,35 @@
1
+ {% if has_rows %}
2
+ __merge_condition as (
3
+ select
4
+ s.__key as __merge_key,
5
+ 'upsert' as __merge_condition,
6
+ {% for output in outputs %} s.`{{ output }}`, {% endfor %}
7
+ from {{ parent_cdc }} s
8
+ left anti join
9
+ __current c on s.__key == c.__key and s.__hash == c.__hash
10
+ {% if has_source %} and s.__source = c.__source {% endif %}
11
+ {% if delete_missing %}
12
+ union all
13
+ select
14
+ c.__key as __merge_key,
15
+ 'delete' as __merge_condition,
16
+ {% for output in outputs %} c.`{{ output }}`, {% endfor %}
17
+ from __current c
18
+ left anti join
19
+ {{ parent_cdc }} s on s.__key == c.__key and s.__hash == c.__hash
20
+ {% if has_source %} and s.__source = c.__source {% endif %}
21
+ {% endif %}
22
+ ),
23
+ {% else %}
24
+ __merge_condition as (select s.__key as __merge_key, 'upsert' as __merge_condition, s.* from {{ parent_cdc }} s),
25
+ {% endif %}
26
+ __final as (
27
+ select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
28
+ from __merge_condition
29
+ {% if filter %}
30
+ where
31
+ true
32
+ -- operation current added by filter
33
+ and __operation <> 'current'
34
+ {% endif %}
35
+ )
@@ -49,16 +49,7 @@ __scd1 as (
49
49
  ),
50
50
  {% if mode == "complete" %}
51
51
  __final as (
52
- select
53
- {% for field in fields %} s.{{ field }}, {% endfor %},
54
- {% if has_identity %} __identity, {% endif %}
55
- s.__key,
56
- s.__timestamp,
57
- {% if soft_delete %} s.__is_current, s.__is_deleted, {% endif %}
58
- s.__hash,
59
- {% if has_source %} s.__source, {% endif %}
60
- {% if has_metadata %} s.__metadata, {% endif %}
61
- {% if has_rescued_data %} s.__rescued_data, {% endif %}
52
+ select {% for output in outputs %} `{{ output }}`, {% endfor %}
62
53
  from __scd1 s
63
54
  where true {% if not soft_delete %} and s.__is_current {% endif %}
64
55
  )
@@ -86,24 +77,7 @@ __scd1 as (
86
77
  ),
87
78
  {% endif %}
88
79
  __final as (
89
- select
90
- __merge_key,
91
- __merge_condition,
92
- {% for field in fields %} {{ field }},
93
- {% endfor %},
94
- {% if has_identity %} __identity,
95
- {% endif %}
96
- __key,
97
- __timestamp,
98
- {% if soft_delete %} __is_current, __is_deleted,
99
- {% endif %}
100
- __hash,
101
- {% if has_source %} __source,
102
- {% endif %}
103
- {% if has_metadata %} __metadata,
104
- {% endif %}
105
- {% if has_rescued_data %} __rescued_data,
106
- {% endif %}
80
+ select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
107
81
  {% if has_rows %} from __scd1_no_fake_update m
108
82
  {% else %} from __merge_condition m
109
83
  {% endif %}
@@ -26,27 +26,23 @@ __scd2 as (
26
26
  ),
27
27
  {% if mode == "complete" %}
28
28
  __complete as (select s.* from __scd2 s where true and not __operation <=> 'delete'),
29
- __final as (
30
- select
31
- {% for field in fields %} {{ field }}, {% endfor %},
32
- {% if has_identity %} __identity, {% endif %}
33
- __key,
34
- {% if correct_valid_from %}
29
+ {% if correct_valid_from %}
30
+ __correct_valid_from as (
31
+ select
32
+ * except (__valid_from),
35
33
  if(
36
34
  __valid_from == min(__valid_from) over (partition by null),
37
35
  cast('1900-01-01' as timestamp),
38
36
  __valid_from
39
- ) as __valid_from,
40
- {% else %} __valid_from,
41
- {% endif %}
42
- __valid_to,
43
- __is_current,
44
- {% if soft_delete %} __is_deleted, {% endif %}
45
- __hash,
46
- {% if has_source %} __source, {% endif %}
47
- {% if has_metadata %} __metadata, {% endif %}
48
- {% if has_rescued_data %} __rescued_data, {% endif %}
49
- from __complete
37
+ ) as __valid_from
38
+ from __complete
39
+ ),
40
+ {% endif %}
41
+ __final as (
42
+ select {% for output in outputs %} `{{ output }}`, {% endfor %}
43
+ {% if correct_valid_from %} from __correct_valid_from
44
+ {% else %} from __complete
45
+ {% endif %}
50
46
  )
51
47
  {% else %}
52
48
  {% if has_rows %}
@@ -81,37 +77,22 @@ __scd2 as (
81
77
  -- only the first record can be an update or a delete
82
78
  where (s.__scd2_rn == 1 and o.__merge_condition in ('update', 'delete')) or o.__merge_condition == 'insert'
83
79
  ),
80
+ {% if correct_valid_from %}
81
+ __correct_valid_from as (
82
+ select
83
+ * except (__valid_from),
84
+ if(
85
+ __valid_from == min(__valid_from) over (partition by null),
86
+ cast('1900-01-01' as timestamp),
87
+ __valid_from
88
+ ) as __valid_from
89
+ from __merge_condition
90
+ ),
91
+ {% endif %}
84
92
  __final as (
85
- select
86
- __merge_key,
87
- __merge_condition,
88
- {% for field in fields %} {{ field }},
89
- {% endfor %},
90
- {% if has_identity %} __identity,
91
- {% endif %}
92
- __key,
93
- {% if correct_valid_from %}
94
- {% if not has_rows %}
95
- if(
96
- __valid_from == min(__valid_from) over (partition by null),
97
- cast('1900-01-01' as timestamp),
98
- __valid_from
99
- ) as __valid_from,
100
- {% else %} __valid_from,
101
- {% endif %}
102
- {% else %} __valid_from,
103
- {% endif %}
104
- __valid_to,
105
- __is_current and __merge_condition == 'insert' as __is_current,
106
- {% if soft_delete %} __is_deleted,
107
- {% endif %}
108
- __hash,
109
- {% if has_source %} __source,
110
- {% endif %}
111
- {% if has_metadata %} __metadata,
112
- {% endif %}
113
- {% if has_rescued_data %} __rescued_data,
114
- {% endif %}
115
- from __merge_condition m
93
+ select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
94
+ {% if correct_valid_from %} from __correct_valid_from
95
+ {% else %} from __merge_condition
96
+ {% endif %}
116
97
  )
117
98
  {% endif %}
@@ -1,11 +1,15 @@
1
- {% include 'query/context.sql.jinja' %}
2
- {% include 'query/base.sql.jinja' %}
3
- {% if slice %} {% include 'query/slice.sql.jinja' %} {% endif %}
4
- {% if deduplicate_key %} {% include 'query/deduplicate_key.sql.jinja' %} {% endif %}
5
- {% if mode == "update" %} {% if has_rows %} {% include 'query/current.sql.jinja' %} {% endif %} {% endif %}
6
- {% if rectify %} {% include 'query/rectify.sql.jinja' %} {% endif %}
7
- {% if deduplicate_hash %} {% include 'query/deduplicate_hash.sql.jinja' %} {% endif %}
8
- {% if cdc == "nocdc" %} {% include 'query/nocdc.sql.jinja' %} {% endif %}
9
- {% if cdc == "scd1" %} {% include 'query/scd1.sql.jinja' %} {% endif %}
10
- {% if cdc == "scd2" %} {% include 'query/scd2.sql.jinja' %} {% endif %}
11
- {% include 'query/final.sql.jinja' %}
1
+ {% include 'queries/context.sql.jinja' %}
2
+ {% include 'ctes/base.sql.jinja' %}
3
+ {% if slice %} {% include 'ctes/slice.sql.jinja' %} {% endif %}
4
+ {% if deduplicate_key %} {% include 'ctes/deduplicate_key.sql.jinja' %} {% endif %}
5
+ {% if mode == "update" %} {% if has_rows %} {% include 'ctes/current.sql.jinja' %} {% endif %} {% endif %}
6
+ {% if rectify %} {% include 'ctes/rectify.sql.jinja' %} {% endif %}
7
+ {% if deduplicate_hash %} {% include 'ctes/deduplicate_hash.sql.jinja' %} {% endif %}
8
+ {% if cdc == "nocdc" %}
9
+ {% if mode == "update" %} {% include 'queries/nocdc/update.sql.jinja' %}
10
+ {% else %} {% include 'queries/nocdc/complete.sql.jinja' %}
11
+ {% endif %}
12
+ {% endif %}
13
+ {% if cdc == "scd1" %} {% include 'queries/scd1.sql.jinja' %} {% endif %}
14
+ {% if cdc == "scd2" %} {% include 'queries/scd2.sql.jinja' %} {% endif %}
15
+ {% include 'queries/final.sql.jinja' %}