fabricks 2024.7.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +7 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +31 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/extenders.py +3 -0
  10. fabricks/api/log.py +3 -0
  11. fabricks/api/metastore/__init__.py +10 -0
  12. fabricks/api/metastore/database.py +3 -0
  13. fabricks/api/metastore/table.py +3 -0
  14. fabricks/api/metastore/view.py +6 -0
  15. fabricks/api/notebooks/__init__.py +0 -0
  16. fabricks/api/notebooks/cluster.py +6 -0
  17. fabricks/api/notebooks/deploy/__init__.py +0 -0
  18. fabricks/api/notebooks/deploy/fabricks.py +147 -0
  19. fabricks/api/notebooks/deploy/notebooks.py +86 -0
  20. fabricks/api/notebooks/initialize.py +38 -0
  21. fabricks/api/notebooks/optimize.py +25 -0
  22. fabricks/api/notebooks/process.py +50 -0
  23. fabricks/api/notebooks/run.py +87 -0
  24. fabricks/api/notebooks/terminate.py +27 -0
  25. fabricks/api/notebooks/vacuum.py +25 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/udfs.py +3 -0
  28. fabricks/api/utils.py +9 -0
  29. fabricks/cdc/__init__.py +14 -0
  30. fabricks/cdc/base/__init__.py +4 -0
  31. fabricks/cdc/base/cdc.py +5 -0
  32. fabricks/cdc/base/configurator.py +145 -0
  33. fabricks/cdc/base/generator.py +117 -0
  34. fabricks/cdc/base/merger.py +107 -0
  35. fabricks/cdc/base/processor.py +338 -0
  36. fabricks/cdc/base/types.py +3 -0
  37. fabricks/cdc/cdc.py +5 -0
  38. fabricks/cdc/nocdc.py +19 -0
  39. fabricks/cdc/scd.py +21 -0
  40. fabricks/cdc/scd1.py +15 -0
  41. fabricks/cdc/scd2.py +15 -0
  42. fabricks/cdc/templates/__init__.py +0 -0
  43. fabricks/cdc/templates/merge/scd1.sql.jinja +72 -0
  44. fabricks/cdc/templates/merge/scd2.sql.jinja +54 -0
  45. fabricks/cdc/templates/merge.sql.jinja +2 -0
  46. fabricks/cdc/templates/query/__init__.py +0 -0
  47. fabricks/cdc/templates/query/base.sql.jinja +34 -0
  48. fabricks/cdc/templates/query/context.sql.jinja +95 -0
  49. fabricks/cdc/templates/query/current.sql.jinja +32 -0
  50. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +21 -0
  51. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +14 -0
  52. fabricks/cdc/templates/query/filter.sql.jinja +71 -0
  53. fabricks/cdc/templates/query/final.sql.jinja +1 -0
  54. fabricks/cdc/templates/query/hash.sql.jinja +1 -0
  55. fabricks/cdc/templates/query/nocdc.sql.jinja +10 -0
  56. fabricks/cdc/templates/query/rectify.sql.jinja +120 -0
  57. fabricks/cdc/templates/query/scd1.sql.jinja +112 -0
  58. fabricks/cdc/templates/query/scd2.sql.jinja +114 -0
  59. fabricks/cdc/templates/query.sql.jinja +11 -0
  60. fabricks/context/__init__.py +51 -0
  61. fabricks/context/log.py +26 -0
  62. fabricks/context/runtime.py +143 -0
  63. fabricks/context/spark.py +43 -0
  64. fabricks/context/types.py +123 -0
  65. fabricks/core/__init__.py +4 -0
  66. fabricks/core/dags/__init__.py +9 -0
  67. fabricks/core/dags/base.py +72 -0
  68. fabricks/core/dags/generator.py +154 -0
  69. fabricks/core/dags/log.py +14 -0
  70. fabricks/core/dags/processor.py +163 -0
  71. fabricks/core/dags/terminator.py +26 -0
  72. fabricks/core/deploy/__init__.py +12 -0
  73. fabricks/core/deploy/tables.py +76 -0
  74. fabricks/core/deploy/views.py +417 -0
  75. fabricks/core/extenders.py +29 -0
  76. fabricks/core/jobs/__init__.py +20 -0
  77. fabricks/core/jobs/base/__init__.py +10 -0
  78. fabricks/core/jobs/base/checker.py +89 -0
  79. fabricks/core/jobs/base/configurator.py +323 -0
  80. fabricks/core/jobs/base/error.py +16 -0
  81. fabricks/core/jobs/base/generator.py +391 -0
  82. fabricks/core/jobs/base/invoker.py +119 -0
  83. fabricks/core/jobs/base/job.py +5 -0
  84. fabricks/core/jobs/base/processor.py +204 -0
  85. fabricks/core/jobs/base/types.py +191 -0
  86. fabricks/core/jobs/bronze.py +333 -0
  87. fabricks/core/jobs/get_job.py +126 -0
  88. fabricks/core/jobs/get_job_conf.py +115 -0
  89. fabricks/core/jobs/get_job_id.py +26 -0
  90. fabricks/core/jobs/get_jobs.py +89 -0
  91. fabricks/core/jobs/gold.py +218 -0
  92. fabricks/core/jobs/silver.py +354 -0
  93. fabricks/core/parsers/__init__.py +12 -0
  94. fabricks/core/parsers/base.py +91 -0
  95. fabricks/core/parsers/decorator.py +11 -0
  96. fabricks/core/parsers/get_parser.py +25 -0
  97. fabricks/core/parsers/types.py +6 -0
  98. fabricks/core/schedules.py +89 -0
  99. fabricks/core/scripts/__init__.py +13 -0
  100. fabricks/core/scripts/armageddon.py +82 -0
  101. fabricks/core/scripts/generate.py +20 -0
  102. fabricks/core/scripts/job_schema.py +28 -0
  103. fabricks/core/scripts/optimize.py +45 -0
  104. fabricks/core/scripts/process.py +9 -0
  105. fabricks/core/scripts/stats.py +48 -0
  106. fabricks/core/scripts/steps.py +27 -0
  107. fabricks/core/scripts/terminate.py +6 -0
  108. fabricks/core/scripts/vacuum.py +45 -0
  109. fabricks/core/site_packages.py +55 -0
  110. fabricks/core/steps/__init__.py +4 -0
  111. fabricks/core/steps/base.py +282 -0
  112. fabricks/core/steps/get_step.py +10 -0
  113. fabricks/core/steps/get_step_conf.py +33 -0
  114. fabricks/core/steps/types.py +7 -0
  115. fabricks/core/udfs.py +106 -0
  116. fabricks/core/utils.py +69 -0
  117. fabricks/core/views.py +36 -0
  118. fabricks/metastore/README.md +3 -0
  119. fabricks/metastore/__init__.py +5 -0
  120. fabricks/metastore/database.py +71 -0
  121. fabricks/metastore/pyproject.toml +20 -0
  122. fabricks/metastore/relational.py +61 -0
  123. fabricks/metastore/table.py +529 -0
  124. fabricks/metastore/utils.py +35 -0
  125. fabricks/metastore/view.py +40 -0
  126. fabricks/utils/README.md +3 -0
  127. fabricks/utils/__init__.py +0 -0
  128. fabricks/utils/azure_queue.py +63 -0
  129. fabricks/utils/azure_table.py +99 -0
  130. fabricks/utils/console.py +51 -0
  131. fabricks/utils/container.py +57 -0
  132. fabricks/utils/fdict.py +28 -0
  133. fabricks/utils/helpers.py +89 -0
  134. fabricks/utils/log.py +153 -0
  135. fabricks/utils/path.py +206 -0
  136. fabricks/utils/pip.py +61 -0
  137. fabricks/utils/pydantic.py +92 -0
  138. fabricks/utils/pyproject.toml +18 -0
  139. fabricks/utils/read/__init__.py +11 -0
  140. fabricks/utils/read/read.py +305 -0
  141. fabricks/utils/read/read_excel.py +5 -0
  142. fabricks/utils/read/read_yaml.py +43 -0
  143. fabricks/utils/read/types.py +3 -0
  144. fabricks/utils/schema/__init__.py +7 -0
  145. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  146. fabricks/utils/schema/get_schema_for_type.py +93 -0
  147. fabricks/utils/secret.py +78 -0
  148. fabricks/utils/sqlglot.py +48 -0
  149. fabricks/utils/write/__init__.py +8 -0
  150. fabricks/utils/write/delta.py +46 -0
  151. fabricks/utils/write/stream.py +27 -0
  152. fabricks-2024.7.1.5.dist-info/METADATA +212 -0
  153. fabricks-2024.7.1.5.dist-info/RECORD +154 -0
  154. fabricks-2024.7.1.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,32 @@
1
+ {% import 'query/hash.sql.jinja' as h -%}
2
+
3
+ __current as (
4
+ select
5
+ {% for field in fields %} {{ field }}, {% endfor %}
6
+ 'current' as __operation,
7
+ 'current' as __original_operation,
8
+ {% if has_timestamp %}
9
+ {% if cdc == "nocdc" %} __timestamp as __timestamp, {% endif %}
10
+ {% if cdc == "scd1" %} __timestamp as __timestamp, {% endif %}
11
+ {% if cdc == "scd2" %} __valid_from as __timestamp, {% endif %}
12
+ {% else %} cast('0001-01-01' as timestamp) as __timestamp,
13
+ {% endif %}
14
+ __timestamp as __original_timestamp,
15
+ {% if has_hash %} __hash,
16
+ {% else %} {{ h.hash(fields=hashes) }} as __hash,
17
+ {% endif %}
18
+ {% if has_identity %} __identity, {% endif %}
19
+ {% if has_key %} __key,
20
+ {% else %} {{ h.hash(fields=keys) }} as __key,
21
+ {% endif %}
22
+ {% if has_source %} __source, {% endif %}
23
+ {% if has_metadata %} __metadata, {% endif %}
24
+ {% if has_rescued_data %} __rescued_data, {% endif %}
25
+ from {{ tgt }} t
26
+ where
27
+ true
28
+ {% if cdc == "scd2" %} and __is_current {% endif %}
29
+ {% if cdc == "scd1" %} {% if soft_delete %} and __is_current {% endif %} {% endif %}
30
+ {% if has_source %} and exists (select 1 from __base s where s.__source == t.__source) {% endif %}
31
+ {% if update_where %} and {{ update_where }} {% endif %}
32
+ ),
@@ -0,0 +1,21 @@
1
+ __deduplicate_hash as (
2
+ select
3
+ *,
4
+ lag(__hash) over (
5
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
6
+ ) as __deduplicate_hash_previous__hash,
7
+ lag(__operation) over (
8
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
9
+ ) as __deduplicate_hash_previous_operation
10
+ from {{ parent_deduplicate_hash }}
11
+ where true
12
+ ),
13
+ __deduplicated_hash as (
14
+ select *
15
+ from __deduplicate_hash
16
+ where
17
+ true
18
+ and not (
19
+ __hash <=> __deduplicate_hash_previous__hash and __operation <=> __deduplicate_hash_previous_operation
20
+ )
21
+ ),
@@ -0,0 +1,14 @@
1
+ __deduplicate_key as (
2
+ select
3
+ *,
4
+ row_number() over (
5
+ partition by {% if has_source %} __source, {% endif %} __key, __timestamp
6
+ order by
7
+ /* prioritize delete over upsert */
8
+ __operation asc,
9
+ {% if has_order_by %} {% for o in order_duplicate_by %} {{ o }}, {% endfor %} {% endif %}
10
+ ) as __deduplicate_key_rn
11
+ from {{ parent_deduplicate_key }}
12
+ where true
13
+ ),
14
+ __deduplicated_key as (select *, from __deduplicate_key where __deduplicate_key_rn == 1),
@@ -0,0 +1,71 @@
1
+ {% if filter == "latest" %}
2
+ __filter_latest_timestamp as (
3
+ select {% if has_source %} t.__source, {% endif %} max(t.__timestamp) as __max_timestamp
4
+ from {{ parent_filter }} t
5
+ {% if has_source %} group by t.__source {% endif %}
6
+ ),
7
+ __filtered as (
8
+ select
9
+ {% for field in fields %} {{ field }}, {% endfor %}
10
+ s.__operation,
11
+ s.__timestamp,
12
+ s.__hash,
13
+ s.__key,
14
+ {% if has_source %} s.__source, {% endif %}
15
+ {% if has_metadata %} s.__metadata, {% endif %}
16
+ {% if has_rescued_data %} __rescued_data, {% endif %}
17
+ from {{ parent_filter }} s
18
+ where
19
+ true
20
+ and exists (
21
+ select 1
22
+ from __filter_latest_timestamp m
23
+ where
24
+ true and s.__timestamp = m.__max_timestamp
25
+ {% if has_source %} and s.__source == m.__source {% endif %}
26
+ )
27
+ ),
28
+ {% else %}
29
+ __filter_updated_timestamp as (
30
+ select
31
+ {% if has_source %} t.__source,
32
+ {% endif %}
33
+ {% if cdc == "nocdc" %} coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
34
+ {% endif %}
35
+ {% if cdc == "scd1" %} coalesce(max(t.__timestamp), cast('0001-01-01' as timestamp)) as __max_timestamp
36
+ {% endif %}
37
+ {% if cdc == "scd2" %} coalesce(max(t.__valid_from), cast('0001-01-01' as timestamp)) as __max_timestamp
38
+ {% endif %}
39
+ from {{ tgt }} t
40
+ where true
41
+ {% if has_source %}
42
+ and exists (select 1 from {{ parent_filter }} s where s.__source == t.__source) group by t.__source
43
+ {% endif %}
44
+ ),
45
+ __filtered as (
46
+ select
47
+ {% for field in fields %} {{ field }},
48
+ {% endfor %}
49
+ s.__operation,
50
+ s.__timestamp,
51
+ s.__hash,
52
+ s.__key,
53
+ {% if has_source %} s.__source,
54
+ {% endif %}
55
+ {% if has_metadata %} s.__metadata,
56
+ {% endif %}
57
+ {% if has_rescued_data %} __rescued_data,
58
+ {% endif %}
59
+ from {{ parent_filter }} s
60
+ where
61
+ true
62
+ and exists (
63
+ select 1
64
+ from __filter_updated_timestamp m
65
+ where
66
+ true and s.__timestamp > m.__max_timestamp
67
+ {% if has_source %} and s.__source == m.__source
68
+ {% endif %}
69
+ )
70
+ ),
71
+ {% endif %}
@@ -0,0 +1 @@
1
+ select * {% if all_except %} except ({% for e in all_except %}{{ e }}, {% endfor %}), {% endif %} from __final
@@ -0,0 +1 @@
1
+ {% macro hash(fields) -%} md5(array_join(array({% for f in fields %}{{ f }}, {% endfor %}), '*', '-1')) {%- endmacro %}
@@ -0,0 +1,10 @@
1
+ __final as (
2
+ select *
3
+ from {{ parent_cdc }}
4
+ {% if filter %}
5
+ where
6
+ true
7
+ -- operation current added by filter
8
+ and __operation <> 'current'
9
+ {% endif %}
10
+ )
@@ -0,0 +1,120 @@
1
+ {% import 'query/hash.sql.jinja' as h -%}
2
+
3
+ {% if mode == "update" %}
4
+ __rectified_base as (
5
+ select
6
+ {% for field in fields %} {{ field }}, {% endfor %}
7
+ s.__operation,
8
+ s.__operation as __original_operation,
9
+ s.__timestamp,
10
+ s.__timestamp as __original_timestamp,
11
+ s.__hash,
12
+ s.__key,
13
+ {% if has_identity %} s.__identity, {% endif %}
14
+ {% if has_source %} s.__source, {% endif %}
15
+ {% if has_metadata %} s.__metadata, {% endif %}
16
+ {% if has_rescued_data %} s.__rescued_data, {% endif %}
17
+ from {{ parent_rectify }} s
18
+ {% if has_rows %}
19
+ union all
20
+ select *
21
+ from __current
22
+ {% endif %}
23
+ ),
24
+ {% endif %}
25
+ __rectified_next_operation as (
26
+ select
27
+ *,
28
+ lead(__operation) over (
29
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
30
+ ) as __rectified_next_operation
31
+ {% if mode == "update" %} from __rectified_base p
32
+ {% else %} from {{ parent_rectify }}
33
+ {% endif %}
34
+ ),
35
+ __rectified_timestamps as (
36
+ select
37
+ {% if has_source %} __source, {% endif %}
38
+ __timestamp,
39
+ lead(__timestamp) over (
40
+ {% if has_source %}partition by __source {% endif %} order by __timestamp asc
41
+ ) as __rectified_next_timestamp,
42
+ lead(if(max(__operation) == 'reload', __timestamp, null)) ignore nulls over (
43
+ {% if has_source %}partition by __source {% endif %} order by __timestamp asc
44
+ ) as __rectified_next_timestamp_reload,
45
+ from __rectified_next_operation
46
+ group by {% if has_source %} __source, {% endif %} __timestamp
47
+ ),
48
+ __rectified_is_deleted_next as (
49
+ select
50
+ cur.*,
51
+ t.* except ({% if has_source %}t.__source, {% endif %} t.__timestamp),
52
+ -- there is at least one more reload
53
+ __rectified_next_timestamp_reload is not null as __rectify_more_reload,
54
+ -- the next operation is before the next reload
55
+ if(
56
+ __rectify_more_reload, t.__rectified_next_timestamp < t.__rectified_next_timestamp_reload, true
57
+ ) as __rectify_key_next_operation_before_next_reload,
58
+ -- the record is deleted
59
+ cur.__operation == 'delete' as __rectified_key_is_deleted,
60
+ -- the record is not found in next reload
61
+ __rectify_more_reload and nxt.__timestamp is null as __rectified_key_not_found_in_next_reload,
62
+ -- there is no more operation
63
+ t.__rectified_next_timestamp is null as __rectified_no_more_operation,
64
+ -- the record is deleted before next reload
65
+ __rectify_key_next_operation_before_next_reload
66
+ and cur.__rectified_next_operation <=> 'delete' as __rectified_key_is_deleted_next,
67
+ -- the record is updated before next reload
68
+ __rectify_key_next_operation_before_next_reload
69
+ and cur.__rectified_next_operation <=> 'upsert' as __rectified_key_is_updated_next,
70
+ case
71
+ when __rectified_key_is_deleted
72
+ then false
73
+ when __rectified_key_is_updated_next
74
+ then false
75
+ when __rectified_key_is_deleted_next
76
+ then false
77
+ when __rectified_no_more_operation
78
+ then false
79
+ when __rectified_key_not_found_in_next_reload
80
+ then true
81
+ else false
82
+ end as __rectified_is_deleted_next
83
+ from __rectified_next_operation cur
84
+ left join
85
+ __rectified_timestamps t on cur.__timestamp = t.__timestamp
86
+ {% if has_source %} and cur.__source == t.__source {% endif %}
87
+ left join
88
+ __rectified_next_operation nxt
89
+ on t.__rectified_next_timestamp_reload = nxt.__timestamp
90
+ and cur.__key == nxt.__key
91
+ {% if has_source %} and cur.__source == nxt.__source {% endif %}
92
+ ),
93
+ {% if mode == "complete" %} __rectified as ( {% else %} __rectified_operation as (
94
+ {% endif %}
95
+ select
96
+ c.* except (c.__operation, c.__timestamp),
97
+ if(c.__operation == 'delete', 'delete', d2.__rectified_operation) as __operation,
98
+ case
99
+ when c.__operation == 'delete'
100
+ then c.__timestamp
101
+ when d2.__rectified_operation == 'upsert'
102
+ then c.__timestamp
103
+ else c.__rectified_next_timestamp_reload
104
+ end as __timestamp
105
+ from __rectified_is_deleted_next c
106
+ cross join
107
+ (
108
+ select false as __is_deleted, 'upsert' as __rectified_operation
109
+ union all
110
+ select true as __is_deleted, 'delete' as __rectified_operation
111
+ union all
112
+ select true as __is_deleted, 'upsert' as __rectified_operation
113
+ ) d2
114
+ on c.__rectified_is_deleted_next = d2.__is_deleted
115
+ ),
116
+ {% if mode == "update" %}
117
+ __rectified as (
118
+ select * from __rectified_operation where not (__original_operation == 'current' and __operation == 'upsert')
119
+ ),
120
+ {% endif %}
@@ -0,0 +1,112 @@
1
+ __scd1_base as (
2
+ select
3
+ *,
4
+ {% if not rectify %} __operation as __original_operation, {% endif %}
5
+ lead(__operation) over (
6
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
7
+ ) as __scd1_next_operation
8
+ from {{ parent_cdc }}
9
+ ),
10
+ __scd1_last_key as (
11
+ -- take last update as it is the latest picture
12
+ select
13
+ *,
14
+ row_number() over (
15
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp desc
16
+ ) as __scd1_rn
17
+ from __scd1_base
18
+ where true and __operation == 'upsert'
19
+ {% if mode == "update" %}
20
+ {% if has_rows %}
21
+ -- take first delete ONLY if no upsert is present
22
+ union all
23
+ select
24
+ *,
25
+ row_number() over (
26
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
27
+ ) as __scd1_rn
28
+ from __scd1_base b
29
+ where
30
+ true
31
+ and __operation == 'delete'
32
+ and not exists (
33
+ select 1
34
+ from __scd1_base b2
35
+ where
36
+ true and b.__key == b2.__key
37
+ {% if has_source %} and b.__source == b2.__source {% endif %} and b2.__operation == 'upsert'
38
+ )
39
+ {% endif %}
40
+ {% endif %}
41
+ ),
42
+ __scd1 as (
43
+ select
44
+ *,
45
+ __scd1_next_operation <=> 'delete'
46
+ or __operation == 'delete' as __is_deleted,
47
+ not (__scd1_next_operation <=> 'delete' or __operation == 'delete') as __is_current
48
+ from __scd1_last_key
49
+ where true and __scd1_rn == 1
50
+ ),
51
+ {% if mode == "complete" %}
52
+ __final as (
53
+ select
54
+ {% for field in fields %} s.{{ field }}, {% endfor %},
55
+ {% if has_identity %} __identity, {% endif %}
56
+ s.__key,
57
+ s.__timestamp,
58
+ {% if soft_delete %} s.__is_current, s.__is_deleted, {% endif %}
59
+ s.__hash,
60
+ {% if has_source %} s.__source, {% endif %}
61
+ {% if has_metadata %} s.__metadata, {% endif %}
62
+ {% if has_rescued_data %} s.__rescued_data, {% endif %}
63
+ from __scd1 s
64
+ where true {% if not soft_delete %} and s.__is_current {% endif %}
65
+ )
66
+ {% else %}
67
+ __merge_condition as (
68
+ select s.*, s.__key as __merge_key, o.__merge_condition
69
+ from __scd1 s
70
+ left join
71
+ (
72
+ select 'upsert' as __operation, 'upsert' as __merge_condition
73
+ {% if has_rows %}
74
+ union all
75
+ select 'delete' as __operation, 'delete' as __merge_condition
76
+ {% endif %}
77
+ ) o
78
+ on s.__operation = o.__operation
79
+ ),
80
+ {% if has_rows %}
81
+ __scd1_no_fake_update as (
82
+ select *
83
+ from __merge_condition m left
84
+ anti join
85
+ __current c on m.__key == c.__key and m.__hash = c.__hash
86
+ {% if has_source %} and m.__source = c.__source {% endif %} and m.__operation == 'upsert'
87
+ ),
88
+ {% endif %}
89
+ __final as (
90
+ select
91
+ __merge_key,
92
+ __merge_condition,
93
+ {% for field in fields %} {{ field }},
94
+ {% endfor %},
95
+ {% if has_identity %} __identity,
96
+ {% endif %}
97
+ __key,
98
+ __timestamp,
99
+ {% if soft_delete %} __is_current, __is_deleted,
100
+ {% endif %}
101
+ __hash,
102
+ {% if has_source %} __source,
103
+ {% endif %}
104
+ {% if has_metadata %} __metadata,
105
+ {% endif %}
106
+ {% if has_rescued_data %} __rescued_data,
107
+ {% endif %}
108
+ {% if has_rows %} from __scd1_no_fake_update m
109
+ {% else %} from __merge_condition m
110
+ {% endif %}
111
+ )
112
+ {% endif %}
@@ -0,0 +1,114 @@
1
+ __scd2_base as (
2
+ select
3
+ *,
4
+ {% if not rectify %} __operation as __original_operation, {% endif %}
5
+ lead(__operation) over (
6
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
7
+ ) as __scd2_next_operation,
8
+ lead(__timestamp) over (
9
+ partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
10
+ ) as __scd2_next_timestamp
11
+ from {{ parent_cdc }}
12
+ ),
13
+ __scd2 as (
14
+ select
15
+ *,
16
+ __timestamp as __valid_from,
17
+ coalesce(__scd2_next_timestamp - interval 1 second, cast('9999-12-31' as timestamp)) as __valid_to,
18
+ __operation <> 'delete' and __valid_to <=> '9999-12-31' as __is_current,
19
+ __operation == 'delete' or __scd2_next_operation <=> 'delete' as __is_deleted,
20
+ {% if mode == "update" %}
21
+ row_number() over (
22
+ partition by __key{% if has_source %}, __source{% endif %} order by __timestamp asc
23
+ ) as __scd2_rn
24
+ {% endif %}
25
+ from __scd2_base
26
+ ),
27
+ {% if mode == "complete" %}
28
+ __complete as (select s.* from __scd2 s where true and not __operation <=> 'delete'),
29
+ __final as (
30
+ select
31
+ {% for field in fields %} {{ field }}, {% endfor %},
32
+ {% if has_identity %} __identity, {% endif %}
33
+ __key,
34
+ if(
35
+ __valid_from == min(__valid_from) over (partition by null),
36
+ cast('1900-01-01' as timestamp),
37
+ __valid_from
38
+ ) as __valid_from,
39
+ __valid_to,
40
+ __is_current,
41
+ {% if soft_delete %} __is_deleted, {% endif %}
42
+ __hash,
43
+ {% if has_source %} __source, {% endif %}
44
+ {% if has_metadata %} __metadata, {% endif %}
45
+ {% if has_rescued_data %} __rescued_data, {% endif %}
46
+ from __complete
47
+ )
48
+ {% else %}
49
+ {% if has_rows %}
50
+ __scd2_no_fake_update as ( /* drop "fake" updates: first upsert per key whose hash equals the row already in __current */
51
+ select
52
+ * except (__scd2_rn),
53
+ row_number() over (partition by {% if has_source %} `__source`, {% endif %} `__key` order by `__timestamp` asc) as `__scd2_rn` /* renumber after the anti join, per source and key like __scd2 */
54
+ from __scd2 s left
55
+ anti join
56
+ __current c on s.__key == c.__key and s.__hash == c.__hash
57
+ {% if has_source %} and s.__source == c.__source {% endif %}
58
+ and s.__operation == 'upsert'
59
+ and s.__scd2_rn == 1
60
+ ),
61
+ {% endif %}
62
+ __merge_condition as (
63
+ select s.*, if(__merge_condition == 'insert', null, __key) as __merge_key, o.__merge_condition
64
+ {% if has_rows %} from __scd2_no_fake_update s
65
+ {% else %} from __scd2 s
66
+ {% endif %}
67
+ inner join
68
+ (
69
+ select 'upsert' as __operation, 'insert' as __merge_condition
70
+ {% if has_rows %}
71
+ union all
72
+ select 'upsert' as __operation, 'update' as __merge_condition
73
+ union all
74
+ select 'delete' as __operation, 'delete' as __merge_condition
75
+ {% endif %}
76
+ ) o
77
+ on s.__operation = o.__operation
78
+ -- only the first record can be an update or a delete
79
+ where (s.__scd2_rn == 1 and o.__merge_condition in ('update', 'delete')) or o.__merge_condition == 'insert'
80
+ ),
81
+ __final as (
82
+ select
83
+ __merge_key,
84
+ __merge_condition,
85
+ {% for field in fields %} {{ field }},
86
+ {% endfor %},
87
+ {% if has_identity %} __identity,
88
+ {% endif %}
89
+ __key,
90
+ {% if fix_valid_from %}
91
+ {% if not has_rows %}
92
+ if(
93
+ __valid_from == min(__valid_from) over (partition by null),
94
+ cast('1900-01-01' as timestamp),
95
+ __valid_from
96
+ ) as __valid_from,
97
+ {% else %} __valid_from,
98
+ {% endif %}
99
+ {% else %} __valid_from,
100
+ {% endif %}
101
+ __valid_to,
102
+ __is_current and __merge_condition == 'insert' as __is_current,
103
+ {% if soft_delete %} __is_deleted,
104
+ {% endif %}
105
+ __hash,
106
+ {% if has_source %} __source,
107
+ {% endif %}
108
+ {% if has_metadata %} __metadata,
109
+ {% endif %}
110
+ {% if has_rescued_data %} __rescued_data,
111
+ {% endif %}
112
+ from __merge_condition m
113
+ )
114
+ {% endif %}
@@ -0,0 +1,11 @@
1
+ {% include 'query/context.sql.jinja' %}
2
+ {% include 'query/base.sql.jinja' %}
3
+ {% if filter %} {% include 'query/filter.sql.jinja' %} {% endif %}
4
+ {% if deduplicate_key %} {% include 'query/deduplicate_key.sql.jinja' %} {% endif %}
5
+ {% if mode == "update" %} {% if has_rows %} {% include 'query/current.sql.jinja' %} {% endif %} {% endif %}
6
+ {% if rectify %} {% include 'query/rectify.sql.jinja' %} {% endif %}
7
+ {% if deduplicate_hash %} {% include 'query/deduplicate_hash.sql.jinja' %} {% endif %}
8
+ {% if cdc == "nocdc" %} {% include 'query/nocdc.sql.jinja' %} {% endif %}
9
+ {% if cdc == "scd1" %} {% include 'query/scd1.sql.jinja' %} {% endif %}
10
+ {% if cdc == "scd2" %} {% include 'query/scd2.sql.jinja' %} {% endif %}
11
+ {% include 'query/final.sql.jinja' %}
@@ -0,0 +1,51 @@
1
+ from fabricks.context.runtime import (
2
+ BRONZE,
3
+ CONF_RUNTIME,
4
+ FABRICKS_STORAGE,
5
+ GOLD,
6
+ IS_DEBUG,
7
+ IS_LIVE,
8
+ IS_TEST,
9
+ PATH_EXTENDERS,
10
+ PATH_LIBRARIES,
11
+ PATH_PARSERS,
12
+ PATH_REQUIREMENTS,
13
+ PATH_RUNTIME,
14
+ PATH_SCHEDULES,
15
+ PATH_UDFS,
16
+ PATH_VIEWS,
17
+ PATHS_RUNTIME,
18
+ PATHS_STORAGE,
19
+ SECRET_SCOPE,
20
+ SILVER,
21
+ STEPS,
22
+ VARIABLES,
23
+ VERSION,
24
+ )
25
+ from fabricks.context.spark import build_spark_session
26
+
27
+ __all__ = [
28
+ "BRONZE",
29
+ "build_spark_session",
30
+ "CONF_RUNTIME",
31
+ "FABRICKS_STORAGE",
32
+ "GOLD",
33
+ "IS_DEBUG",
34
+ "IS_LIVE",
35
+ "IS_TEST",
36
+ "PATH_EXTENDERS",
37
+ "PATH_LIBRARIES",
38
+ "PATH_PARSERS",
39
+ "PATH_REQUIREMENTS",
40
+ "PATH_RUNTIME",
41
+ "PATH_SCHEDULES",
42
+ "PATH_UDFS",
43
+ "PATH_VIEWS",
44
+ "PATHS_RUNTIME",
45
+ "PATHS_STORAGE",
46
+ "SECRET_SCOPE",
47
+ "SILVER",
48
+ "STEPS",
49
+ "VARIABLES",
50
+ "VERSION",
51
+ ]
@@ -0,0 +1,26 @@
1
+ import logging
2
+ from functools import wraps
3
+ from typing import Callable, cast
4
+
5
+ from fabricks.context.runtime import FABRICKS_STORAGE, SECRET_SCOPE
6
+ from fabricks.utils.azure_table import AzureTable
7
+ from fabricks.utils.log import get_logger
8
+ from fabricks.utils.secret import AccessKey, get_secret_from_secret_scope
9
+
10
+ storage_account = FABRICKS_STORAGE.get_storage_account()
11
+ secret = get_secret_from_secret_scope(SECRET_SCOPE, f"{storage_account}-access-key")
12
+ access_key = cast(AccessKey, secret).key
13
+
14
+ table = AzureTable("logs", storage_account=storage_account, access_key=access_key)
15
+ Logger, TableLogger = get_logger("logs", logging.DEBUG, table=table)
16
+
17
+
18
+ def flush(func: Callable):
19
+ @wraps(func)
20
+ def wrapper(*args, **kwargs):
21
+ try:
22
+ return func(*args, **kwargs)
23
+ finally:
24
+ TableLogger.flush()
25
+
26
+ return wrapper