fabricks 3.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
@@ -0,0 +1,85 @@
1
-- SCD1: collapse the change feed to (at most) one row per key -- the latest
-- state -- and classify each surviving row for selection or merge.
__scd1_base as (
    select
        *,
        {% if not rectify %} __operation as __original_operation, {% endif %}
        -- operation of the next change of the same key (null for the last change);
        -- a following delete marks the key as deleted
        lead(__operation) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
        ) as __scd1_next_operation
    from {{ parent_cdc }}
),
__scd1_last_key as (
    -- take last update as it is the latest picture
    select
        *,
        row_number() over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp desc
        ) as __scd1_rn
    from __scd1_base
    where true and __operation == 'upsert'
    {% if mode == "update" %}
    {% if has_rows %}
    -- take first delete ONLY if no upsert is present
    union all
    select
        *,
        row_number() over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
        ) as __scd1_rn
    from __scd1_base b
    where
        true
        and __operation == 'delete'
        and not exists (
            select 1
            from __scd1_base b2
            where
                true and b.__key == b2.__key
                {% if has_source %} and b.__source == b2.__source {% endif %} and b2.__operation == 'upsert'
        )
    {% endif %}
    {% endif %}
),
__scd1 as (
    -- keep exactly one row per key and derive the deleted/current flags
    -- (<=> is the null-safe equality operator, so a null next-operation
    -- compares as false rather than null)
    select
        *,
        __scd1_next_operation <=> 'delete' or __operation == 'delete' as __is_deleted,
        not (__scd1_next_operation <=> 'delete' or __operation == 'delete') as __is_current
    from __scd1_last_key
    where true and __scd1_rn == 1
),
{% if mode == "complete" %}
-- complete mode: plain projection of the outputs; deleted rows are dropped
-- unless soft delete is enabled
-- NOTE(review): the generated select list ends with a trailing comma before
-- FROM; this relies on the SQL engine accepting trailing commas -- confirm
__final as (
    select {% for output in outputs %} `{{ output }}`, {% endfor %}
    from __scd1 s
    where true {% if not soft_delete %} and s.__is_current {% endif %}
)
{% else %}
-- update mode: tag each row with the merge condition expected downstream
__merge_condition as (
    select s.*, s.__key as __merge_key, o.__merge_condition
    from __scd1 s
    left join
        (
            select 'upsert' as __operation, 'upsert' as __merge_condition
            {% if has_rows %}
            union all
            select 'delete' as __operation, 'delete' as __merge_condition
            {% endif %}
        ) o
        on s.__operation = o.__operation
),
{% if has_rows %}
-- drop upserts whose hash already matches the current target row
-- (avoids no-op updates against the target)
__scd1_no_fake_update as (
    select *
    from __merge_condition m
    left anti join
        __current c on m.__key == c.__key and m.__hash = c.__hash
        {% if has_source %} and m.__source = c.__source {% endif %} and m.__operation == 'upsert'
),
{% endif %}
__final as (
    select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if has_rows %} from __scd1_no_fake_update m
    {% else %} from __merge_condition m
    {% endif %}
)
{% endif %}
@@ -0,0 +1,98 @@
1
-- SCD2: turn the change feed into validity intervals (__valid_from/__valid_to)
-- per key, then either project them (complete mode) or tag them with a merge
-- condition (update mode).
__scd2_base as (
    select
        *,
        {% if not rectify %} __operation as __original_operation, {% endif %}
        -- operation and timestamp of the next change of the same key;
        -- null for the most recent change
        lead(__operation) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
        ) as __scd2_next_operation,
        lead(__timestamp) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
        ) as __scd2_next_timestamp
    from {{ parent_cdc }}
),
__scd2 as (
    select
        *,
        __timestamp as __valid_from,
        -- close the interval one second before the next change; open intervals
        -- get the 9999-12-31 sentinel
        coalesce(__scd2_next_timestamp - interval 1 second, cast('9999-12-31' as timestamp)) as __valid_to,
        __operation <> 'delete' and __valid_to <=> '9999-12-31' as __is_current,
        __operation == 'delete' or __scd2_next_operation <=> 'delete' as __is_deleted,
        {% if mode == "update" %}
        row_number() over (
            partition by __key{% if has_source %}, __source{% endif %} order by __timestamp asc
        ) as __scd2_rn
        {% endif %}
    from __scd2_base
),
{% if mode == "complete" %}
-- complete mode: keep every interval except explicit deletes
__complete as (select s.* from __scd2 s where true and not __operation <=> 'delete'),
{% if correct_valid_from %}
-- push the very first __valid_from back to 1900-01-01 so the history covers
-- any earlier point in time
__correct_valid_from as (
    select
        * except (__valid_from),
        if(
            __valid_from == min(__valid_from) over (partition by null),
            cast('1900-01-01' as timestamp),
            __valid_from
        ) as __valid_from
    from __complete
),
{% endif %}
-- NOTE(review): the generated select list ends with a trailing comma before
-- FROM; this relies on the SQL engine accepting trailing commas -- confirm
__final as (
    select {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if correct_valid_from %} from __correct_valid_from
    {% else %} from __complete
    {% endif %}
)
{% else %}
{% if has_rows %}
-- drop upserts whose hash already matches the current target row, then
-- renumber so the "first record per key" rule below still holds
__scd2_no_fake_update as (
    select
        * except (__scd2_rn),
        row_number() over (partition by `__key` order by `__timestamp` asc) as `__scd2_rn`
    from __scd2 s
    left anti join
        __current c on s.__key == c.__key and s.__hash == c.__hash
        {% if has_source %} and s.__source == c.__source {% endif %}
        and s.__operation == 'upsert'
        and s.__scd2_rn == 1
),
{% endif %}
-- update mode: every upsert inserts a new interval; the first change per key
-- may additionally update/delete the existing current row (merge key is null
-- for plain inserts so they never match an existing row)
__merge_condition as (
    select s.*, if(__merge_condition == 'insert', null, __key) as __merge_key, o.__merge_condition
    {% if has_rows %} from __scd2_no_fake_update s
    {% else %} from __scd2 s
    {% endif %}
    inner join
        (
            select 'upsert' as __operation, 'insert' as __merge_condition
            {% if has_rows %}
            union all
            select 'upsert' as __operation, 'update' as __merge_condition
            union all
            select 'delete' as __operation, 'delete' as __merge_condition
            {% endif %}
        ) o
        on s.__operation = o.__operation
    -- only the first record can be an update or a delete
    where (s.__scd2_rn == 1 and o.__merge_condition in ('update', 'delete')) or o.__merge_condition == 'insert'
),
{% if correct_valid_from %}
__correct_valid_from as (
    select
        * except (__valid_from),
        if(
            __valid_from == min(__valid_from) over (partition by null),
            cast('1900-01-01' as timestamp),
            __valid_from
        ) as __valid_from
    from __merge_condition
),
{% endif %}
__final as (
    select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if correct_valid_from %} from __correct_valid_from
    {% else %} from __merge_condition
    {% endif %}
)
{% endif %}
@@ -0,0 +1,15 @@
1
{# Top-level assembly of the CDC query. Each include presumably contributes
   CTEs to one WITH chain started by context.sql.jinja, and final.sql.jinja
   emits the closing select -- confirm against the included templates. #}
{% include 'queries/context.sql.jinja' %}
{% include 'ctes/base.sql.jinja' %}
{# optional pre-processing CTEs, in fixed order #}
{% if slice %} {% include 'ctes/slice.sql.jinja' %} {% endif %}
{% if deduplicate_key %} {% include 'ctes/deduplicate_key.sql.jinja' %} {% endif %}
{# the __current snapshot of the target is only needed for incremental
   updates against a non-empty target #}
{% if mode == "update" %} {% if has_rows %} {% include 'ctes/current.sql.jinja' %} {% endif %} {% endif %}
{% if rectify %} {% include 'ctes/rectify.sql.jinja' %} {% endif %}
{% if deduplicate_hash %} {% include 'ctes/deduplicate_hash.sql.jinja' %} {% endif %}
{# exactly one CDC flavor is rendered: nocdc, scd1 or scd2 #}
{% if cdc == "nocdc" %}
{% if mode == "update" %} {% include 'queries/nocdc/update.sql.jinja' %}
{% else %} {% include 'queries/nocdc/complete.sql.jinja' %}
{% endif %}
{% endif %}
{% if cdc == "scd1" %} {% include 'queries/scd1.sql.jinja' %} {% endif %}
{% if cdc == "scd2" %} {% include 'queries/scd2.sql.jinja' %} {% endif %}
{% include 'queries/final.sql.jinja' %}
@@ -0,0 +1,72 @@
1
+ from fabricks.context.config import (
2
+ IS_DEBUGMODE,
3
+ IS_DEVMODE,
4
+ IS_JOB_CONFIG_FROM_YAML,
5
+ LOGLEVEL,
6
+ PATH_CONFIG,
7
+ PATH_NOTEBOOKS,
8
+ PATH_RUNTIME,
9
+ )
10
+ from fabricks.context.runtime import (
11
+ BRONZE,
12
+ CATALOG,
13
+ CONF_RUNTIME,
14
+ FABRICKS_STORAGE,
15
+ FABRICKS_STORAGE_CREDENTIAL,
16
+ GOLD,
17
+ IS_TYPE_WIDENING,
18
+ IS_UNITY_CATALOG,
19
+ PATH_EXTENDERS,
20
+ PATH_MASKS,
21
+ PATH_PARSERS,
22
+ PATH_REQUIREMENTS,
23
+ PATH_SCHEDULES,
24
+ PATH_UDFS,
25
+ PATH_VIEWS,
26
+ PATHS_RUNTIME,
27
+ PATHS_STORAGE,
28
+ SECRET_SCOPE,
29
+ SILVER,
30
+ STEPS,
31
+ TIMEZONE,
32
+ VARIABLES,
33
+ )
34
+ from fabricks.context.spark_session import DBUTILS, SPARK, build_spark_session, init_spark_session
35
+ from fabricks.context.utils import pprint_runtime
36
+
37
# Public API of fabricks.context. Kept in case-insensitive alphabetical order
# (previously FABRICKS_STORAGE_CREDENTIAL sorted before FABRICKS_STORAGE and
# pprint_runtime was wedged between PATHS_RUNTIME and PATHS_STORAGE).
__all__ = [
    "BRONZE",
    "build_spark_session",
    "CATALOG",
    "CONF_RUNTIME",
    "DBUTILS",
    "FABRICKS_STORAGE",
    "FABRICKS_STORAGE_CREDENTIAL",
    "GOLD",
    "init_spark_session",
    "IS_DEBUGMODE",
    "IS_DEVMODE",
    "IS_JOB_CONFIG_FROM_YAML",
    "IS_TYPE_WIDENING",
    "IS_UNITY_CATALOG",
    "LOGLEVEL",
    "PATH_CONFIG",
    "PATH_EXTENDERS",
    "PATH_MASKS",
    "PATH_NOTEBOOKS",
    "PATH_PARSERS",
    "PATH_REQUIREMENTS",
    "PATH_RUNTIME",
    "PATH_SCHEDULES",
    "PATH_UDFS",
    "PATH_VIEWS",
    "PATHS_RUNTIME",
    "PATHS_STORAGE",
    "pprint_runtime",
    "SECRET_SCOPE",
    "SILVER",
    "SPARK",
    "STEPS",
    "TIMEZONE",
    "VARIABLES",
]
@@ -0,0 +1,133 @@
1
+ from typing import List, Optional, TypedDict
2
+
3
+
4
class RuntimePathOptions(TypedDict):
    """Locations of the runtime artifacts, as configured at runtime level."""

    storage: str
    udfs: str
    parsers: str
    schedules: str
    views: str
    requirements: str


class RuntimeTimeoutOptions(TypedDict):
    """Runtime-level timeout settings; all values are required.

    Units are not stated here -- presumably seconds, confirm against the
    scheduler that consumes them.
    """

    step: int
    job: int
    pre_run: int
    post_run: int


class StepTimeoutOptions(TypedDict):
    """Per-step timeout overrides.

    Each value is optional; ``None`` presumably falls back to the
    corresponding :class:`RuntimeTimeoutOptions` value -- confirm.
    """

    step: Optional[int]
    job: Optional[int]
    pre_run: Optional[int]
    post_run: Optional[int]
25
+
26
+
27
class RuntimeOptions(TypedDict):
    """Top-level runtime options of the Fabricks configuration."""

    secret_scope: str
    unity_catalog: Optional[bool]
    type_widening: Optional[bool]
    catalog: Optional[str]
    workers: int
    timeouts: RuntimeTimeoutOptions
    retention_days: int
    timezone: Optional[str]


class SparkOptions(TypedDict):
    """Spark settings: ``sql`` and ``conf`` key/value mappings."""

    sql: dict
    conf: dict
41
+
42
+
43
class StepPathOptions(TypedDict):
    """Per-step locations for runtime files and storage."""

    runtime: str
    storage: str


class InvokeOptions(TypedDict):
    """A notebook invocation (used e.g. for pre/post-run hooks)."""

    notebook: str
    arguments: Optional[dict[str, str]]


class ExtenderOptions(TypedDict):
    """An extender to apply, with optional string arguments."""

    extender: str
    arguments: Optional[dict[str, str]]
56
+
57
+
58
class StepOptions(TypedDict):
    """Options shared by every step."""

    order: int
    workers: Optional[int]
    timeouts: StepTimeoutOptions
    extenders: Optional[List[str]]
    pre_run: Optional[InvokeOptions]
    post_run: Optional[InvokeOptions]


class SilverOptions(StepOptions):
    """Silver-step options; ``parent`` names the upstream step."""

    parent: str
    stream: Optional[bool]
    local_checkpoint: Optional[bool]


class GoldOptions(StepOptions):
    """Gold-step options."""

    schema_drift: Optional[bool]
    metadata: Optional[bool]


class Step(TypedDict):
    """Base shape of a step entry: only the name is mandatory."""

    name: str
80
+
81
+
82
class TableOptions(TypedDict):
    """Table-level options applied to a step's tables."""

    powerbi: Optional[bool]
    liquid_clustering: Optional[bool]
    properties: Optional[dict[str, str]]
    retention_days: Optional[int]
    # presumably column name -> mask name; confirm against fabricks.core.masks
    masks: Optional[dict[str, str]]


class Bronze(Step):
    """Configuration of a bronze step."""

    options: StepOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class Silver(Step):
    """Configuration of a silver step."""

    options: SilverOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class Gold(Step):
    """Configuration of a gold step."""

    options: GoldOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class PowerBI(Step):
    """PowerBI step: no extra fields beyond the step name."""

    pass
110
+
111
+
112
class DatabasePathOptions(TypedDict):
    """Storage location of a database."""

    storage: str


class Database(TypedDict):
    """An extra database with its storage location."""

    name: str
    path_options: DatabasePathOptions


class Conf(TypedDict):
    """Root shape of the Fabricks runtime configuration."""

    name: str
    options: RuntimeOptions
    path_options: RuntimePathOptions
    extender_options: Optional[ExtenderOptions]
    spark_options: SparkOptions
    bronze: Optional[List[Bronze]]
    silver: Optional[List[Silver]]
    gold: Optional[List[Gold]]
    powerbi: Optional[List[PowerBI]]
    databases: Optional[List[Database]]
    variables: Optional[List[dict[str, str]]]
    credentials: Optional[List[dict[str, str]]]
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import os
3
+ from typing import Final
4
+
5
+ from fabricks.context.config.utils import get_config_from_file
6
+ from fabricks.utils.path import Path
7
+ from fabricks.utils.spark import spark
8
+
9
# Resolution order for every setting below: environment variable first, then
# the config file discovered by get_config_from_file() (fabricksconfig.json or
# a [tool.fabricks] table in pyproject.toml), then a derived default.
file_path, file_config = get_config_from_file()

# --- runtime path -----------------------------------------------------------
# FABRICKS_RUNTIME set to the literal string "none" (any case) means unset.
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
runtime = None if runtime.lower() == "none" else runtime
if runtime is None:
    if runtime := file_config.get("runtime"):
        # a file-relative runtime only makes sense if a config file was found
        assert file_path is not None
        runtime = file_path.joinpath(runtime)

if runtime is None:
    if file_path is not None:
        # fall back to the directory containing the config file itself
        runtime = file_path
    else:
        raise ValueError(
            "could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
        )

path_runtime = Path(runtime, assume_git=True)
PATH_RUNTIME: Final[Path] = path_runtime

# --- notebooks path ---------------------------------------------------------
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
notebooks = None if notebooks.lower() == "none" else notebooks
if notebooks is None:
    if notebooks := file_config.get("notebooks"):
        assert file_path is not None
        notebooks = file_path.joinpath(notebooks)

# default: <runtime>/notebooks
notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)

# --- boolean flags ----------------------------------------------------------
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
if is_job_config_from_yaml is None:
    # NOTE(review): unlike the debugmode/devmode flags below, this branch
    # asserts that a config file exists -- confirm the asymmetry is intended.
    assert file_path is not None
    is_job_config_from_yaml = file_config.get("job_config_from_yaml")

IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")

is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
if is_debugmode is None:
    is_debugmode = file_config.get("debugmode")

IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")

is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
if is_devmode is None:
    is_devmode = file_config.get("devmode")

IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")

# --- log level ---------------------------------------------------------------
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
if loglevel is None:
    loglevel = file_config.get("loglevel")

# default to INFO; anything other than the five standard names is rejected
loglevel = loglevel.upper() if loglevel else "INFO"
if loglevel == "DEBUG":
    _loglevel = logging.DEBUG
elif loglevel == "INFO":
    _loglevel = logging.INFO
elif loglevel == "WARNING":
    _loglevel = logging.WARNING
elif loglevel == "ERROR":
    _loglevel = logging.ERROR
elif loglevel == "CRITICAL":
    _loglevel = logging.CRITICAL
else:
    raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")

LOGLEVEL = _loglevel

# --- config file path --------------------------------------------------------
path_config = os.environ.get("FABRICKS_CONFIG")
if path_config is None:
    if path_config := file_config.get("config"):
        assert file_path is not None
        path_config = file_path.joinpath(path_config)
else:
    # env-provided value is resolved relative to the runtime path
    path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None

if not path_config:
    # default: <runtime>/fabricks/conf.<workspace org id>.yml
    path_config = PATH_RUNTIME.joinpath(
        "fabricks",
        f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
    ).string

PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
@@ -0,0 +1,53 @@
1
def get_config_from_toml():
    """Find the nearest ``pyproject.toml`` at or above the current directory.

    Returns:
        A ``(directory, config)`` tuple where ``config`` is the
        ``[tool.fabricks]`` table (``{}`` when absent), or ``(None, {})``
        when no ``pyproject.toml`` exists up to the filesystem root.
    """
    import os
    import pathlib
    import sys

    # tomllib only ships with Python >= 3.11; older versions use the backport.
    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib  # type: ignore

    current = pathlib.Path(os.getcwd())
    # Walk upwards until a pyproject.toml is found or the root is reached.
    while not (current / "pyproject.toml").exists():
        parent = current.parent
        if parent == current:  # filesystem root
            break
        current = parent

    candidate = current / "pyproject.toml"
    if not candidate.exists():
        return None, {}

    with open(candidate, "rb") as f:
        parsed = tomllib.load(f)
    return current, parsed.get("tool", {}).get("fabricks", {})
23
+
24
+
25
def get_config_from_json():
    """Find the nearest ``fabricksconfig.json`` at or above the current directory.

    Returns:
        A ``(directory, parsed config)`` tuple, or ``(None, {})`` when no
        ``fabricksconfig.json`` exists up to the filesystem root.
    """
    import json
    import os
    import pathlib

    current = pathlib.Path(os.getcwd())
    # Walk upwards until a fabricksconfig.json is found or the root is reached.
    while not (current / "fabricksconfig.json").exists():
        parent = current.parent
        if parent == current:  # filesystem root
            break
        current = parent

    candidate = current / "fabricksconfig.json"
    if not candidate.exists():
        return None, {}

    with open(candidate, "r") as f:
        return current, json.load(f)
42
+
43
+
44
def get_config_from_file():
    """Resolve the file-based Fabricks configuration.

    ``fabricksconfig.json`` takes precedence over the ``[tool.fabricks]``
    table in ``pyproject.toml``. Returns ``(None, {})`` when neither source
    yields a non-empty configuration.
    """
    for loader in (get_config_from_json, get_config_from_toml):
        path, config = loader()
        if config:
            return path, config

    return None, {}
@@ -0,0 +1,77 @@
1
+ import json
2
+ import logging
3
+ from typing import Final, Literal, Optional
4
+
5
+ import requests
6
+
7
+ from fabricks.context import IS_DEBUGMODE, LOGLEVEL, SECRET_SCOPE, TIMEZONE
8
+ from fabricks.utils.log import get_logger
9
+
10
# Module-wide logger, configured once at import time from the runtime
# settings (log level, debug mode, timezone); no log table is attached here.
logger, _ = get_logger(
    "logs",
    LOGLEVEL,
    table=None,
    debugmode=IS_DEBUGMODE,
    timezone=TIMEZONE,
)
# Silence Spark's SQL query-context logger below CRITICAL.
logging.getLogger("SQLQueryContextLogger").setLevel(logging.CRITICAL)

DEFAULT_LOGGER: Final[logging.Logger] = logger
20
+
21
+
22
def send_message_to_channel(
    channel: str,
    title: str,
    message: str,
    color: Optional[str] = None,
    loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO",
) -> bool:
    """
    Send a MessageCard to a channel via a webhook (e.g. Microsoft Teams).

    The webhook URL is read from the secret scope under the key
    ``{channel}-webhook-url``, after lower-casing the channel name and
    replacing spaces with dashes.

    Args:
        channel (str): Channel name used to look up the webhook secret
        title (str): Title and summary of the message card
        message (str): The message body to send
        color (str, optional): Hex color for the message card; when omitted,
            a default color is derived from ``loglevel``
        loglevel: Severity used to pick the default color

    Returns:
        bool: True if the message was sent successfully, False otherwise
    """
    from databricks.sdk.runtime import dbutils

    # Secret keys are derived from the normalized channel name.
    channel = channel.lower()
    channel = channel.replace(" ", "-")
    webhook_url = dbutils.secrets.get(scope=SECRET_SCOPE, key=f"{channel}-webhook-url")

    teams_message = {
        "@type": "MessageCard",
        "@context": "http://schema.org/extensions",
        "summary": title,
    }

    if title:
        teams_message["title"] = title

    if color is None:
        # Default theme colors per severity. Fixed: several values used to
        # carry a trailing space ("#00FF00 "), which is not a valid hex color.
        COLORS = {
            "DEBUG": "#00FFFF",
            "INFO": "#00FF00",
            "WARNING": "#FFFF00",
            "ERROR": "#FF0000",
            "CRITICAL": "#FF0000",
        }
        color = COLORS[loglevel]
    teams_message["themeColor"] = color

    teams_message["text"] = message

    teams_message_json = json.dumps(teams_message)

    try:
        response = requests.post(
            webhook_url,
            data=teams_message_json,
            headers={"Content-Type": "application/json"},
            timeout=30,  # avoid hanging forever on an unresponsive webhook
        )
    except requests.RequestException:
        # The documented contract is best-effort (bool result), so network
        # failures are reported as False instead of propagating.
        return False

    return response.status_code == 200
+ return False