fabricks 3.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +11 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +27 -0
- fabricks/api/core.py +4 -0
- fabricks/api/deploy.py +3 -0
- fabricks/api/exceptions.py +19 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/job_schema.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/masks.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/initialize.py +42 -0
- fabricks/api/notebooks/process.py +54 -0
- fabricks/api/notebooks/run.py +59 -0
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +31 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/schedules.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/api/version.py +3 -0
- fabricks/api/views.py +6 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/_types.py +10 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +223 -0
- fabricks/cdc/base/generator.py +177 -0
- fabricks/cdc/base/merger.py +110 -0
- fabricks/cdc/base/processor.py +471 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +20 -0
- fabricks/cdc/scd.py +22 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -0
- fabricks/cdc/templates/filters/final.sql.jinja +4 -0
- fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
- fabricks/cdc/templates/filters/update.sql.jinja +30 -0
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -0
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/queries/__init__.py +0 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/queries/final.sql.jinja +1 -0
- fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
- fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
- fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
- fabricks/cdc/templates/query.sql.jinja +15 -0
- fabricks/context/__init__.py +72 -0
- fabricks/context/_types.py +133 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +77 -0
- fabricks/context/runtime.py +117 -0
- fabricks/context/secret.py +103 -0
- fabricks/context/spark_session.py +82 -0
- fabricks/context/utils.py +80 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +99 -0
- fabricks/core/dags/generator.py +157 -0
- fabricks/core/dags/log.py +12 -0
- fabricks/core/dags/processor.py +228 -0
- fabricks/core/dags/run.py +39 -0
- fabricks/core/dags/terminator.py +25 -0
- fabricks/core/dags/utils.py +54 -0
- fabricks/core/extenders.py +33 -0
- fabricks/core/job_schema.py +32 -0
- fabricks/core/jobs/__init__.py +21 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/_types.py +284 -0
- fabricks/core/jobs/base/checker.py +139 -0
- fabricks/core/jobs/base/configurator.py +306 -0
- fabricks/core/jobs/base/exception.py +85 -0
- fabricks/core/jobs/base/generator.py +447 -0
- fabricks/core/jobs/base/invoker.py +206 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +249 -0
- fabricks/core/jobs/bronze.py +395 -0
- fabricks/core/jobs/get_job.py +127 -0
- fabricks/core/jobs/get_job_conf.py +152 -0
- fabricks/core/jobs/get_job_id.py +31 -0
- fabricks/core/jobs/get_jobs.py +107 -0
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +415 -0
- fabricks/core/jobs/silver.py +373 -0
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/_types.py +6 -0
- fabricks/core/parsers/base.py +95 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +26 -0
- fabricks/core/parsers/utils.py +69 -0
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +21 -0
- fabricks/core/schedules/generate.py +20 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/process.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/terminate.py +6 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/_types.py +7 -0
- fabricks/core/steps/base.py +423 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +26 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/views.py +41 -0
- fabricks/deploy/__init__.py +92 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +10 -0
- fabricks/deploy/tables.py +82 -0
- fabricks/deploy/udfs.py +19 -0
- fabricks/deploy/utils.py +36 -0
- fabricks/deploy/views.py +509 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/_types.py +65 -0
- fabricks/metastore/database.py +65 -0
- fabricks/metastore/dbobject.py +66 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/table.py +768 -0
- fabricks/metastore/utils.py +51 -0
- fabricks/metastore/view.py +53 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_queue.py +93 -0
- fabricks/utils/azure_table.py +154 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/fdict.py +240 -0
- fabricks/utils/helpers.py +228 -0
- fabricks/utils/log.py +236 -0
- fabricks/utils/mermaid.py +32 -0
- fabricks/utils/path.py +242 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +94 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/_types.py +3 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +33 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +99 -0
- fabricks/utils/spark.py +76 -0
- fabricks/utils/sqlglot.py +56 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-3.0.11.dist-info/METADATA +23 -0
- fabricks-3.0.11.dist-info/RECORD +176 -0
- fabricks-3.0.11.dist-info/WHEEL +4 -0
|
{# SCD type-1 CTE chain. Expects the parent CDC CTE ({{ parent_cdc }}) to expose
   __key, __hash, __operation, __timestamp (and __source when has_source).
   Produces __final, either a full snapshot (mode == "complete") or a merge
   source with __merge_key / __merge_condition (mode == "update"). #}
__scd1_base as (
    select
        *,
        {% if not rectify %} __operation as __original_operation, {% endif %}
        lead(__operation) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
        ) as __scd1_next_operation
    from {{ parent_cdc }}
),
__scd1_last_key as (
    -- take last update as it is the latest picture
    select
        *,
        row_number() over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp desc
        ) as __scd1_rn
    from __scd1_base
    where true and __operation == 'upsert'
    {% if mode == "update" %}
    {% if has_rows %}
    -- take first delete ONLY if no upsert is present
    union all
    select
        *,
        row_number() over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp asc
        ) as __scd1_rn
    from __scd1_base b
    where
        true
        and __operation == 'delete'
        and not exists (
            select 1
            from __scd1_base b2
            where
                true and b.__key == b2.__key
                {% if has_source %} and b.__source == b2.__source {% endif %} and b2.__operation == 'upsert'
        )
    {% endif %}
    {% endif %}
),
{# one row per key: flags derived from the newest operation seen for the key #}
__scd1 as (
    select
        *,
        __scd1_next_operation <=> 'delete' or __operation == 'delete' as __is_deleted,
        not (__scd1_next_operation <=> 'delete' or __operation == 'delete') as __is_current
    from __scd1_last_key
    where true and __scd1_rn == 1
),
{% if mode == "complete" %}
__final as (
    select {% for output in outputs %} `{{ output }}`, {% endfor %}
    from __scd1 s
    where true {% if not soft_delete %} and s.__is_current {% endif %}
)
{% else %}
{# map each operation to its merge condition (delete only when rows exist) #}
__merge_condition as (
    select s.*, s.__key as __merge_key, o.__merge_condition
    from __scd1 s
    left join
        (
            select 'upsert' as __operation, 'upsert' as __merge_condition
            {% if has_rows %}
            union all
            select 'delete' as __operation, 'delete' as __merge_condition
            {% endif %}
        ) o
    on s.__operation = o.__operation
),
{% if has_rows %}
{# drop upserts whose hash is unchanged vs the current target (__current) #}
__scd1_no_fake_update as (
    select *
    from __merge_condition m
    left anti join
        __current c on m.__key == c.__key and m.__hash = c.__hash
        {% if has_source %} and m.__source = c.__source {% endif %} and m.__operation == 'upsert'
),
{% endif %}
__final as (
    select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if has_rows %} from __scd1_no_fake_update m
    {% else %} from __merge_condition m
    {% endif %}
)
{% endif %}
{# SCD type-2 CTE chain. Builds validity windows (__valid_from / __valid_to)
   from consecutive changes per key, then emits __final either as a snapshot
   (mode == "complete") or as a merge source (mode == "update"). #}
__scd2_base as (
    select
        *,
        {% if not rectify %} __operation as __original_operation, {% endif %}
        lead(__operation) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
        ) as __scd2_next_operation,
        lead(__timestamp) over (
            partition by {% if has_source %} __source, {% endif %} __key order by __timestamp
        ) as __scd2_next_timestamp
    from {{ parent_cdc }}
),
__scd2 as (
    select
        *,
        __timestamp as __valid_from,
        {# close the window one second before the next change; open windows end 9999-12-31 #}
        coalesce(__scd2_next_timestamp - interval 1 second, cast('9999-12-31' as timestamp)) as __valid_to,
        __operation <> 'delete' and __valid_to <=> '9999-12-31' as __is_current,
        __operation == 'delete' or __scd2_next_operation <=> 'delete' as __is_deleted,
        {% if mode == "update" %}
        row_number() over (
            partition by __key{% if has_source %}, __source{% endif %} order by __timestamp asc
        ) as __scd2_rn
        {% endif %}
    from __scd2_base
),
{% if mode == "complete" %}
__complete as (select s.* from __scd2 s where true and not __operation <=> 'delete'),
{% if correct_valid_from %}
{# rebase the globally earliest __valid_from to 1900-01-01 #}
__correct_valid_from as (
    select
        * except (__valid_from),
        if(
            __valid_from == min(__valid_from) over (partition by null),
            cast('1900-01-01' as timestamp),
            __valid_from
        ) as __valid_from
    from __complete
),
{% endif %}
__final as (
    select {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if correct_valid_from %} from __correct_valid_from
    {% else %} from __complete
    {% endif %}
)
{% else %}
{% if has_rows %}
{# drop first-change upserts whose hash matches the current target, then renumber #}
__scd2_no_fake_update as (
    select
        * except (__scd2_rn),
        row_number() over (partition by `__key` order by `__timestamp` asc) as `__scd2_rn`
    from __scd2 s
    left anti join
        __current c on s.__key == c.__key and s.__hash == c.__hash
        {% if has_source %} and s.__source == c.__source {% endif %}
        and s.__operation == 'upsert'
        and s.__scd2_rn == 1
),
{% endif %}
{# inserts get a null merge key so they never match an existing row #}
__merge_condition as (
    select s.*, if(__merge_condition == 'insert', null, __key) as __merge_key, o.__merge_condition
    {% if has_rows %} from __scd2_no_fake_update s
    {% else %} from __scd2 s
    {% endif %}
    inner join
        (
            select 'upsert' as __operation, 'insert' as __merge_condition
            {% if has_rows %}
            union all
            select 'upsert' as __operation, 'update' as __merge_condition
            union all
            select 'delete' as __operation, 'delete' as __merge_condition
            {% endif %}
        ) o
    on s.__operation = o.__operation
    -- only the first record can be an update or a delete
    where (s.__scd2_rn == 1 and o.__merge_condition in ('update', 'delete')) or o.__merge_condition == 'insert'
),
{% if correct_valid_from %}
__correct_valid_from as (
    select
        * except (__valid_from),
        if(
            __valid_from == min(__valid_from) over (partition by null),
            cast('1900-01-01' as timestamp),
            __valid_from
        ) as __valid_from
    from __merge_condition
),
{% endif %}
__final as (
    select __merge_key, __merge_condition, {% for output in outputs %} `{{ output }}`, {% endfor %}
    {% if correct_valid_from %} from __correct_valid_from
    {% else %} from __merge_condition
    {% endif %}
)
{% endif %}
{# Top-level CDC query assembly: context + shared CTEs + one cdc-flavour body
   (nocdc / scd1 / scd2) + the final select. Include order is significant:
   each CTE builds on the previous one. #}
{% include 'queries/context.sql.jinja' %}
{% include 'ctes/base.sql.jinja' %}
{# optional pre-processing CTEs, applied in dependency order #}
{% if slice %} {% include 'ctes/slice.sql.jinja' %} {% endif %}
{% if deduplicate_key %} {% include 'ctes/deduplicate_key.sql.jinja' %} {% endif %}
{% if mode == "update" %} {% if has_rows %} {% include 'ctes/current.sql.jinja' %} {% endif %} {% endif %}
{% if rectify %} {% include 'ctes/rectify.sql.jinja' %} {% endif %}
{% if deduplicate_hash %} {% include 'ctes/deduplicate_hash.sql.jinja' %} {% endif %}
{# exactly one flavour-specific body is included #}
{% if cdc == "nocdc" %}
{% if mode == "update" %} {% include 'queries/nocdc/update.sql.jinja' %}
{% else %} {% include 'queries/nocdc/complete.sql.jinja' %}
{% endif %}
{% endif %}
{% if cdc == "scd1" %} {% include 'queries/scd1.sql.jinja' %} {% endif %}
{% if cdc == "scd2" %} {% include 'queries/scd2.sql.jinja' %} {% endif %}
{% include 'queries/final.sql.jinja' %}
"""Public surface of ``fabricks.context``.

Re-exports the configuration constants, runtime constants, the Spark session
helpers and ``pprint_runtime`` so callers can import everything from a single
place. ``__all__`` is kept alphabetically ordered (case-insensitive).
"""

from fabricks.context.config import (
    IS_DEBUGMODE,
    IS_DEVMODE,
    IS_JOB_CONFIG_FROM_YAML,
    LOGLEVEL,
    PATH_CONFIG,
    PATH_NOTEBOOKS,
    PATH_RUNTIME,
)
from fabricks.context.runtime import (
    BRONZE,
    CATALOG,
    CONF_RUNTIME,
    FABRICKS_STORAGE,
    FABRICKS_STORAGE_CREDENTIAL,
    GOLD,
    IS_TYPE_WIDENING,
    IS_UNITY_CATALOG,
    PATH_EXTENDERS,
    PATH_MASKS,
    PATH_PARSERS,
    PATH_REQUIREMENTS,
    PATH_SCHEDULES,
    PATH_UDFS,
    PATH_VIEWS,
    PATHS_RUNTIME,
    PATHS_STORAGE,
    SECRET_SCOPE,
    SILVER,
    STEPS,
    TIMEZONE,
    VARIABLES,
)
from fabricks.context.spark_session import DBUTILS, SPARK, build_spark_session, init_spark_session
from fabricks.context.utils import pprint_runtime

__all__ = [
    "BRONZE",
    "build_spark_session",
    "CATALOG",
    "CONF_RUNTIME",
    "DBUTILS",
    "FABRICKS_STORAGE_CREDENTIAL",
    "FABRICKS_STORAGE",
    "GOLD",
    "init_spark_session",
    "IS_DEBUGMODE",
    "IS_DEVMODE",
    "IS_JOB_CONFIG_FROM_YAML",
    "IS_TYPE_WIDENING",
    "IS_UNITY_CATALOG",
    "LOGLEVEL",
    "PATH_CONFIG",
    "PATH_EXTENDERS",
    "PATH_MASKS",
    "PATH_NOTEBOOKS",
    "PATH_PARSERS",
    "PATH_REQUIREMENTS",
    "PATH_RUNTIME",
    "PATH_SCHEDULES",
    "PATH_UDFS",
    "PATH_VIEWS",
    "PATHS_RUNTIME",
    "pprint_runtime",
    "PATHS_STORAGE",
    "SECRET_SCOPE",
    "SILVER",
    "SPARK",
    "STEPS",
    "TIMEZONE",
    "VARIABLES",
]
"""TypedDict declarations describing the fabricks runtime configuration file.

These mirror the YAML/JSON structure loaded at startup; ``Conf`` is the root.
No runtime behavior lives here — typing only.
"""

from typing import List, Optional, TypedDict


class RuntimePathOptions(TypedDict):
    """Runtime-level folder locations (relative paths, resolved elsewhere)."""

    storage: str
    udfs: str
    parsers: str
    schedules: str
    views: str
    requirements: str


class RuntimeTimeoutOptions(TypedDict):
    """Default timeouts (presumably seconds — TODO confirm against consumers)."""

    step: int
    job: int
    pre_run: int
    post_run: int


class StepTimeoutOptions(TypedDict):
    """Per-step timeout overrides; None falls back to the runtime defaults."""

    step: Optional[int]
    job: Optional[int]
    pre_run: Optional[int]
    post_run: Optional[int]


class RuntimeOptions(TypedDict):
    """Top-level runtime options."""

    secret_scope: str
    unity_catalog: Optional[bool]
    type_widening: Optional[bool]
    catalog: Optional[str]
    workers: int
    timeouts: RuntimeTimeoutOptions
    retention_days: int
    timezone: Optional[str]


class SparkOptions(TypedDict):
    """Raw key/value maps applied to the Spark session (sql vs conf scope)."""

    sql: dict
    conf: dict


class StepPathOptions(TypedDict):
    """Where a step keeps its runtime definitions and its storage."""

    runtime: str
    storage: str


class InvokeOptions(TypedDict):
    """A notebook to invoke, with optional string arguments."""

    notebook: str
    arguments: Optional[dict[str, str]]


class ExtenderOptions(TypedDict):
    """An extender to run, with optional string arguments."""

    extender: str
    arguments: Optional[dict[str, str]]


class StepOptions(TypedDict):
    """Options shared by all step kinds."""

    order: int
    workers: Optional[int]
    timeouts: StepTimeoutOptions
    extenders: Optional[List[str]]
    pre_run: Optional[InvokeOptions]
    post_run: Optional[InvokeOptions]


class SilverOptions(StepOptions):
    """Silver-specific options; ``parent`` names the upstream step."""

    parent: str
    stream: Optional[bool]
    local_checkpoint: Optional[bool]


class GoldOptions(StepOptions):
    """Gold-specific options."""

    schema_drift: Optional[bool]
    metadata: Optional[bool]


class Step(TypedDict):
    """Base shape of any configured step."""

    name: str


class TableOptions(TypedDict):
    """Delta-table level settings applied when tables are created/updated."""

    powerbi: Optional[bool]
    liquid_clustering: Optional[bool]
    properties: Optional[dict[str, str]]
    retention_days: Optional[int]
    masks: Optional[dict[str, str]]


class Bronze(Step):
    """A bronze step entry."""

    options: StepOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class Silver(Step):
    """A silver step entry."""

    options: SilverOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class Gold(Step):
    """A gold step entry."""

    options: GoldOptions
    path_options: StepPathOptions
    table_options: Optional[TableOptions]


class PowerBI(Step):
    """A Power BI step entry; carries no extra options."""

    pass


class DatabasePathOptions(TypedDict):
    """Storage location of an extra database."""

    storage: str


class Database(TypedDict):
    """An extra database to create alongside the steps."""

    name: str
    path_options: DatabasePathOptions


class Conf(TypedDict):
    """Root of the fabricks runtime configuration."""

    name: str
    options: RuntimeOptions
    path_options: RuntimePathOptions
    extender_options: Optional[ExtenderOptions]
    spark_options: SparkOptions
    bronze: Optional[List[Bronze]]
    silver: Optional[List[Silver]]
    gold: Optional[List[Gold]]
    powerbi: Optional[List[PowerBI]]
    databases: Optional[List[Database]]
    variables: Optional[List[dict[str, str]]]
    credentials: Optional[List[dict[str, str]]]
"""Resolve fabricks configuration constants at import time.

Resolution order for each setting: environment variable first, then the
config file found by ``get_config_from_file`` (fabricksconfig.json or
pyproject.toml), then a computed default. Executed once at import.
"""

import logging
import os
from typing import Final

from fabricks.context.config.utils import get_config_from_file
from fabricks.utils.path import Path
from fabricks.utils.spark import spark

# file_path is the directory containing the config file (or None), file_config the parsed dict
file_path, file_config = get_config_from_file()

# FABRICKS_RUNTIME: the literal string "none" (any case) means "not set"
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
runtime = None if runtime.lower() == "none" else runtime
if runtime is None:
    # fall back to the "runtime" entry of the config file, relative to the file's directory
    if runtime := file_config.get("runtime"):
        assert file_path is not None
        runtime = file_path.joinpath(runtime)

if runtime is None:
    # last resort: the config file's own directory is the runtime
    if file_path is not None:
        runtime = file_path
    else:
        raise ValueError(
            "could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
        )

path_runtime = Path(runtime, assume_git=True)
PATH_RUNTIME: Final[Path] = path_runtime

# FABRICKS_NOTEBOOKS follows the same env -> file -> default pattern
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
notebooks = None if notebooks.lower() == "none" else notebooks
if notebooks is None:
    if notebooks := file_config.get("notebooks"):
        assert file_path is not None
        notebooks = file_path.joinpath(notebooks)

# default: a "notebooks" folder inside the runtime
notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)

is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
if is_job_config_from_yaml is None:
    # NOTE(review): this asserts a config file exists even though the flag may be absent
    assert file_path is not None
    is_job_config_from_yaml = file_config.get("job_config_from_yaml")

# truthy string values: "true", "1", "yes" (case-insensitive)
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")

is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
if is_debugmode is None:
    is_debugmode = file_config.get("debugmode")

IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")

is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
if is_devmode is None:
    is_devmode = file_config.get("devmode")

IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")

loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
if loglevel is None:
    loglevel = file_config.get("loglevel")

# map the name to the logging module constant; unknown names are an error
loglevel = loglevel.upper() if loglevel else "INFO"
if loglevel == "DEBUG":
    _loglevel = logging.DEBUG
elif loglevel == "INFO":
    _loglevel = logging.INFO
elif loglevel == "WARNING":
    _loglevel = logging.WARNING
elif loglevel == "ERROR":
    _loglevel = logging.ERROR
elif loglevel == "CRITICAL":
    _loglevel = logging.CRITICAL
else:
    raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")

LOGLEVEL = _loglevel

path_config = os.environ.get("FABRICKS_CONFIG")
if path_config is None:
    # relative to the config file's directory when given in the file
    if path_config := file_config.get("config"):
        assert file_path is not None
        path_config = file_path.joinpath(path_config)
else:
    # env value is interpreted relative to the runtime
    path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None

if not path_config:
    # default: a per-workspace conf file named after the Databricks org id
    path_config = PATH_RUNTIME.joinpath(
        "fabricks",
        f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
    ).string

PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
def get_config_from_toml():
    """Walk up from the CWD to the nearest pyproject.toml.

    Returns:
        (directory, dict): the directory holding pyproject.toml and its
        ``[tool.fabricks]`` table, or ``(None, {})`` when no file is found.
    """
    import os
    import pathlib
    import sys

    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib  # type: ignore

    current = pathlib.Path(os.getcwd())
    while True:
        candidate = current / "pyproject.toml"
        if candidate.exists():
            with open(candidate, "rb") as f:
                data = tomllib.load(f)
            return current, data.get("tool", {}).get("fabricks", {})
        if current == current.parent:
            # reached the filesystem root without finding anything
            return None, {}
        current = current.parent


def get_config_from_json():
    """Walk up from the CWD to the nearest fabricksconfig.json.

    Returns:
        (directory, dict): the directory holding fabricksconfig.json and its
        parsed content, or ``(None, {})`` when no file is found.
    """
    import json
    import os
    import pathlib

    current = pathlib.Path(os.getcwd())
    while True:
        candidate = current / "fabricksconfig.json"
        if candidate.exists():
            with open(candidate, "r") as f:
                return current, json.load(f)
        if current == current.parent:
            return None, {}
        current = current.parent


def get_config_from_file():
    """Resolve the fabricks config from disk.

    fabricksconfig.json takes precedence over pyproject.toml's
    ``[tool.fabricks]`` table; ``(None, {})`` when neither yields config.
    """
    for loader in (get_config_from_json, get_config_from_toml):
        path, config = loader()
        if config:
            return path, config
    return None, {}
import json
import logging
from typing import Final, Literal, Optional

import requests

from fabricks.context import IS_DEBUGMODE, LOGLEVEL, SECRET_SCOPE, TIMEZONE
from fabricks.utils.log import get_logger

# Module-wide logger named "logs"; table=None keeps it console-only
# (presumably — confirm against fabricks.utils.log.get_logger).
logger, _ = get_logger(
    "logs",
    LOGLEVEL,
    table=None,
    debugmode=IS_DEBUGMODE,
    timezone=TIMEZONE,
)
# silence Spark's noisy SQL query context logger
logging.getLogger("SQLQueryContextLogger").setLevel(logging.CRITICAL)

# canonical logger the rest of the package imports
DEFAULT_LOGGER: Final[logging.Logger] = logger
20
|
+
|
|
21
|
+
|
|
22
|
+
def send_message_to_channel(
    channel: str,
    title: str,
    message: str,
    color: Optional[str] = None,
    loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO",
) -> bool:
    """
    Send a message to Microsoft Teams via webhook.

    The webhook URL is read from the Databricks secret scope under the key
    ``{channel}-webhook-url`` (channel lowercased, spaces replaced by dashes).

    Args:
        channel (str): Teams channel name; used to look up the webhook secret
        title (str): Title (and summary) for the message card
        message (str): The message to send
        color (str, optional): Hex theme color for the message card
        loglevel (str, optional): Severity used to pick the default color when no color is given

    Returns:
        bool: True if message was sent successfully (HTTP 200), False otherwise
    """
    from databricks.sdk.runtime import dbutils

    # normalize the channel name into the secret-key format
    channel = channel.lower()
    channel = channel.replace(" ", "-")
    webhook_url = dbutils.secrets.get(scope=SECRET_SCOPE, key=f"{channel}-webhook-url")

    teams_message = {
        "@type": "MessageCard",
        "@context": "http://schema.org/extensions",
        "summary": title,
    }

    if title:
        teams_message["title"] = title

    if not color:
        # Fix: the previous values carried trailing spaces (e.g. "#00FF00 "),
        # which is not a valid hex value for Teams' themeColor.
        COLORS = {
            "DEBUG": "#00FFFF",
            "INFO": "#00FF00",
            "WARNING": "#FFFF00",
            "ERROR": "#FF0000",
            "CRITICAL": "#FF0000",
        }
        color = COLORS[loglevel]
    teams_message["themeColor"] = color

    teams_message["text"] = message

    response = requests.post(
        webhook_url,
        data=json.dumps(teams_message),
        headers={"Content-Type": "application/json"},
    )
    return response.status_code == 200