fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +8 -7
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +96 -43
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +9 -8
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +269 -102
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -137
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
import dataclasses
|
|
2
|
-
import datetime
|
|
3
|
-
import logging
|
|
4
|
-
import sys
|
|
5
|
-
import types
|
|
6
|
-
from typing import Any, ForwardRef, Literal, Type, Union, get_type_hints
|
|
7
|
-
from uuid import UUID
|
|
8
|
-
|
|
9
|
-
LOGGER = logging.getLogger(__name__)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def get_json_schema_for_type(proptype: Type):
    """Build a JSON Schema (draft 2020-12) document describing ``proptype``.

    Args:
        proptype: The Python type or typing construct to describe.

    Returns:
        The schema generated for ``proptype`` with two extra keys: ``$defs``
        (definitions registered for nested dataclasses and ``dict``-based
        classes, keyed by class name) and ``$schema`` (the draft 2020-12 URI).
    """
    def_list: dict[str, dict] = {}
    schema = _get_json_schema_for_type(proptype, def_list, is_root=True)
    schema["$defs"] = def_list
    schema["$schema"] = "https://json-schema.org/draft/2020-12/schema"
    return schema


def _get_json_schema_for_type(proptype: Type, def_list: dict[str, dict], is_root: bool, is_nullable=False) -> dict:
    """Recursively translate ``proptype`` into a JSON Schema fragment.

    Args:
        proptype: The type (or typing construct) to translate.
        def_list: Mutable registry of named definitions; nested dataclasses and
            ``dict``-based classes are stored here under their ``__name__`` and
            referenced through ``$ref``.
        is_root: ``True`` only for the outermost call; a root dataclass or
            ``dict``-based class is returned inline instead of being registered.
        is_nullable: ``True`` when the enclosing annotation was ``Optional``;
            the produced fragment then also accepts ``null``.

    Returns:
        The schema fragment. Types that are not recognised yield ``{}``.

    Raises:
        NotImplementedError: For a two-argument ``dict`` whose key type is
            neither ``str`` nor ``UUID``.
    """

    def _sub(inner: Any, nullable: bool = False) -> dict:
        # Nested types are never the root of the document.
        return _get_json_schema_for_type(inner, def_list, is_root=False, is_nullable=nullable)

    def _may_null(schema: dict, nullable: bool) -> dict:
        if nullable:
            return {"oneOf": [{"type": "null"}, schema]}
        return schema

    def _type_name(name: str):
        # Scalars express nullability with a type list rather than ``oneOf``.
        return [name, "null"] if is_nullable else name

    none_type = type(None)
    origin = getattr(proptype, "__origin__", None)
    args = getattr(proptype, "__args__", ())

    if origin is Literal:
        # A list (not the raw ``__args__`` tuple) keeps the fragment a plain
        # JSON-shaped value; ``None`` is added so Optional[Literal[...]] accepts null.
        values = list(args)
        if is_nullable and not any(value is None for value in values):
            values.append(None)
        return {"enum": values}

    if origin is tuple:
        if len(args) == 2 and args[1] is Ellipsis:
            # Tuple[X, ...] is a homogeneous array of any length.
            return _may_null({"type": "array", "items": _sub(args[0])}, is_nullable)
        return _may_null(
            {
                "type": "array",
                "minItems": len(args),
                "maxItems": len(args),
                # Draft 2020-12 uses ``items`` (not ``additionalItems``) to
                # constrain elements beyond ``prefixItems``.
                "items": False,
                "prefixItems": [_sub(member) for member in args],
            },
            is_nullable,
        )

    # ``types.UnionType`` (the ``X | Y`` form) only exists on Python 3.10+.
    union_type = getattr(types, "UnionType", None)
    if origin is Union or (union_type is not None and isinstance(proptype, union_type)):
        if len(args) == 2 and any(member is none_type for member in args):
            # Optional[X]: describe X and let it carry the nullability.
            inner = args[1] if args[0] is none_type else args[0]
            return _sub(inner, nullable=True)
        # NOTE: ``oneOf`` requires exactly one member schema to match.
        return {"oneOf": [_sub(member) for member in args]}

    if proptype is none_type:
        return {"type": "null"}

    if proptype is Any:
        return {}

    scalar_names = ((str, "string"), (int, "integer"), (float, "number"), (bool, "boolean"))
    for python_type, json_name in scalar_names:
        if proptype is python_type:
            return {"type": _type_name(json_name)}

    string_formats = (
        (UUID, "uuid"),
        (datetime.datetime, "date-time"),
        (datetime.time, "time"),
        (datetime.date, "date"),
    )
    for python_type, json_format in string_formats:
        if proptype is python_type:
            return {"type": _type_name("string"), "format": json_format}

    if origin is list:
        items = _sub(args[0]) if args else {}
        return _may_null({"type": "array", "items": items}, is_nullable)

    # Classes whose only direct base is ``dict``: properties come from the
    # class's type hints.
    if getattr(proptype, "__bases__", None) == (dict,):
        props = {key: _sub(hint) for key, hint in get_type_hints(proptype).items()}
        definition = {"type": "object", "properties": props}
        if hasattr(proptype, "__name__") and not is_root:
            def_list[proptype.__name__] = definition
            return _may_null({"$ref": "#/$defs/" + proptype.__name__}, is_nullable)
        return _may_null(definition, is_nullable)

    if dataclasses.is_dataclass(proptype):
        fields = dataclasses.fields(proptype)
        definition = {
            "type": "object",
            # Required = init fields with neither a default nor a default factory.
            "required": [
                f.name
                for f in fields
                if f.init and f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING
            ],
            "additionalProperties": False,
            "properties": {f.name: _sub(f.type) for f in fields},  # type: ignore
        }
        if is_root:
            return definition
        def_list[proptype.__name__] = definition
        return _may_null({"$ref": "#/$defs/" + proptype.__name__}, is_nullable)

    if origin is dict and len(args) == 2:
        key_type, value_type = args
        if key_type is not str and key_type is not UUID:
            raise NotImplementedError(f"unsupported dict key type: {key_type!r}")
        # The value schema is used directly; wrapping it in {"type": ...}
        # would not be a valid schema.
        return _may_null({"type": "object", "additionalProperties": _sub(value_type)}, is_nullable)

    if isinstance(proptype, ForwardRef):
        return _may_null({"$ref": "#/$defs/" + proptype.__forward_arg__}, is_nullable)

    return {}
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import dataclasses
|
|
2
|
-
from typing import List, Literal, Type, Union, cast, get_type_hints, overload
|
|
3
|
-
|
|
4
|
-
from pyspark.sql.types import (
|
|
5
|
-
ArrayType,
|
|
6
|
-
BooleanType,
|
|
7
|
-
DataType,
|
|
8
|
-
DoubleType,
|
|
9
|
-
LongType,
|
|
10
|
-
MapType,
|
|
11
|
-
NullType,
|
|
12
|
-
StringType,
|
|
13
|
-
StructField,
|
|
14
|
-
StructType,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
# Typing-only overload: declares a generic ``DataType`` result for the
# int/str/float/bool case.
# NOTE(review): the annotation ``Union[int, str, float, bool]`` describes
# instances, while the implementation compares ``proptype`` against the
# classes themselves (``proptype == str`` etc.) — confirm whether
# ``Type[...]`` was intended.
@overload
def get_schema_for_type(proptype: Union[int, str, float, bool]) -> DataType: ...


# Typing-only overload: any other type is declared to produce a ``StructType``.
@overload
def get_schema_for_type(proptype: Type) -> StructType: ...
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _merge_struct_types(types: List[DataType]):
    """Fold several struct schemas into a single ``StructType``.

    ``NullType`` entries are ignored; every remaining entry must be a
    ``StructType`` (enforced with ``assert``). Fields are collected by name in
    first-seen order. When a name is seen again and the two data types differ
    in class, or the existing one is a struct, the two data types are merged
    recursively and the merged field moves to the end of the result.
    Otherwise the first occurrence is kept unchanged.

    Args:
        types: Candidate schemas to merge.

    Returns:
        A ``StructType`` holding the merged fields.
    """
    null_cls = type(NullType())
    structs = [candidate for candidate in types if type(candidate) is not null_cls]

    assert all(isinstance(candidate, StructType) for candidate in structs)

    # name -> field; dict insertion order is the output field order.
    merged = {}

    for struct in structs:
        for incoming in cast(StructType, struct).fields:
            current = merged.get(incoming.name)
            if current is None:
                merged[incoming.name] = incoming
                continue

            same_class = type(current.dataType) is type(incoming.dataType)
            if same_class and not isinstance(current.dataType, StructType):
                # Same non-struct type seen again: keep the first occurrence.
                continue

            combined = _merge_struct_types([current.dataType, incoming.dataType])
            # Drop and re-insert so the merged field lands at the end.
            del merged[incoming.name]
            merged[incoming.name] = StructField(name=incoming.name, dataType=combined)

    return StructType(fields=list(merged.values()))
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def get_schema_for_type(proptype: Type) -> DataType:  # type: ignore
    """Map a Python type annotation to the corresponding Spark ``DataType``.

    Handled forms: ``Literal`` (typed after its first value), ``Optional[X]``
    and ``X | None`` (typed as ``X``), other unions (member schemas merged with
    ``_merge_struct_types``), ``None``, ``str``, ``int``, ``float``, ``bool``,
    ``list[X]``, classes whose only direct base is ``dict``, dataclasses and
    two-argument ``dict`` generics (always given string keys).

    Args:
        proptype: The type or typing construct to translate.

    Returns:
        The Spark data type for ``proptype``.

    Raises:
        NotImplementedError: If ``proptype`` matches none of the handled forms.
    """
    # Function-scope import: the module-level imports do not include ``types``.
    import types as _py_types

    none_type = type(None)
    origin = getattr(proptype, "__origin__", None)
    args = getattr(proptype, "__args__", ())

    if origin is Literal:
        # For literal types we assume the first value's type is representative.
        return get_schema_for_type(type(args[0]))

    # ``types.UnionType`` (the ``X | Y`` form) only exists on Python 3.10+.
    union_type = getattr(_py_types, "UnionType", None)
    if origin is Union or (union_type is not None and isinstance(proptype, union_type)):
        if len(args) == 2 and args[0] is none_type:
            return get_schema_for_type(args[1])
        if len(args) == 2 and args[1] is none_type:
            return get_schema_for_type(args[0])
        return _merge_struct_types([get_schema_for_type(member) for member in args])

    if proptype is none_type:
        return NullType()

    if proptype is str:
        return StringType()

    if proptype is int:
        return LongType()

    if proptype is float:
        return DoubleType()

    if proptype is bool:
        return BooleanType()

    if origin is list:
        return ArrayType(get_schema_for_type(args[0]))

    if proptype == dict[str, str]:
        return MapType(StringType(), StringType())

    # Classes whose only direct base is ``dict``: one field per type hint.
    if getattr(proptype, "__bases__", None) == (dict,):
        hints = get_type_hints(proptype)
        return StructType(fields=[StructField(key, get_schema_for_type(hint)) for key, hint in hints.items()])

    if dataclasses.is_dataclass(proptype):
        return StructType(
            fields=[StructField(f.name, get_schema_for_type(f.type)) for f in dataclasses.fields(proptype)]
        )

    if origin is dict and len(args) == 2:
        # The key type is not inspected; map keys are always typed as strings.
        return MapType(StringType(), get_schema_for_type(args[1]))

    raise NotImplementedError(f"unsupported type: {proptype!r}")
|
|
File without changes
|