fabricks 3.0.19__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. fabricks/api/context.py +15 -3
  2. fabricks/api/notebooks/schedule.py +2 -3
  3. fabricks/api/parsers.py +2 -1
  4. fabricks/api/utils.py +3 -1
  5. fabricks/cdc/__init__.py +1 -2
  6. fabricks/cdc/base/__init__.py +1 -2
  7. fabricks/cdc/base/_types.py +5 -3
  8. fabricks/cdc/base/configurator.py +5 -0
  9. fabricks/cdc/base/generator.py +7 -3
  10. fabricks/cdc/base/merger.py +2 -0
  11. fabricks/cdc/base/processor.py +15 -0
  12. fabricks/cdc/templates/README.md +490 -0
  13. fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
  14. fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
  15. fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
  16. fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
  17. fabricks/cdc/templates/queries/context.sql.jinja +104 -96
  18. fabricks/cdc/templates/query.sql.jinja +1 -1
  19. fabricks/context/__init__.py +13 -1
  20. fabricks/context/config.py +13 -122
  21. fabricks/context/log.py +92 -1
  22. fabricks/context/runtime.py +35 -69
  23. fabricks/context/spark_session.py +4 -4
  24. fabricks/context/utils.py +26 -39
  25. fabricks/core/__init__.py +2 -2
  26. fabricks/core/dags/base.py +5 -5
  27. fabricks/core/dags/processor.py +2 -3
  28. fabricks/core/extenders.py +1 -1
  29. fabricks/core/job_schema.py +26 -16
  30. fabricks/core/jobs/__init__.py +1 -7
  31. fabricks/core/jobs/base/README.md +1545 -0
  32. fabricks/core/jobs/base/__init__.py +1 -8
  33. fabricks/core/jobs/base/checker.py +7 -7
  34. fabricks/core/jobs/base/configurator.py +142 -63
  35. fabricks/core/jobs/base/generator.py +38 -34
  36. fabricks/core/jobs/base/invoker.py +48 -63
  37. fabricks/core/jobs/base/processor.py +13 -28
  38. fabricks/core/jobs/bronze.py +88 -38
  39. fabricks/core/jobs/get_job.py +3 -6
  40. fabricks/core/jobs/get_job_conf.py +19 -68
  41. fabricks/core/jobs/get_jobs.py +10 -11
  42. fabricks/core/jobs/get_schedules.py +3 -17
  43. fabricks/core/jobs/gold.py +89 -47
  44. fabricks/core/jobs/silver.py +42 -22
  45. fabricks/core/masks.py +11 -8
  46. fabricks/core/parsers/__init__.py +0 -2
  47. fabricks/core/parsers/base.py +10 -10
  48. fabricks/core/parsers/decorator.py +1 -1
  49. fabricks/core/parsers/get_parser.py +4 -5
  50. fabricks/core/schedules/process.py +1 -4
  51. fabricks/core/steps/base.py +27 -17
  52. fabricks/core/steps/get_step.py +2 -4
  53. fabricks/core/steps/get_step_conf.py +3 -7
  54. fabricks/core/udfs.py +7 -7
  55. fabricks/core/views.py +2 -2
  56. fabricks/deploy/__init__.py +27 -16
  57. fabricks/deploy/masks.py +1 -1
  58. fabricks/deploy/notebooks.py +19 -16
  59. fabricks/deploy/schedules.py +1 -1
  60. fabricks/deploy/tables.py +66 -49
  61. fabricks/deploy/udfs.py +2 -2
  62. fabricks/deploy/views.py +15 -16
  63. fabricks/metastore/database.py +3 -3
  64. fabricks/metastore/table.py +103 -68
  65. fabricks/models/__init__.py +125 -0
  66. fabricks/models/common.py +79 -0
  67. fabricks/models/config.py +225 -0
  68. fabricks/models/dependency.py +50 -0
  69. fabricks/models/job.py +157 -0
  70. fabricks/models/path.py +17 -0
  71. fabricks/models/runtime.py +182 -0
  72. fabricks/models/schedule.py +21 -0
  73. fabricks/models/step.py +103 -0
  74. fabricks/models/table.py +77 -0
  75. fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
  76. fabricks/utils/helpers.py +6 -5
  77. fabricks/utils/log.py +25 -6
  78. fabricks/utils/path.py +265 -108
  79. fabricks/utils/pip.py +7 -7
  80. fabricks/utils/read/read.py +23 -22
  81. fabricks/utils/read/read_yaml.py +2 -2
  82. fabricks/utils/write/delta.py +4 -4
  83. fabricks/utils/write/stream.py +2 -2
  84. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/METADATA +9 -4
  85. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/RECORD +86 -83
  86. fabricks/context/_types.py +0 -139
  87. fabricks/context/helpers.py +0 -63
  88. fabricks/core/jobs/base/_types.py +0 -284
  89. fabricks/core/parsers/_types.py +0 -6
  90. fabricks/utils/fdict.py +0 -240
  91. fabricks/utils/pydantic.py +0 -94
  92. fabricks/utils/schema/__init__.py +0 -7
  93. fabricks/utils/schema/get_json_schema_for_type.py +0 -161
  94. fabricks/utils/schema/get_schema_for_type.py +0 -99
  95. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/WHEEL +0 -0
@@ -1,161 +0,0 @@
1
- import dataclasses
2
- import datetime
3
- import logging
4
- import sys
5
- import types
6
- from typing import Any, ForwardRef, Literal, Type, Union, get_type_hints
7
- from uuid import UUID
8
-
9
- LOGGER = logging.getLogger(__name__)
10
-
11
-
12
def get_json_schema_for_type(proptype: Type):
    """Build a JSON Schema (draft 2020-12) document describing ``proptype``.

    Args:
        proptype: A Python type or typing construct: a primitive, ``UUID``,
            a ``datetime`` type, ``Literal``, ``Optional``/``Union``,
            ``list``/``tuple``/``dict`` generics, a ``dict`` subclass with
            annotations (e.g. a ``TypedDict``), or a dataclass.

    Returns:
        The schema dict for ``proptype`` with two extra top-level keys:
        ``"$defs"`` (definitions collected for nested named types; may be
        empty) and ``"$schema"`` (the draft 2020-12 dialect URI).
    """
    def_list: dict[str, dict] = {}
    schema = _get_json_schema_for_type(proptype, def_list, is_root=True)
    schema["$defs"] = def_list
    schema["$schema"] = "https://json-schema.org/draft/2020-12/schema"
    return schema


def _get_json_schema_for_type(proptype: Type, def_list: dict[str, dict], is_root: bool, is_nullable=False) -> dict:
    """Translate one type into a JSON Schema fragment.

    Args:
        proptype: The type (or typing construct) to translate.
        def_list: Shared registry of named definitions; nested dataclasses and
            annotated ``dict`` subclasses are stored here under their
            ``__name__`` and referenced through ``#/$defs/<name>``.
        is_root: True only for the outermost type, which is returned inline
            instead of being registered in ``def_list``.
        is_nullable: True when the type was unwrapped from ``Optional[...]``;
            the produced fragment then also accepts ``null``.

    Returns:
        The schema fragment. Types with no known mapping yield ``{}``.

    Raises:
        NotImplementedError: For a ``dict`` generic whose key type is neither
            ``str`` nor ``UUID``.
    """
    none_type = type(None)
    origin = getattr(proptype, "__origin__", None)
    args = getattr(proptype, "__args__", ())

    def type_name(json_name: str):
        # Scalars and arrays express nullability through a type list.
        return [json_name, "null"] if is_nullable else json_name

    def may_null(fragment: dict) -> dict:
        # Objects and references express nullability through ``oneOf``.
        return {"oneOf": [{"type": "null"}, fragment]} if is_nullable else fragment

    def subschema(inner) -> dict:
        return _get_json_schema_for_type(inner, def_list, is_root=False)

    def named_definition(build_body) -> dict:
        # The root type is emitted inline; every other named type is stored
        # once in ``def_list`` and referenced from there.
        if is_root:
            return build_body()
        name = proptype.__name__
        if name not in def_list:
            # Placeholder first, so a type that (indirectly) contains itself
            # resolves to a $ref instead of recursing without bound.
            def_list[name] = {}
            def_list[name] = build_body()
        return may_null({"$ref": "#/$defs/" + name})

    if origin is Literal:
        # A list (not the raw ``__args__`` tuple) is the JSON array shape;
        # ``None`` is added so Optional[Literal[...]] keeps its nullability.
        choices = list(args)
        if is_nullable and None not in choices:
            choices.append(None)
        return {"enum": choices}

    if origin is tuple:
        if len(args) == 2 and args[1] is Ellipsis:
            # tuple[T, ...] is a homogeneous array of any length.
            return {"type": type_name("array"), "items": subschema(args[0])}
        return {
            "type": type_name("array"),
            "minItems": len(args),
            "maxItems": len(args),
            # Kept for existing consumers; the length is already pinned by
            # minItems/maxItems.
            "additionalItems": False,
            "prefixItems": [subschema(t) for t in args],
        }

    # ``X | Y`` (PEP 604) has no ``__origin__``, so it is detected by instance
    # check; ``types.UnionType`` only exists on Python 3.10+.
    pep604_union = getattr(types, "UnionType", None)
    if origin is Union or (pep604_union is not None and isinstance(proptype, pep604_union)):
        if len(args) == 2 and none_type in args:
            # Optional[T]: describe T and mark it nullable.
            wrapped = args[1] if args[0] is none_type else args[0]
            return _get_json_schema_for_type(wrapped, def_list, is_root=False, is_nullable=True)
        return {"oneOf": [subschema(member) for member in args]}

    if proptype is none_type:
        return {"type": "null"}

    for python_type, json_name in ((str, "string"), (int, "integer"), (float, "number"), (bool, "boolean")):
        if proptype is python_type:
            return {"type": type_name(json_name)}

    if proptype is Any:
        return {}

    if proptype is UUID:
        return {"type": type_name("string"), "format": "uuid"}

    if origin is list:
        array: dict = {"type": type_name("array")}
        if args:
            array["items"] = subschema(args[0])
        return array

    bases = getattr(proptype, "__bases__", ())
    if len(bases) == 1 and bases[0] is dict:
        # Direct ``dict`` subclass: its annotations become properties.
        def build_mapping_body() -> dict:
            hints = get_type_hints(proptype)
            return {"type": "object", "properties": {key: subschema(hint) for key, hint in hints.items()}}

        return named_definition(build_mapping_body)

    if dataclasses.is_dataclass(proptype):

        def build_dataclass_body() -> dict:
            fields = dataclasses.fields(proptype)
            required = [
                f.name
                for f in fields
                # MISSING is a sentinel: identity comparison avoids invoking a
                # default value's own ``__eq__``.
                if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING and f.init
            ]
            return {
                "type": "object",
                "required": required,
                "additionalProperties": False,
                "properties": {f.name: subschema(f.type) for f in fields},  # type: ignore
            }

        return named_definition(build_dataclass_body)

    if origin is dict and len(args) == 2:
        key_type, value_type = args
        if key_type is not str and key_type is not UUID:
            raise NotImplementedError()
        # The value schema is used directly: nesting it under a "type" key
        # would not be valid JSON Schema ("type" must be a string or a list
        # of strings).
        return may_null({"type": "object", "additionalProperties": subschema(value_type)})

    if isinstance(proptype, ForwardRef):
        return may_null({"$ref": "#/$defs/" + proptype.__forward_arg__})

    for temporal_type, json_format in (
        (datetime.datetime, "date-time"),
        (datetime.time, "time"),
        (datetime.date, "date"),
    ):
        if proptype is temporal_type:
            return {"type": type_name("string"), "format": json_format}

    # No mapping known: an empty schema accepts any value.
    return {}
@@ -1,99 +0,0 @@
1
- import dataclasses
2
- from typing import List, Literal, Type, Union, cast, get_type_hints, overload
3
-
4
- from pyspark.sql.types import (
5
- ArrayType,
6
- BooleanType,
7
- DataType,
8
- DoubleType,
9
- LongType,
10
- MapType,
11
- NullType,
12
- StringType,
13
- StructField,
14
- StructType,
15
- )
16
-
17
-
18
def _merge_struct_types(types: List[DataType]):
    """Combine several struct schemas into one ``StructType``.

    ``NullType`` entries are ignored. Fields are collected by name in
    encounter order. When a name reappears with a different data type, or with
    a struct type, the two data types are merged recursively and the merged
    field is re-appended at the end of the field list.

    Args:
        types: Data types to merge; every non-null entry must be a
            ``StructType``.

    Returns:
        A ``StructType`` holding the merged fields.

    Raises:
        AssertionError: If a non-null entry is not a ``StructType``.
    """
    structs = [t for t in types if type(t) != type(NullType())]  # noqa: E721

    assert all(isinstance(s, StructType) for s in structs), "only struct types can be merged"
    merged: List[StructField] = []

    for struct in structs:
        for field in cast(StructType, struct).fields:
            existing = next((f for f in merged if f.name == field.name), None)
            if existing is None:
                merged.append(field)
                continue

            same_kind = type(existing.dataType) == type(field.dataType)  # noqa: E721
            if same_kind and not isinstance(existing.dataType, StructType):
                # Same non-struct type already recorded: nothing to merge.
                continue

            combined = _merge_struct_types([existing.dataType, field.dataType])
            merged.append(StructField(name=field.name, dataType=combined))
            merged.remove(existing)

    return StructType(fields=merged)


# The overload stubs sit directly above the implementation: type checkers
# require an overload group and its implementation to be contiguous.
@overload
def get_schema_for_type(proptype: Union[int, str, float, bool]) -> DataType: ...


@overload
def get_schema_for_type(proptype: Type) -> StructType: ...


def get_schema_for_type(proptype: Type) -> DataType:  # type: ignore
    """Map a Python type to the equivalent Spark ``DataType``.

    Mapping implemented here: ``str`` -> ``StringType``, ``int`` ->
    ``LongType``, ``float`` -> ``DoubleType``, ``bool`` -> ``BooleanType``,
    ``None`` -> ``NullType``, ``list[T]`` -> ``ArrayType``, ``dict[K, V]`` ->
    ``MapType`` with string keys, and dataclasses / annotated ``dict``
    subclasses -> ``StructType``. ``Optional[T]`` maps like ``T``; any other
    union is merged field by field and must therefore consist of struct-like
    members. A ``Literal`` maps like the type of its first value.

    Args:
        proptype: The Python type or typing construct to translate.

    Returns:
        The corresponding Spark data type.

    Raises:
        NotImplementedError: If no mapping exists for ``proptype``.
    """
    # Local import: the surrounding module does not import ``types``.
    import types as _builtin_types

    none_type = type(None)
    origin = getattr(proptype, "__origin__", None)
    args = getattr(proptype, "__args__", ())

    if origin is Literal:
        # For literal types the first value's type is assumed to be correct.
        return get_schema_for_type(type(args[0]))

    # ``X | Y`` (PEP 604) has no ``__origin__``; ``types.UnionType`` only
    # exists on Python 3.10+.
    pep604_union = getattr(_builtin_types, "UnionType", None)
    if origin is Union or (pep604_union is not None and isinstance(proptype, pep604_union)):
        if len(args) == 2 and none_type in args:
            wrapped = args[1] if args[0] is none_type else args[0]
            return get_schema_for_type(wrapped)
        return _merge_struct_types([get_schema_for_type(member) for member in args])

    if proptype is none_type:
        return NullType()

    for python_type, spark_type in ((str, StringType), (int, LongType), (float, DoubleType), (bool, BooleanType)):
        if proptype is python_type:
            return spark_type()

    if origin is list:
        return ArrayType(get_schema_for_type(args[0]))

    bases = getattr(proptype, "__bases__", ())
    if len(bases) == 1 and bases[0] is dict:
        # Direct ``dict`` subclass: its annotations become struct fields.
        hints = get_type_hints(proptype)
        return StructType(fields=[StructField(key, get_schema_for_type(hint)) for key, hint in hints.items()])

    if dataclasses.is_dataclass(proptype):
        return StructType(
            fields=[StructField(f.name, get_schema_for_type(f.type)) for f in dataclasses.fields(proptype)]
        )

    if origin is dict and len(args) == 2:
        # Keys are always mapped to strings, whatever the declared key type;
        # this branch also covers dict[str, str].
        return MapType(StringType(), get_schema_for_type(args[1]))

    raise NotImplementedError(f"No Spark schema mapping for type {proptype!r}")