fabricks 3.0.12__py3-none-any.whl → 3.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,41 @@
1
+ import os
2
+ import pathlib
1
3
  from typing import List
2
4
 
3
5
  from fabricks.utils.path import Path
4
6
 
5
7
 
6
- def get_config_from_toml():
7
- import os
8
- import pathlib
9
- import sys
10
-
11
- if sys.version_info >= (3, 11):
12
- import tomllib
13
- else:
14
- import tomli as tomllib # type: ignore
15
-
8
+ def get_config_from_file():
16
9
  path = pathlib.Path(os.getcwd())
17
- while path is not None and not (path / "pyproject.toml").exists():
18
- if path == path.parent:
19
- break
20
- path = path.parent
21
-
22
- if (path / "pyproject.toml").exists():
23
- with open((path / "pyproject.toml"), "rb") as f:
24
- config = tomllib.load(f)
25
- return path, config.get("tool", {}).get("fabricks", {})
26
-
27
- return None, {}
28
10
 
29
-
30
- def get_config_from_json():
31
- import json
32
- import os
33
- import pathlib
34
-
35
- path = pathlib.Path(os.getcwd())
36
- while path is not None and not (path / "fabricksconfig.json").exists():
11
+ while path is not None:
12
+ if (path / "fabricksconfig.json").exists():
13
+ break
14
+ if (path / "pyproject.toml").exists():
15
+ break
37
16
  if path == path.parent:
38
17
  break
18
+
39
19
  path = path.parent
40
20
 
41
21
  if (path / "fabricksconfig.json").exists():
22
+ import json
23
+
42
24
  with open((path / "fabricksconfig.json"), "r") as f:
43
25
  config = json.load(f)
44
- return path, config
45
-
46
- return None, {}
26
+ return path, config, "json"
47
27
 
28
+ if (path / "pyproject.toml").exists():
29
+ import sys
48
30
 
49
- def get_config_from_file():
50
- json_path, json_config = get_config_from_json()
51
- if json_config:
52
- return json_path, json_config, "json"
31
+ if sys.version_info >= (3, 11):
32
+ import tomllib
33
+ else:
34
+ import tomli as tomllib # type: ignore
53
35
 
54
- pyproject_path, pyproject_config = get_config_from_toml()
55
- if pyproject_config:
56
- return pyproject_path, pyproject_config, "pyproject"
36
+ with open((path / "pyproject.toml"), "rb") as f:
37
+ config = tomllib.load(f)
38
+ return path, config.get("tool", {}).get("fabricks", {}), "pyproject"
57
39
 
58
40
  return None, {}, None
59
41
 
@@ -1,8 +1,8 @@
1
- from importlib.util import module_from_spec, spec_from_file_location
2
1
  from typing import Callable
3
2
 
4
3
  from fabricks.context import IS_UNITY_CATALOG, PATH_EXTENDERS
5
4
  from fabricks.context.log import DEFAULT_LOGGER
5
+ from fabricks.utils.helpers import load_module_from_path
6
6
 
7
7
  EXTENDERS: dict[str, Callable] = {}
8
8
 
@@ -14,12 +14,7 @@ def get_extender(name: str) -> Callable:
14
14
  else:
15
15
  DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")
16
16
 
17
- spec = spec_from_file_location(name, path.string)
18
- assert spec, "no valid extender found in {path.string}"
19
- assert spec.loader is not None
20
-
21
- mod = module_from_spec(spec)
22
- spec.loader.exec_module(mod)
17
+ load_module_from_path(name, path)
23
18
  e = EXTENDERS[name]
24
19
 
25
20
  return e
@@ -339,7 +339,7 @@ class Generator(Configurator):
339
339
  self.table.add_table_comment(comment=comment)
340
340
 
341
341
  else:
342
- DEFAULT_LOGGER.debug("table exists, skip creation", extra={"label": self})
342
+ DEFAULT_LOGGER.debug("table already exists, skipped creation", extra={"label": self})
343
343
 
344
344
  def _update_schema(
345
345
  self,
@@ -1,9 +1,9 @@
1
- from importlib.util import module_from_spec, spec_from_file_location
2
1
  from typing import Optional
3
2
 
4
3
  from fabricks.context import PATH_PARSERS
5
4
  from fabricks.core.parsers._types import ParserOptions
6
5
  from fabricks.core.parsers.base import PARSERS, BaseParser
6
+ from fabricks.utils.helpers import load_module_from_path
7
7
 
8
8
 
9
9
  def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> BaseParser:
@@ -11,12 +11,7 @@ def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> Bas
11
11
  path = PATH_PARSERS.joinpath(name).append(".py")
12
12
  assert path.exists(), f"parser not found ({path})"
13
13
 
14
- spec = spec_from_file_location(name, path.string)
15
- assert spec, f"parser not found ({path})"
16
- assert spec.loader is not None
17
-
18
- mod = module_from_spec(spec)
19
- spec.loader.exec_module(mod)
14
+ load_module_from_path(name, path)
20
15
  parser = PARSERS[name](parser_options)
21
16
 
22
17
  else:
@@ -228,15 +228,22 @@ class BaseStep:
228
228
  DEFAULT_LOGGER.exception("fail to get jobs", extra={"label": self})
229
229
  raise e
230
230
 
231
- def create_db_objects(self, retry: Optional[bool] = True) -> List[Dict]:
231
+ def create_db_objects(
232
+ self,
233
+ retry: Optional[bool] = True,
234
+ update_lists: Optional[bool] = True,
235
+ incremental: Optional[bool] = False,
236
+ ) -> List[Dict]:
232
237
  DEFAULT_LOGGER.info("create db objects", extra={"label": self})
233
238
 
234
239
  df = self.get_jobs()
235
- table_df = self.database.get_tables()
236
- view_df = self.database.get_views()
237
240
 
238
- df = df.join(table_df, "job_id", how="left_anti")
239
- df = df.join(view_df, "job_id", how="left_anti")
241
+ if incremental:
242
+ table_df = self.database.get_tables()
243
+ view_df = self.database.get_views()
244
+
245
+ df = df.join(table_df, "job_id", how="left_anti")
246
+ df = df.join(view_df, "job_id", how="left_anti")
240
247
 
241
248
  if df:
242
249
  results = run_in_parallel(
@@ -248,15 +255,16 @@ class BaseStep:
248
255
  loglevel=logging.CRITICAL,
249
256
  )
250
257
 
251
- self.update_tables_list()
252
- self.update_views_list()
258
+ if update_lists:
259
+ self.update_tables_list()
260
+ self.update_views_list()
253
261
 
254
262
  errors = [res for res in results if res.get("error")]
255
263
 
256
264
  if errors:
257
265
  if retry:
258
266
  DEFAULT_LOGGER.warning("retry to create jobs", extra={"label": self})
259
- return self.create_db_objects(retry=False)
267
+ return self.create_db_objects(retry=False, update_lists=update_lists, incremental=incremental)
260
268
 
261
269
  return errors
262
270
 
@@ -1,5 +1,4 @@
1
- import time
2
- from typing import TYPE_CHECKING, List, Optional, Union
1
+ from typing import TYPE_CHECKING, Any, List, Optional, Union
3
2
 
4
3
  from azure.data.tables import TableClient, TableServiceClient
5
4
  from pyspark.sql import DataFrame
@@ -99,27 +98,29 @@ class AzureTable:
99
98
  if self._table_client is not None:
100
99
  self._table_client.close()
101
100
 
102
- def submit(self, operations: List, retry: Optional[bool] = True):
103
- try:
104
- partitions = set()
105
- for d in operations:
106
- partitions.add(d[1]["PartitionKey"])
107
-
108
- for p in partitions:
109
- _operations = [d for d in operations if d[1].get("PartitionKey") == p]
110
- t = 50
111
- if len(_operations) < t:
112
- self.table.submit_transaction(_operations)
113
- else:
114
- transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
115
- for transaction in transactions:
116
- self.table.submit_transaction(transaction)
117
- except Exception as e:
118
- if retry:
119
- time.sleep(10)
120
- self.submit(operations, retry=False)
101
+ @retry(
102
+ stop=stop_after_attempt(3),
103
+ wait=wait_exponential(multiplier=1, min=1, max=10),
104
+ retry=retry_if_exception_type((Exception)),
105
+ reraise=True,
106
+ )
107
+ def _submit_with_retry(self, data: Any):
108
+ self.table.submit_transaction(data)
109
+
110
+ def submit(self, operations: List):
111
+ partitions = set()
112
+ for d in operations:
113
+ partitions.add(d[1]["PartitionKey"])
114
+
115
+ for p in partitions:
116
+ _operations = [d for d in operations if d[1].get("PartitionKey") == p]
117
+ t = 50
118
+ if len(_operations) < t:
119
+ self._submit_with_retry(_operations)
121
120
  else:
122
- raise e
121
+ transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
122
+ for transaction in transactions:
123
+ self._submit_with_retry(transaction)
123
124
 
124
125
  def delete(self, data: Union[List, DataFrame, dict]):
125
126
  if isinstance(data, DataFrameLike):
fabricks/utils/helpers.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import sys
2
3
  from functools import reduce
3
4
  from queue import Queue
4
5
  from typing import Any, Callable, Iterable, List, Literal, Optional, Union
@@ -216,13 +217,28 @@ def run_notebook(path: Path, timeout: Optional[int] = None, **kwargs):
216
217
  dbutils.notebook.run(path.get_notebook_path(), timeout, {**kwargs}) # type: ignore
217
218
 
218
219
 
219
- def xxhash64(s: Any):
220
+ def xxhash64(s: Any) -> int:
220
221
  df = spark.sql(f"select xxhash64(cast('{s}' as string)) as xxhash64")
221
222
  return df.collect()[0][0]
222
223
 
223
224
 
224
- def md5(s: Any):
225
+ def md5(s: Any) -> str:
225
226
  from hashlib import md5
226
227
 
227
228
  md5 = md5(str(s).encode())
228
229
  return md5.hexdigest()
230
+
231
+
232
+ def load_module_from_path(name: str, path: Path):
233
+ from importlib.util import module_from_spec, spec_from_file_location
234
+
235
+ sys.path.append(str(path.parent))
236
+
237
+ spec = spec_from_file_location(name, path.string)
238
+ assert spec, f"no valid module found in {path.string}"
239
+ assert spec.loader is not None
240
+
241
+ textwrap_module = module_from_spec(spec)
242
+ spec.loader.exec_module(textwrap_module)
243
+
244
+ return textwrap_module
fabricks/utils/path.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import posixpath
2
3
  from pathlib import Path as PathlibPath
3
4
  from typing import List, Optional, Union
4
5
 
@@ -120,8 +121,13 @@ class Path:
120
121
  return False
121
122
 
122
123
  def joinpath(self, *other):
123
- new_path = self.pathlibpath.joinpath(*other)
124
- return Path(path=new_path, assume_git=self.assume_git)
124
+ parts = [str(o) for o in other]
125
+ base = self.string
126
+
127
+ joined = posixpath.join(base, *parts)
128
+ new = posixpath.normpath(joined)
129
+
130
+ return Path(path=new, assume_git=self.assume_git)
125
131
 
126
132
  def append(self, other: str):
127
133
  new_path = self.string + other
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabricks
3
- Version: 3.0.12
3
+ Version: 3.0.14
4
4
  Author-email: BMS DWH Team <bi_support@bmsuisse.ch>
5
5
  Requires-Python: <4,>=3.9
6
6
  Requires-Dist: azure-data-tables<13,>=12.5.0
@@ -70,14 +70,14 @@ fabricks/cdc/templates/queries/nocdc/update.sql.jinja,sha256=mjNUwGVhZ08yUkdv9sC
70
70
  fabricks/context/__init__.py,sha256=qfntJ9O6omzY_t6AhDP6Ndu9C5LMiVdWbo6ikhtoe7o,1446
71
71
  fabricks/context/_types.py,sha256=FzQJ35vp0uc6pAq18bc-VHwMVEWtd0VDdm8xQmNr2Sg,2681
72
72
  fabricks/context/config.py,sha256=EmLUnswuWfrncaNJMDjvdMg-1lD8aneKAY8IDna7VPE,4814
73
- fabricks/context/helpers.py,sha256=GV9MscE8p6CsHbjVC-Qnqtv9VBf893DoxfLa057hFT8,2061
73
+ fabricks/context/helpers.py,sha256=PekG7VT01GlC7fURT-JUCT1CbyRrgPuKnrT5-gauQ-Y,1608
74
74
  fabricks/context/log.py,sha256=CadrRf8iL6iXlGIGIhEIswa7wGqC-E-oLwWcGTyJ10s,2074
75
75
  fabricks/context/runtime.py,sha256=87PtX6SqLoFd0PGxgisF6dLlxtCHaHxkMMIt34UyB2w,3479
76
76
  fabricks/context/secret.py,sha256=iRM-KU-JcJAEOLoGJ8S4Oh65-yt674W6CDTSkOE7SXw,3192
77
77
  fabricks/context/spark_session.py,sha256=BPaxKJXHZDI5oQiOPhmua_xjXnrVgluh--AVpvUgbck,2553
78
78
  fabricks/context/utils.py,sha256=EQRscdUhdjwk2htZu8gCgNZ9PfRzzrR6e1kRrIbVlBM,2786
79
79
  fabricks/core/__init__.py,sha256=LaqDi4xuyHAoLOvS44PQdZdRfq9SmVr7mB6BDHyxYpc,209
80
- fabricks/core/extenders.py,sha256=39bSm9QiW4vBAyT659joE-5p_EZiNM4gi8KA3-OgX3E,917
80
+ fabricks/core/extenders.py,sha256=oJzfv0hWxusnGmrjMwbrGyKfot8xzA4XtNquPWfFgPo,727
81
81
  fabricks/core/job_schema.py,sha256=6-70oy0ZJd3V9AiXfc0Q8b8NVEynxQza_h7mB13uB-s,853
82
82
  fabricks/core/masks.py,sha256=3UCxcCi-TgFHB7xT5ZvmEa8RMKev23X_JLE70Pr3rpY,1347
83
83
  fabricks/core/udfs.py,sha256=gu7K-ohxcO0TdgA7IjzVMOZatZQYhFTklHo60a6k_Yc,2960
@@ -105,7 +105,7 @@ fabricks/core/jobs/base/_types.py,sha256=y66BtJlJskq7wGzn7te5XYjO-NEqeQGUC11kkbe
105
105
  fabricks/core/jobs/base/checker.py,sha256=Cdfh8rQYy4MvMFl0HyC3alGUWm8zrFXk08m2t2JMu6Y,5477
106
106
  fabricks/core/jobs/base/configurator.py,sha256=9G5F7Qg5FWHPbHgdh8Qxc85OoSX0rnjD4c9itwU5KKc,10415
107
107
  fabricks/core/jobs/base/exception.py,sha256=HrdxEuOfK5rY-ItZvEL3iywLgdpYUpmWFkjjjks7oYc,2318
108
- fabricks/core/jobs/base/generator.py,sha256=TI4Wy8tlVRIKEczx6qmrIf2ppGwLvhdaatb9pnCvzlI,17635
108
+ fabricks/core/jobs/base/generator.py,sha256=Dk82tj21NhR9wwgXzMp8JlKQ6D9HnjVlK9fvDmoYLbk,17646
109
109
  fabricks/core/jobs/base/invoker.py,sha256=FvjfpNqi542slxC2yLu1BIu5EklNUWySxDF8cD_SqKQ,7602
110
110
  fabricks/core/jobs/base/job.py,sha256=dWmk2PpQH2NETaaDS6KoiefRnDHfDMdCyhmogkdcSFI,93
111
111
  fabricks/core/jobs/base/processor.py,sha256=qkNiJSSLaEnivKGBcd9UZyIVFexnv-n1p_5mCZIy1rA,9076
@@ -113,7 +113,7 @@ fabricks/core/parsers/__init__.py,sha256=TGjyUeiiTkJrAxIpu2D_c2rQcbe5YRpmBW9oh0F
113
113
  fabricks/core/parsers/_types.py,sha256=JC2Oh-wUvaX8SBzeuf5owPgRaj-Q3-7MXxyIYPQ7QwA,147
114
114
  fabricks/core/parsers/base.py,sha256=P8IrLQKGakwaAQ-4gf4vElVwWoSpkixYd9kNthu1VDM,3292
115
115
  fabricks/core/parsers/decorator.py,sha256=kn_Mj-JLWTFaRiciZ3KavmSUcWFPY3ve-buMruHrX_Q,307
116
- fabricks/core/parsers/get_parser.py,sha256=TTnVPwKqKpFu6jJJnXEuiEctWGtimk8w2p1jF2U7ibg,909
116
+ fabricks/core/parsers/get_parser.py,sha256=mauofS626h9wpPZtlZFqIb1jcKM3Jz4D_36uvd-Lv4k,717
117
117
  fabricks/core/parsers/utils.py,sha256=qdn2ElpqBgDsW55-tACWZaFOT0ebrBYg2fenqSgd6YI,2456
118
118
  fabricks/core/schedules/__init__.py,sha256=bDjNMcm7itimAo4gun0W4W9bZKwZmWUjkMqAQIcqI2Y,431
119
119
  fabricks/core/schedules/diagrams.py,sha256=YA4T7Etl_UPfW-3IGFq5Xj9OlXZGQ27Aot6RVa3ZUgg,578
@@ -126,7 +126,7 @@ fabricks/core/schedules/terminate.py,sha256=-RvtOrxTOZl2sZQ6KfNHJL6H2LCAEMSVRyyl
126
126
  fabricks/core/schedules/views.py,sha256=8hYwPLCvvN-nem2lNAKvUY5hC71v88z4-y8j0poUApM,1949
127
127
  fabricks/core/steps/__init__.py,sha256=JP-kaDa890-9XqBSPp6YdssAexdxv-MqQ__WfVYdgeg,132
128
128
  fabricks/core/steps/_types.py,sha256=VxIrH3nFwmPlwG-UI8sDDP0AwK_9jlsy6yQp6YfgtqE,90
129
- fabricks/core/steps/base.py,sha256=UTzVqdWaho9zgMrloq8ndqcWQ9R5Z-O1SInIrm9byB4,14403
129
+ fabricks/core/steps/base.py,sha256=MJe2q9s1siM89YkpHDqldtbtKQgkhDB_cFa2-e_irvs,14642
130
130
  fabricks/core/steps/get_step.py,sha256=8q4rEDdTTZNJsXB2l5XY-Ktoow8ZHsON_tx5yKMUIzg,284
131
131
  fabricks/core/steps/get_step_conf.py,sha256=UPT3gB1Sh5yzawZ9qiVQlvVAKaxPX82gaWBDzxx75EM,633
132
132
  fabricks/deploy/__init__.py,sha256=ntxtFnzeho_WneVoL5CCqbI4rKApKgdmi9j0HKb0LJc,2375
@@ -149,13 +149,13 @@ fabricks/metastore/view.py,sha256=f7hKJWtnH1KmZym8dkoucKOTndntzai_f2YqferxHLs,14
149
149
  fabricks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
150
  fabricks/utils/_types.py,sha256=AuOhknlozqx5QdAdvZSA6xAWhU8k4nxG1vxIkOVgHeY,184
151
151
  fabricks/utils/azure_queue.py,sha256=wtKAq_MD5QLxelerDO475dzL-SySIrxt9d5KGi-8vvw,3102
152
- fabricks/utils/azure_table.py,sha256=7tCD1iM7UWREaSQVVmtgHCAebLtWPP9ZmuU5zDALyo0,5305
152
+ fabricks/utils/azure_table.py,sha256=J_UAPiCN89rL5FNmwIGg43Z6FSH8evVIWijDEgYJxk4,5294
153
153
  fabricks/utils/console.py,sha256=X4lLgL_UxCjoFRx-ZRCwzdBveRGPKlFYZDi6vl7uevQ,1017
154
154
  fabricks/utils/fdict.py,sha256=cdnvNBSXKJIDKSdhQGJA4CGv0qLn5IVYKQ111l7nM9I,7978
155
- fabricks/utils/helpers.py,sha256=h7SuOVpBP5qcgX1nM1suvkXG9BhiK5-257EBepCvrO8,7452
155
+ fabricks/utils/helpers.py,sha256=fKv6mpT-428xTSjdLfm7TnN1Xo9FadrSIY1qzYgWCzs,7909
156
156
  fabricks/utils/log.py,sha256=LCQEM81PhdojiyLrtEzv1QM__bWbaEhGddyd0IqyGXM,7985
157
157
  fabricks/utils/mermaid.py,sha256=XoiVxPaUJS4TC_ybA-e78qFzQkQ46uPf055JiiNDdSg,986
158
- fabricks/utils/path.py,sha256=Bs3PayWtg62-mrsDbvu8kh0VLZZhX7tU9YiyHFiYNhs,6698
158
+ fabricks/utils/path.py,sha256=ToTTS8QKGsWq8cR8SDE2ocWKx2GsuZ5psPTuAMq9R4s,6813
159
159
  fabricks/utils/pip.py,sha256=UHo7NTjFGJNghWBuuDow28xUkZYg2YrlbAP49IxZyXY,1522
160
160
  fabricks/utils/pydantic.py,sha256=W0fiDLVMFrrInfQw2s5YPeSEvkN-4k864u3UyPoHaz4,2516
161
161
  fabricks/utils/spark.py,sha256=QWVpbGwOvURIVBlR7ygt6NQ9MHUsIDvlquJ65iI8UBI,2007
@@ -171,6 +171,6 @@ fabricks/utils/schema/get_schema_for_type.py,sha256=5k-R6zCgUAtapQgxT4turcx1IQ-b
171
171
  fabricks/utils/write/__init__.py,sha256=i0UnZenXj9Aq0b0_aU3s6882vg-Vu_AyKfQhl_dTp-g,200
172
172
  fabricks/utils/write/delta.py,sha256=lTQ0CfUhcvn3xTCcT_Ns6PMDBsO5UEfa2S9XpJiLJ9c,1250
173
173
  fabricks/utils/write/stream.py,sha256=wQBpAnQtYA6nl79sPKhVM6u5m-66suX7B6VQ6tW4TOs,622
174
- fabricks-3.0.12.dist-info/METADATA,sha256=LYFVTMlw8ueiyViUZC5ZZHk2LZkpNnVGO2B1ZhYrdTg,798
175
- fabricks-3.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
176
- fabricks-3.0.12.dist-info/RECORD,,
174
+ fabricks-3.0.14.dist-info/METADATA,sha256=g8zxaKKHnJIJPtbMGYqicViqA9vFlVJArB86sgWyxps,798
175
+ fabricks-3.0.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
176
+ fabricks-3.0.14.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any