fabricks-3.0.11-py3-none-any.whl → fabricks-3.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fabricks/cdc/base/merger.py CHANGED
@@ -89,6 +89,7 @@ class Merger(Processor):
             sql = sql.replace("{src}", "src")
             sql = fix_sql(sql)
             sql = sql.replace("`src`", "{src}")
+
             DEFAULT_LOGGER.debug("merge", extra={"label": self, "sql": sql})

         except Exception as e:
fabricks/cdc/base/processor.py CHANGED
@@ -390,11 +390,12 @@ class Processor(Generator):
         try:
             sql = template.render(**context)
             if fix:
-                DEFAULT_LOGGER.debug("fix context", extra={"label": self, "sql": sql})
                 sql = self.fix_sql(sql)
+            else:
+                DEFAULT_LOGGER.debug("print query", extra={"label": self, "sql": sql})

         except (Exception, TypeError) as e:
-            DEFAULT_LOGGER.exception("fail to execute sql query", extra={"label": self, "context": context})
+            DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
             raise e

         row = self.spark.sql(sql).collect()[0]
@@ -425,7 +426,7 @@ class Processor(Generator):

         except (Exception, TypeError) as e:
             DEFAULT_LOGGER.debug("context", extra={"label": self, "context": context})
-            DEFAULT_LOGGER.exception("fail to generate sql query", extra={"label": self, "context": context})
+            DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
             raise e

         return sql
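
Note: the render path above is a jinja2 `template.render(**context)` wrapped in a try/except. A minimal runnable sketch of that pattern; the `fix_sql` stand-in and logger wiring here are illustrative, not the fabricks internals:

    import logging
    from jinja2 import Template

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("render")

    def render_sql(source: str, context: dict, fix: bool = False) -> str:
        # Render a jinja template into SQL; log the rendered query when not
        # fixing, mirroring the debug/exception split in the diff above.
        try:
            sql = Template(source).render(**context)
            if fix:
                sql = sql.strip()  # stand-in for fix_sql, which is fabricks-internal
            else:
                logger.debug("print query: %s", sql)
        except (Exception, TypeError):
            logger.exception("fail to render sql query (context=%s)", context)
            raise
        return sql

    print(render_sql("select * from {{ table }}", {"table": "fabricks.logs"}))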
fabricks/context/config.py ADDED
@@ -0,0 +1,127 @@
+import logging
+import os
+from typing import Final
+
+from fabricks.context.helpers import get_config_from_file
+from fabricks.utils.log import get_logger
+from fabricks.utils.path import Path
+from fabricks.utils.spark import spark
+
+logger, _ = get_logger("logs", level=logging.DEBUG)
+file_path, file_config, origin = get_config_from_file()
+
+if file_path:
+    logger.debug(f"found {origin} config ({file_path})", extra={"label": "config"})
+
+# path to runtime
+runtime = os.environ.get("FABRICKS_RUNTIME", "none")
+runtime = None if runtime.lower() == "none" else runtime
+if runtime is None:
+    if runtime := file_config.get("runtime"):
+        assert file_path is not None
+        runtime = file_path.joinpath(runtime)
+        logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve runtime from env", extra={"label": "config"})
+
+if runtime is None:
+    if file_path is not None:
+        runtime = file_path
+        logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
+    else:
+        raise ValueError(
+            "could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
+        )
+
+path_runtime = Path(runtime, assume_git=True)
+
+# path to config
+config = os.environ.get("FABRICKS_CONFIG")
+if config is None:
+    if config := file_config.get("config"):
+        assert file_path is not None
+        config = file_path.joinpath(config)
+        logger.debug(f"resolve config from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve config from env", extra={"label": "config"})
+
+if config is None:
+    logger.debug("resolve config from default path", extra={"label": "config"})
+    config = path_runtime.joinpath(
+        "fabricks",
+        f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
+    ).string
+
+path_config = Path(config, assume_git=True)
+
+# path to notebooks
+notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
+notebooks = None if notebooks.lower() == "none" else notebooks
+if notebooks is None:
+    if notebooks := file_config.get("notebooks"):
+        assert file_path is not None
+        notebooks = file_path.joinpath(notebooks)
+        logger.debug(f"resolve notebooks from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve notebooks from env", extra={"label": "config"})
+
+if notebooks is None:
+    logger.debug("resolve notebooks from default path", extra={"label": "config"})
+    notebooks = path_runtime.joinpath("notebooks")
+
+path_notebooks = Path(str(notebooks), assume_git=True)
+
+# job config from yaml
+is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
+if is_job_config_from_yaml is None:
+    if is_job_config_from_yaml := file_config.get("job_config_from_yaml"):
+        logger.debug(f"resolve job_config_from_yaml from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve job_config_from_yaml from env", extra={"label": "config"})
+
+# debug mode
+is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
+if is_debugmode is None:
+    if is_debugmode := file_config.get("debugmode"):
+        logger.debug(f"resolve debugmode from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve debugmode from env", extra={"label": "config"})
+
+# dev mode
+is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
+if is_devmode is None:
+    if is_devmode := file_config.get("devmode"):
+        logger.debug(f"resolve devmode from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve devmode from env", extra={"label": "config"})
+
+# log level
+loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
+if loglevel is None:
+    if loglevel := file_config.get("loglevel"):
+        logger.debug(f"resolve loglevel from {origin} file", extra={"label": "config"})
+else:
+    logger.debug("resolve loglevel from env", extra={"label": "config"})
+
+loglevel = loglevel.upper() if loglevel else "INFO"
+if loglevel == "DEBUG":
+    _loglevel = logging.DEBUG
+elif loglevel == "INFO":
+    _loglevel = logging.INFO
+elif loglevel == "WARNING":
+    _loglevel = logging.WARNING
+elif loglevel == "ERROR":
+    _loglevel = logging.ERROR
+elif loglevel == "CRITICAL":
+    _loglevel = logging.CRITICAL
+else:
+    raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
+
+# Constants
+PATH_CONFIG: Final[Path] = path_config
+PATH_RUNTIME: Final[Path] = path_runtime
+PATH_NOTEBOOKS: Final[Path] = path_notebooks
+IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
+IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
+IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
+LOGLEVEL: Final[int] = _loglevel
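
Note: every setting in the new config.py follows the same resolution order: environment variable first, then the config-file entry, then a default. A minimal stdlib sketch of that pattern, including the loglevel mapping (the dict-based lookup here is an illustration; the module itself uses an if/elif chain):

    import logging
    import os

    def resolve(name: str, env_var: str, file_config: dict, default=None):
        # Environment variable wins; otherwise fall back to the parsed
        # config file, then to the default.
        value = os.environ.get(env_var)
        if value is None:
            value = file_config.get(name)
        return value if value is not None else default

    file_config = {"loglevel": "warning"}  # hypothetical parsed config
    loglevel = str(resolve("loglevel", "FABRICKS_LOGLEVEL", file_config, "INFO")).upper()

    levels = {
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }
    if loglevel not in levels:
        raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
    print(levels[loglevel])  # 30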
fabricks/context/helpers.py ADDED
@@ -0,0 +1,58 @@
+import os
+import pathlib
+from typing import List
+
+from fabricks.utils.path import Path
+
+
+def get_config_from_file():
+    path = pathlib.Path(os.getcwd())
+
+    while path is not None and (not (path / "pyproject.toml").exists() or (path / "fabricksconfig.json").exists()):
+        if path == path.parent:
+            break
+        path = path.parent
+
+    if (path / "fabricksconfig.json").exists():
+        import json
+
+        with open((path / "fabricksconfig.json"), "r") as f:
+            config = json.load(f)
+        return path, config, "json"
+
+    if (path / "pyproject.toml").exists():
+        import sys
+
+        if sys.version_info >= (3, 11):
+            import tomllib
+        else:
+            import tomli as tomllib  # type: ignore
+
+        with open((path / "pyproject.toml"), "rb") as f:
+            config = tomllib.load(f)
+        return path, config.get("tool", {}).get("fabricks", {}), "pyproject"
+
+    return None, {}, None
+
+
+def get_storage_paths(objects: List[dict], variables: dict) -> dict:
+    d = {}
+    for o in objects:
+        if o:
+            name = o.get("name")
+            assert name
+            uri = o.get("path_options", {}).get("storage")
+            assert uri
+            d[name] = Path.from_uri(uri, regex=variables)
+    return d
+
+
+def get_runtime_path(objects: List[dict], root: Path) -> dict:
+    d = {}
+    for o in objects:
+        name = o.get("name")
+        assert name
+        uri = o.get("path_options", {}).get("runtime")
+        assert uri
+        d[name] = root.joinpath(uri)
+    return d
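
Note: `get_config_from_file` reads the `[tool.fabricks]` table from the nearest pyproject.toml (or a fabricksconfig.json) above the working directory. A small runnable sketch of the tomllib lookup, using hypothetical pyproject content:

    import sys

    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib  # type: ignore

    # Hypothetical pyproject.toml content; the keys mirror the ones
    # config.py looks up (runtime, config, loglevel, ...).
    PYPROJECT = """
    [tool.fabricks]
    runtime = "runtime"
    config = "fabricks/conf.yml"
    loglevel = "debug"
    """

    config = tomllib.loads(PYPROJECT)
    print(config.get("tool", {}).get("fabricks", {}))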
fabricks/context/runtime.py CHANGED
@@ -1,11 +1,12 @@
-from typing import Final, List, Optional
+from typing import Final, Optional

 import yaml

-from fabricks.context.config import path_config, path_runtime
+from fabricks.context.config import PATH_CONFIG, PATH_RUNTIME
+from fabricks.context.helpers import get_runtime_path, get_storage_paths
 from fabricks.utils.path import Path

-with open(str(path_config)) as f:
+with open(str(PATH_CONFIG)) as f:
     data = yaml.safe_load(f)

 conf: dict = [d["conf"] for d in data][0]
@@ -51,67 +52,42 @@ FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_cr

 path_udfs = path_options.get("udfs", "fabricks/udfs")
 assert path_udfs, "path to udfs mandatory"
-PATH_UDFS: Final[Path] = path_runtime.joinpath(path_udfs)
+PATH_UDFS: Final[Path] = PATH_RUNTIME.joinpath(path_udfs)

 path_parsers = path_options.get("parsers", "fabricks/parsers")
 assert path_parsers, "path to parsers mandatory"
-PATH_PARSERS: Final[Path] = path_runtime.joinpath(path_parsers)
+PATH_PARSERS: Final[Path] = PATH_RUNTIME.joinpath(path_parsers)

 path_extenders = path_options.get("extenders", "fabricks/extenders")
 assert path_extenders, "path to extenders mandatory"
-PATH_EXTENDERS: Final[Path] = path_runtime.joinpath(path_extenders)
+PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)

 path_views = path_options.get("views", "fabricks/views")
 assert path_views, "path to views mandatory"
-PATH_VIEWS: Final[Path] = path_runtime.joinpath(path_views)
+PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)

 path_schedules = path_options.get("schedules", "fabricks/schedules")
 assert path_schedules, "path to schedules mandatory"
-PATH_SCHEDULES: Final[Path] = path_runtime.joinpath(path_schedules)
+PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)

 path_requirements = path_options.get("requirements", "fabricks/requirements")
 assert path_requirements, "path to requirements mandatory"
-PATH_REQUIREMENTS: Final[Path] = path_runtime.joinpath(path_requirements)
+PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)

 path_masks = path_options.get("masks", "fabricks/masks")
 assert path_masks, "path to masks mandatory"
-PATH_MASKS: Final[Path] = path_runtime.joinpath(path_masks)
-
-
-def _get_storage_paths(objects: List[dict]) -> dict:
-    d = {}
-    for o in objects:
-        if o:
-            name = o.get("name")
-            assert name
-            uri = o.get("path_options", {}).get("storage")
-            assert uri
-            d[name] = Path.from_uri(uri, regex=variables)
-    return d
-
+PATH_MASKS: Final[Path] = PATH_RUNTIME.joinpath(path_masks)

 PATHS_STORAGE: Final[dict[str, Path]] = {
     "fabricks": FABRICKS_STORAGE,
-    **_get_storage_paths(BRONZE),
-    **_get_storage_paths(SILVER),
-    **_get_storage_paths(GOLD),
-    **_get_storage_paths(databases),
+    **get_storage_paths(BRONZE, variables),
+    **get_storage_paths(SILVER, variables),
+    **get_storage_paths(GOLD, variables),
+    **get_storage_paths(databases, variables),
 }

-
-def _get_runtime_path(objects: List[dict]) -> dict:
-    d = {}
-    for o in objects:
-        name = o.get("name")
-        assert name
-        uri = o.get("path_options", {}).get("runtime")
-        assert uri
-        d[name] = path_runtime.joinpath(uri)
-    return d
-
-
 PATHS_RUNTIME: Final[dict[str, Path]] = {
-    **_get_runtime_path(BRONZE),
-    **_get_runtime_path(SILVER),
-    **_get_runtime_path(GOLD),
+    **get_runtime_path(BRONZE, PATH_RUNTIME),
+    **get_runtime_path(SILVER, PATH_RUNTIME),
+    **get_runtime_path(GOLD, PATH_RUNTIME),
 }
fabricks/core/dags/processor.py CHANGED
@@ -10,7 +10,7 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_ex

 from fabricks.context import PATH_NOTEBOOKS
 from fabricks.core.dags.base import BaseDags
-from fabricks.core.dags.log import LOGGER
+from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
 from fabricks.core.dags.run import run
 from fabricks.core.jobs.base._types import TStep
 from fabricks.core.steps.get_step import get_step
@@ -148,7 +148,9 @@ class DagProcessor(BaseDags):
         finally:
             j["Status"] = "ok"
             self.table.upsert(j)
+
             LOGGER.info("end", extra=self.extra(j))
+            TABLE_LOG_HANDLER.flush()

         dependencies = self.table.query(f"PartitionKey eq 'dependencies' and ParentId eq '{j.get('JobId')}'")
         self.table.delete(dependencies)
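
Note: the new `TABLE_LOG_HANDLER.flush()` call suggests the handler buffers records rather than writing each one immediately; an explicit flush once a job reaches its terminal state then guarantees nothing is left unwritten. A minimal stand-in (this handler is illustrative, not the fabricks implementation):

    import logging

    class BufferingHandler(logging.Handler):
        # Records are buffered in memory and only written out on flush().
        def __init__(self):
            super().__init__()
            self.buffer = []

        def emit(self, record):
            self.buffer.append(self.format(record))

        def flush(self):
            for line in self.buffer:
                print("write:", line)
            self.buffer.clear()

    handler = BufferingHandler()
    logger = logging.getLogger("dags")
    logger.addHandler(handler)
    logger.warning("end")
    handler.flush()  # nothing is persisted until this point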
fabricks/core/extenders.py CHANGED
@@ -1,8 +1,8 @@
-from importlib.util import module_from_spec, spec_from_file_location
 from typing import Callable

 from fabricks.context import IS_UNITY_CATALOG, PATH_EXTENDERS
 from fabricks.context.log import DEFAULT_LOGGER
+from fabricks.utils.helpers import load_module_from_path

 EXTENDERS: dict[str, Callable] = {}

@@ -14,12 +14,7 @@ def get_extender(name: str) -> Callable:
     else:
         DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")

-    spec = spec_from_file_location(name, path.string)
-    assert spec, "no valid extender found in {path.string}"
-    assert spec.loader is not None
-
-    mod = module_from_spec(spec)
-    spec.loader.exec_module(mod)
+    load_module_from_path(name, path)
     e = EXTENDERS[name]

     return e
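
Note: `load_module_from_path(name, path)` works here because importing an extender module populates the `EXTENDERS` registry as a side effect, after which `EXTENDERS[name]` can be looked up. A self-contained sketch of that registry pattern (the decorator shown is hypothetical, not the fabricks one):

    from typing import Callable

    EXTENDERS: dict[str, Callable] = {}

    def extender(name: str):
        # Hypothetical decorator: an extender module registers itself on
        # import, so loading the module by path is enough to fill EXTENDERS.
        def decorator(fn: Callable) -> Callable:
            EXTENDERS[name] = fn
            return fn
        return decorator

    @extender("add_country")
    def add_country(df):
        return df

    print(EXTENDERS["add_country"])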
fabricks/core/jobs/base/generator.py CHANGED
@@ -336,10 +336,10 @@ class Generator(Configurator):

             comment = self.options.table.get("comment")
             if comment:
-                self.table.add_comment(comment=comment)
+                self.table.add_table_comment(comment=comment)

         else:
-            DEFAULT_LOGGER.debug("table exists, skip creation", extra={"label": self})
+            DEFAULT_LOGGER.debug("table already exists, skipped creation", extra={"label": self})

     def _update_schema(
         self,
@@ -389,6 +389,24 @@ class Generator(Configurator):
     def overwrite_schema(self, df: Optional[DataFrame] = None):
         self._update_schema(df=df, overwrite=True)

+    def update_comments(self, table: Optional[bool] = True, columns: Optional[bool] = True):
+        if self.virtual:
+            return
+
+        if self.persist:
+            self.table.drop_comments()
+
+        if table:
+            comment = self.options.table.get("comment")
+            if comment:
+                self.table.add_table_comment(comment=comment)
+
+        if columns:
+            comments = self.options.table.get_dict("comments")
+            if comments:
+                for col, comment in comments.items():
+                    self.table.add_column_comment(column=col, comment=comment)
+
     def get_differences_with_deltatable(self, df: Optional[DataFrame] = None):
         if df is None:
             df = self.get_data(stream=self.stream)
fabricks/core/parsers/get_parser.py CHANGED
@@ -1,9 +1,9 @@
-from importlib.util import module_from_spec, spec_from_file_location
 from typing import Optional

 from fabricks.context import PATH_PARSERS
 from fabricks.core.parsers._types import ParserOptions
 from fabricks.core.parsers.base import PARSERS, BaseParser
+from fabricks.utils.helpers import load_module_from_path


 def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> BaseParser:
@@ -11,12 +11,7 @@ def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> Bas
         path = PATH_PARSERS.joinpath(name).append(".py")
         assert path.exists(), f"parser not found ({path})"

-        spec = spec_from_file_location(name, path.string)
-        assert spec, f"parser not found ({path})"
-        assert spec.loader is not None
-
-        mod = module_from_spec(spec)
-        spec.loader.exec_module(mod)
+        load_module_from_path(name, path)
         parser = PARSERS[name](parser_options)

     else:
fabricks/core/steps/base.py CHANGED
@@ -228,15 +228,22 @@ class BaseStep:
             DEFAULT_LOGGER.exception("fail to get jobs", extra={"label": self})
             raise e

-    def create_db_objects(self, retry: Optional[bool] = True) -> List[Dict]:
+    def create_db_objects(
+        self,
+        retry: Optional[bool] = True,
+        update_lists: Optional[bool] = True,
+        incremental: Optional[bool] = False,
+    ) -> List[Dict]:
         DEFAULT_LOGGER.info("create db objects", extra={"label": self})

         df = self.get_jobs()
-        table_df = self.database.get_tables()
-        view_df = self.database.get_views()

-        df = df.join(table_df, "job_id", how="left_anti")
-        df = df.join(view_df, "job_id", how="left_anti")
+        if incremental:
+            table_df = self.database.get_tables()
+            view_df = self.database.get_views()
+
+            df = df.join(table_df, "job_id", how="left_anti")
+            df = df.join(view_df, "job_id", how="left_anti")

         if df:
             results = run_in_parallel(
@@ -248,15 +255,16 @@ class BaseStep:
                 loglevel=logging.CRITICAL,
             )

-            self.update_tables_list()
-            self.update_views_list()
+            if update_lists:
+                self.update_tables_list()
+                self.update_views_list()

             errors = [res for res in results if res.get("error")]

             if errors:
                 if retry:
                     DEFAULT_LOGGER.warning("retry to create jobs", extra={"label": self})
-                    return self.create_db_objects(retry=False)
+                    return self.create_db_objects(retry=False, update_lists=update_lists, incremental=incremental)

             return errors

fabricks/deploy/views.py CHANGED
@@ -276,6 +276,7 @@ def create_or_replace_logs_pivot_view():
         l.step,
         l.job,
         l.job_id,
+        -- flags
         collect_set(l.status) as statuses,
         array_contains(statuses, 'skipped') as skipped,
         array_contains(statuses, 'warned') as warned,
@@ -283,15 +284,19 @@ def create_or_replace_logs_pivot_view():
         array_contains(statuses, 'failed') or (not done and not skipped) as failed,
         not done and not failed and not skipped and array_contains(statuses, 'running') as timed_out,
         not array_contains(statuses, 'running') as cancelled,
+        --
         max(l.notebook_id) as notebook_id,
+        --
+        max(l.timestamp) filter (where l.status = 'running') as start_time,
+        max(l.timestamp) filter (where l.status in ('done', 'ok')) as end_time,
+        --
         max(l.timestamp) filter (where l.status = 'scheduled' ) as scheduled_time,
         max(l.timestamp) filter (where l.status = 'waiting' ) as waiting_time,
-        max(l.timestamp) filter (where l.status = 'running') as start_time,
         max(l.timestamp) filter (where l.status = 'running' ) as running_time,
         max(l.timestamp) filter (where l.status = 'done' ) as done_time,
         max(l.timestamp) filter (where l.status = 'failed' ) as failed_time,
-        max(l.timestamp) filter(where l.status = 'ok') as end_time,
-        max(l.timestamp) filter(where l.status = 'ok') as ok_time,
+        max(l.timestamp) filter (where l.status = 'ok') as ok_time,
+        --
         max(l.exception) as exception
     from
         fabricks.logs l
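
Note: the view now derives `start_time` and `end_time` with `filter (where ...)` aggregate clauses, so `end_time` covers both 'done' and 'ok' rows. A runnable pyspark sketch on toy log rows:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    # Toy log rows; max(...) filter (where ...) picks the latest timestamp
    # per status group, which is how the pivot view derives its times.
    spark.createDataFrame(
        [("j1", "running", 1), ("j1", "done", 2), ("j1", "ok", 3)],
        ["job", "status", "timestamp"],
    ).createOrReplaceTempView("logs")

    spark.sql("""
        select
            job,
            max(timestamp) filter (where status = 'running') as start_time,
            max(timestamp) filter (where status in ('done', 'ok')) as end_time
        from logs
        group by job
    """).show()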
fabricks/metastore/table.py CHANGED
@@ -666,7 +666,45 @@ class Table(DbObject):
             """
         )

-    def add_comment(self, comment: str):
+    def drop_comments(self):
+        self.drop_table_comment()
+        for col in self.columns:
+            self.drop_column_comment(col)
+
+    def drop_table_comment(self):
+        assert self.registered, f"{self} not registered"
+
+        DEFAULT_LOGGER.debug("drop table comment", extra={"label": self})
+        self.spark.sql(
+            f"""
+            comment on table {self.qualified_name}
+            is null;
+            """
+        )
+
+    def drop_column_comment(self, column: str):
+        assert self.registered, f"{self} not registered"
+
+        DEFAULT_LOGGER.debug(f"drop comment from column {column}", extra={"label": self})
+        self.spark.sql(
+            f"""
+            comment on column {self.qualified_name}.`{column}`
+            is null;
+            """
+        )
+
+    def add_column_comment(self, column: str, comment: str):
+        assert self.registered, f"{self} not registered"
+
+        DEFAULT_LOGGER.debug(f"add comment '{comment}' to column {column}", extra={"label": self})
+        self.spark.sql(
+            f"""
+            comment on column {self.qualified_name}.`{column}`
+            is '{comment}';
+            """
+        )
+
+    def add_table_comment(self, comment: str):
         assert self.registered, f"{self} not registered"

         DEFAULT_LOGGER.debug(f"add comment '{comment}'", extra={"label": self})
fabricks/utils/azure_table.py CHANGED
@@ -1,5 +1,4 @@
-import time
-from typing import TYPE_CHECKING, List, Optional, Union
+from typing import TYPE_CHECKING, Any, List, Optional, Union

 from azure.data.tables import TableClient, TableServiceClient
 from pyspark.sql import DataFrame
@@ -99,27 +98,29 @@ class AzureTable:
         if self._table_client is not None:
             self._table_client.close()

-    def submit(self, operations: List, retry: Optional[bool] = True):
-        try:
-            partitions = set()
-            for d in operations:
-                partitions.add(d[1]["PartitionKey"])
-
-            for p in partitions:
-                _operations = [d for d in operations if d[1].get("PartitionKey") == p]
-                t = 50
-                if len(_operations) < t:
-                    self.table.submit_transaction(_operations)
-                else:
-                    transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
-                    for transaction in transactions:
-                        self.table.submit_transaction(transaction)
-        except Exception as e:
-            if retry:
-                time.sleep(10)
-                self.submit(operations, retry=False)
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=1, max=10),
+        retry=retry_if_exception_type((Exception)),
+        reraise=True,
+    )
+    def _submit_with_retry(self, data: Any):
+        self.table.submit_transaction(data)
+
+    def submit(self, operations: List):
+        partitions = set()
+        for d in operations:
+            partitions.add(d[1]["PartitionKey"])
+
+        for p in partitions:
+            _operations = [d for d in operations if d[1].get("PartitionKey") == p]
+            t = 50
+            if len(_operations) < t:
+                self._submit_with_retry(_operations)
             else:
-                raise e
+                transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
+                for transaction in transactions:
+                    self._submit_with_retry(transaction)

     def delete(self, data: Union[List, DataFrame, dict]):
         if isinstance(data, DataFrameLike):
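
Note: the hand-rolled sleep-and-retry is replaced with tenacity's decorator, matching the arguments shown in the hunk above. A self-contained sketch of that retry behavior:

    from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

    attempts = {"n": 0}

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type(Exception),
        reraise=True,
    )
    def flaky():
        # Fails twice, then succeeds; tenacity retries with exponential
        # backoff and re-raises the last error if all attempts fail.
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise ConnectionError("transient")
        return attempts["n"]

    print(flaky())  # 3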
fabricks/utils/helpers.py CHANGED
@@ -1,4 +1,5 @@
 import logging
+import sys
 from functools import reduce
 from queue import Queue
 from typing import Any, Callable, Iterable, List, Literal, Optional, Union
@@ -216,13 +217,28 @@ def run_notebook(path: Path, timeout: Optional[int] = None, **kwargs):
     dbutils.notebook.run(path.get_notebook_path(), timeout, {**kwargs})  # type: ignore


-def xxhash64(s: Any):
+def xxhash64(s: Any) -> int:
     df = spark.sql(f"select xxhash64(cast('{s}' as string)) as xxhash64")
     return df.collect()[0][0]


-def md5(s: Any):
+def md5(s: Any) -> str:
     from hashlib import md5

     md5 = md5(str(s).encode())
     return md5.hexdigest()
+
+
+def load_module_from_path(name: str, path: Path):
+    from importlib.util import module_from_spec, spec_from_file_location
+
+    sys.path.append(str(path.parent))
+
+    spec = spec_from_file_location(name, path.string)
+    assert spec, f"no valid module found in {path.string}"
+    assert spec.loader is not None
+
+    textwrap_module = module_from_spec(spec)
+    spec.loader.exec_module(textwrap_module)
+
+    return textwrap_module
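
Note: `load_module_from_path` is the standard spec_from_file_location/module_from_spec recipe, now with the module's parent directory added to `sys.path`. A runnable sketch loading a throwaway module by file path:

    import sys
    import tempfile
    from importlib.util import module_from_spec, spec_from_file_location
    from pathlib import Path

    # Write a throwaway module to disk, then load it the same way
    # load_module_from_path does: build a spec from the file and execute it.
    src = Path(tempfile.mkdtemp()) / "my_parser.py"
    src.write_text("VALUE = 42\n")

    sys.path.append(str(src.parent))
    spec = spec_from_file_location("my_parser", str(src))
    assert spec and spec.loader
    mod = module_from_spec(spec)
    spec.loader.exec_module(mod)
    print(mod.VALUE)  # 42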
fabricks/utils/path.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import posixpath
 from pathlib import Path as PathlibPath
 from typing import List, Optional, Union

@@ -120,8 +121,13 @@ class Path:
         return False

     def joinpath(self, *other):
-        new_path = self.pathlibpath.joinpath(*other)
-        return Path(path=new_path, assume_git=self.assume_git)
+        parts = [str(o) for o in other]
+        base = self.string
+
+        joined = posixpath.join(base, *parts)
+        new = posixpath.normpath(joined)
+
+        return Path(path=new, assume_git=self.assume_git)

     def append(self, other: str):
         new_path = self.string + other
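
Note: `joinpath` now builds plain strings with posixpath instead of delegating to pathlib, so separators stay forward slashes on any OS and `normpath` collapses `..` segments. A quick sketch:

    import posixpath

    # Join path segments and normalize the result; ".." is collapsed.
    joined = posixpath.join("/mnt/runtime", "fabricks", "..", "notebooks")
    print(posixpath.normpath(joined))  # /mnt/runtime/notebooks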
fabricks-3.0.11.dist-info/METADATA → fabricks-3.0.13.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fabricks
-Version: 3.0.11
+Version: 3.0.13
 Author-email: BMS DWH Team <bi_support@bmsuisse.ch>
 Requires-Python: <4,>=3.9
 Requires-Dist: azure-data-tables<13,>=12.5.0
fabricks-3.0.11.dist-info/RECORD → fabricks-3.0.13.dist-info/RECORD RENAMED
@@ -40,8 +40,8 @@ fabricks/cdc/base/_types.py,sha256=WloCDC3ATrn0aZJ6E8BRYKZx19N3EE56r6qlBYhcuvQ,2
 fabricks/cdc/base/cdc.py,sha256=9w5BqQxSVbFVEozJWmZQThqdppkE_SYi4fHSzJ7WMvA,78
 fabricks/cdc/base/configurator.py,sha256=w6Ywif87iv1WG-5OM3XkzIRrsns-_QQ6XlADpk0YLlw,6434
 fabricks/cdc/base/generator.py,sha256=pa_GJn7Pdi5vMnXN8zExmOPMpCqdZ3QoxHEB0wv0lsk,5933
-fabricks/cdc/base/merger.py,sha256=1_j-hKnKKEcbogyXX0Cm2IdyB-tpDJAIMIz1MwWnXX0,4158
-fabricks/cdc/base/processor.py,sha256=b8ATjVX-dW8JCrt4n3v8HlteTi1498jrItEla0BqynU,17689
+fabricks/cdc/base/merger.py,sha256=3qUUs0uqmwOMdXc50kV3Zo9omuQuUUFgtMLBrg4E-wk,4159
+fabricks/cdc/base/processor.py,sha256=gL3pWMaBRsc0oB93ISnH2x07WbmtM_QEIx8qrUcUoZ0,17704
 fabricks/cdc/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fabricks/cdc/templates/filter.sql.jinja,sha256=AQcOj4KRwKscVG3zepnEAN9Yxb03AM7068hqW7dtVI8,236
 fabricks/cdc/templates/merge.sql.jinja,sha256=YS9wWckCVsUI1pUYiRSFBIuQ16WU3EPWSkhZVy2niBA,221
@@ -69,15 +69,15 @@ fabricks/cdc/templates/queries/nocdc/complete.sql.jinja,sha256=cVKKCSbiuuw1K7BOz
 fabricks/cdc/templates/queries/nocdc/update.sql.jinja,sha256=mjNUwGVhZ08yUkdv9sCTkqyW60p0YavtWTqvSUVrwjA,1283
 fabricks/context/__init__.py,sha256=qfntJ9O6omzY_t6AhDP6Ndu9C5LMiVdWbo6ikhtoe7o,1446
 fabricks/context/_types.py,sha256=FzQJ35vp0uc6pAq18bc-VHwMVEWtd0VDdm8xQmNr2Sg,2681
+fabricks/context/config.py,sha256=EmLUnswuWfrncaNJMDjvdMg-1lD8aneKAY8IDna7VPE,4814
+fabricks/context/helpers.py,sha256=nyHF0yhTJ_cDQT5Q47z0CYh4QHOJyzyRNRBfrUYDIbU,1552
 fabricks/context/log.py,sha256=CadrRf8iL6iXlGIGIhEIswa7wGqC-E-oLwWcGTyJ10s,2074
-fabricks/context/runtime.py,sha256=7pXU_5gPyvChwiyxP4ch59eAgyoNOH9jMUlGWxwXWn8,3915
+fabricks/context/runtime.py,sha256=87PtX6SqLoFd0PGxgisF6dLlxtCHaHxkMMIt34UyB2w,3479
 fabricks/context/secret.py,sha256=iRM-KU-JcJAEOLoGJ8S4Oh65-yt674W6CDTSkOE7SXw,3192
 fabricks/context/spark_session.py,sha256=BPaxKJXHZDI5oQiOPhmua_xjXnrVgluh--AVpvUgbck,2553
 fabricks/context/utils.py,sha256=EQRscdUhdjwk2htZu8gCgNZ9PfRzzrR6e1kRrIbVlBM,2786
-fabricks/context/config/__init__.py,sha256=pFEsGXBQkX5_FP0cwQMX427j6dQuTx81NR9snMxc8cU,3127
-fabricks/context/config/utils.py,sha256=7KCTUiSbqQnDD5mbCO9_o1KbUgD-Xbei_UGgpMQi9nE,1371
 fabricks/core/__init__.py,sha256=LaqDi4xuyHAoLOvS44PQdZdRfq9SmVr7mB6BDHyxYpc,209
-fabricks/core/extenders.py,sha256=39bSm9QiW4vBAyT659joE-5p_EZiNM4gi8KA3-OgX3E,917
+fabricks/core/extenders.py,sha256=oJzfv0hWxusnGmrjMwbrGyKfot8xzA4XtNquPWfFgPo,727
 fabricks/core/job_schema.py,sha256=6-70oy0ZJd3V9AiXfc0Q8b8NVEynxQza_h7mB13uB-s,853
 fabricks/core/masks.py,sha256=3UCxcCi-TgFHB7xT5ZvmEa8RMKev23X_JLE70Pr3rpY,1347
 fabricks/core/udfs.py,sha256=gu7K-ohxcO0TdgA7IjzVMOZatZQYhFTklHo60a6k_Yc,2960
@@ -86,7 +86,7 @@ fabricks/core/dags/__init__.py,sha256=0DUKzVcXcROvxkN19P_kaOJ7da5BAM7Vt8EGQbp2KS
 fabricks/core/dags/base.py,sha256=tFj27SqeZUZ7pB_LOWkpdowZz5gj30JUANI4gWK3Pl8,3139
 fabricks/core/dags/generator.py,sha256=4fp_CRsWnl_UauM9Jx-E4UCaxnm2_Q5103J58fRws2U,4832
 fabricks/core/dags/log.py,sha256=v1xfpQGfddHDz9lflvXOWTXMde3CdERo9jzeSmNDRhY,402
-fabricks/core/dags/processor.py,sha256=dcEWk0y2fuNt7RfxkeCny5axdg5aE4UpD7QwEijcQWM,7864
+fabricks/core/dags/processor.py,sha256=IzjqrpNu6lTYp-Rl2T_8Sb5N0pSk9BhdP9vuL4sBRMg,7930
 fabricks/core/dags/run.py,sha256=RIDULb9WakObSyYzmkglh8RwFRwC8-NFC-1yPDMkBC0,1074
 fabricks/core/dags/terminator.py,sha256=Y6pV2UnSyrCIx2AQWJXoHk5Roq12gZqpennHx_Lbnzs,793
 fabricks/core/dags/utils.py,sha256=4kyobLGl4tO0Flo6LxNzYjCU_G42vns1LrkxTO5_KLY,1585
@@ -105,7 +105,7 @@ fabricks/core/jobs/base/_types.py,sha256=y66BtJlJskq7wGzn7te5XYjO-NEqeQGUC11kkbe
 fabricks/core/jobs/base/checker.py,sha256=Cdfh8rQYy4MvMFl0HyC3alGUWm8zrFXk08m2t2JMu6Y,5477
 fabricks/core/jobs/base/configurator.py,sha256=9G5F7Qg5FWHPbHgdh8Qxc85OoSX0rnjD4c9itwU5KKc,10415
 fabricks/core/jobs/base/exception.py,sha256=HrdxEuOfK5rY-ItZvEL3iywLgdpYUpmWFkjjjks7oYc,2318
-fabricks/core/jobs/base/generator.py,sha256=NlJgR1461rx4bOBfvpmBrS6PvNeHD6QH6FOqAZiXDvM,16987
+fabricks/core/jobs/base/generator.py,sha256=Dk82tj21NhR9wwgXzMp8JlKQ6D9HnjVlK9fvDmoYLbk,17646
 fabricks/core/jobs/base/invoker.py,sha256=FvjfpNqi542slxC2yLu1BIu5EklNUWySxDF8cD_SqKQ,7602
 fabricks/core/jobs/base/job.py,sha256=dWmk2PpQH2NETaaDS6KoiefRnDHfDMdCyhmogkdcSFI,93
 fabricks/core/jobs/base/processor.py,sha256=qkNiJSSLaEnivKGBcd9UZyIVFexnv-n1p_5mCZIy1rA,9076
@@ -113,7 +113,7 @@ fabricks/core/parsers/__init__.py,sha256=TGjyUeiiTkJrAxIpu2D_c2rQcbe5YRpmBW9oh0F
 fabricks/core/parsers/_types.py,sha256=JC2Oh-wUvaX8SBzeuf5owPgRaj-Q3-7MXxyIYPQ7QwA,147
 fabricks/core/parsers/base.py,sha256=P8IrLQKGakwaAQ-4gf4vElVwWoSpkixYd9kNthu1VDM,3292
 fabricks/core/parsers/decorator.py,sha256=kn_Mj-JLWTFaRiciZ3KavmSUcWFPY3ve-buMruHrX_Q,307
-fabricks/core/parsers/get_parser.py,sha256=TTnVPwKqKpFu6jJJnXEuiEctWGtimk8w2p1jF2U7ibg,909
+fabricks/core/parsers/get_parser.py,sha256=mauofS626h9wpPZtlZFqIb1jcKM3Jz4D_36uvd-Lv4k,717
 fabricks/core/parsers/utils.py,sha256=qdn2ElpqBgDsW55-tACWZaFOT0ebrBYg2fenqSgd6YI,2456
 fabricks/core/schedules/__init__.py,sha256=bDjNMcm7itimAo4gun0W4W9bZKwZmWUjkMqAQIcqI2Y,431
 fabricks/core/schedules/diagrams.py,sha256=YA4T7Etl_UPfW-3IGFq5Xj9OlXZGQ27Aot6RVa3ZUgg,578
@@ -126,7 +126,7 @@ fabricks/core/schedules/terminate.py,sha256=-RvtOrxTOZl2sZQ6KfNHJL6H2LCAEMSVRyyl
 fabricks/core/schedules/views.py,sha256=8hYwPLCvvN-nem2lNAKvUY5hC71v88z4-y8j0poUApM,1949
 fabricks/core/steps/__init__.py,sha256=JP-kaDa890-9XqBSPp6YdssAexdxv-MqQ__WfVYdgeg,132
 fabricks/core/steps/_types.py,sha256=VxIrH3nFwmPlwG-UI8sDDP0AwK_9jlsy6yQp6YfgtqE,90
-fabricks/core/steps/base.py,sha256=UTzVqdWaho9zgMrloq8ndqcWQ9R5Z-O1SInIrm9byB4,14403
+fabricks/core/steps/base.py,sha256=MJe2q9s1siM89YkpHDqldtbtKQgkhDB_cFa2-e_irvs,14642
 fabricks/core/steps/get_step.py,sha256=8q4rEDdTTZNJsXB2l5XY-Ktoow8ZHsON_tx5yKMUIzg,284
 fabricks/core/steps/get_step_conf.py,sha256=UPT3gB1Sh5yzawZ9qiVQlvVAKaxPX82gaWBDzxx75EM,633
 fabricks/deploy/__init__.py,sha256=ntxtFnzeho_WneVoL5CCqbI4rKApKgdmi9j0HKb0LJc,2375
@@ -136,26 +136,26 @@ fabricks/deploy/schedules.py,sha256=0a5dU1rW6fg8aAp7TTt-l0DgR-4kmzsX2xxV2C30yaw,
 fabricks/deploy/tables.py,sha256=IF822oxOCy12r08Dz54YUK5luud6dtTPxJ4TUIHE-No,2621
 fabricks/deploy/udfs.py,sha256=7fw3O5LgOOxDEhuS3s1yFdqybgFh65r_1IdfZUYeejs,597
 fabricks/deploy/utils.py,sha256=V41r1zVT9KcsICqTLAzpb4ixRk2q2ybJMrGhkPOtG6k,5099
-fabricks/deploy/views.py,sha256=Ket511q0v8QHgkNArb4qVPFDuuQnQn3v-fvr7KO6oxM,14360
+fabricks/deploy/views.py,sha256=8cSt6IzZy-JHHkyqd91NT2hi3LTNTOolimlfSBXMCvU,14434
 fabricks/metastore/README.md,sha256=utPUGAxmjyNMGe43GfL0Gup4MjeTKKwyiUoNVSfMquI,51
 fabricks/metastore/__init__.py,sha256=RhjY2CuqtZBg8fEizzzvW8qszqCM-vSCL1tQGuzoato,174
 fabricks/metastore/_types.py,sha256=NXYxwQHP0sCllM0N6QBbaK4CdtM_m_rHFDxRNRfBcLU,1919
 fabricks/metastore/database.py,sha256=23VAKKzjrwlEaj28DNNmiOhcfdKRzYk8eEfq-PzINbg,1924
 fabricks/metastore/dbobject.py,sha256=ve8p48OqEpJYsqWNhgesGSE0emM--uY8QrvBRoR3j3g,1881
 fabricks/metastore/pyproject.toml,sha256=6RZM9RMKMDF_EAequhORZ7TD0BQNk7aBCTWAv-sRcp0,519
-fabricks/metastore/table.py,sha256=mos701lEU_EwjdwwGb2ey-RSebkg8Ubms2VcOJvIARA,27937
+fabricks/metastore/table.py,sha256=AaoNL-1mz4A0CCb3tH_0BUurYPjA1oL5pioCYlEMtu4,29113
 fabricks/metastore/utils.py,sha256=8SxhjDkz_aSH4IGUusel7hqOQxP9U8PNBCY0M7GH00Y,1355
 fabricks/metastore/view.py,sha256=f7hKJWtnH1KmZym8dkoucKOTndntzai_f2YqferxHLs,1431
 fabricks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fabricks/utils/_types.py,sha256=AuOhknlozqx5QdAdvZSA6xAWhU8k4nxG1vxIkOVgHeY,184
 fabricks/utils/azure_queue.py,sha256=wtKAq_MD5QLxelerDO475dzL-SySIrxt9d5KGi-8vvw,3102
-fabricks/utils/azure_table.py,sha256=7tCD1iM7UWREaSQVVmtgHCAebLtWPP9ZmuU5zDALyo0,5305
+fabricks/utils/azure_table.py,sha256=J_UAPiCN89rL5FNmwIGg43Z6FSH8evVIWijDEgYJxk4,5294
 fabricks/utils/console.py,sha256=X4lLgL_UxCjoFRx-ZRCwzdBveRGPKlFYZDi6vl7uevQ,1017
 fabricks/utils/fdict.py,sha256=cdnvNBSXKJIDKSdhQGJA4CGv0qLn5IVYKQ111l7nM9I,7978
-fabricks/utils/helpers.py,sha256=h7SuOVpBP5qcgX1nM1suvkXG9BhiK5-257EBepCvrO8,7452
+fabricks/utils/helpers.py,sha256=fKv6mpT-428xTSjdLfm7TnN1Xo9FadrSIY1qzYgWCzs,7909
 fabricks/utils/log.py,sha256=LCQEM81PhdojiyLrtEzv1QM__bWbaEhGddyd0IqyGXM,7985
 fabricks/utils/mermaid.py,sha256=XoiVxPaUJS4TC_ybA-e78qFzQkQ46uPf055JiiNDdSg,986
-fabricks/utils/path.py,sha256=Bs3PayWtg62-mrsDbvu8kh0VLZZhX7tU9YiyHFiYNhs,6698
+fabricks/utils/path.py,sha256=ToTTS8QKGsWq8cR8SDE2ocWKx2GsuZ5psPTuAMq9R4s,6813
 fabricks/utils/pip.py,sha256=UHo7NTjFGJNghWBuuDow28xUkZYg2YrlbAP49IxZyXY,1522
 fabricks/utils/pydantic.py,sha256=W0fiDLVMFrrInfQw2s5YPeSEvkN-4k864u3UyPoHaz4,2516
 fabricks/utils/spark.py,sha256=QWVpbGwOvURIVBlR7ygt6NQ9MHUsIDvlquJ65iI8UBI,2007
@@ -171,6 +171,6 @@ fabricks/utils/schema/get_schema_for_type.py,sha256=5k-R6zCgUAtapQgxT4turcx1IQ-b
 fabricks/utils/write/__init__.py,sha256=i0UnZenXj9Aq0b0_aU3s6882vg-Vu_AyKfQhl_dTp-g,200
 fabricks/utils/write/delta.py,sha256=lTQ0CfUhcvn3xTCcT_Ns6PMDBsO5UEfa2S9XpJiLJ9c,1250
 fabricks/utils/write/stream.py,sha256=wQBpAnQtYA6nl79sPKhVM6u5m-66suX7B6VQ6tW4TOs,622
-fabricks-3.0.11.dist-info/METADATA,sha256=lDGJheQcvE7Psjm6fnUdvH9vMwHuag9sEWlcNNUFzf4,798
-fabricks-3.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-fabricks-3.0.11.dist-info/RECORD,,
+fabricks-3.0.13.dist-info/METADATA,sha256=H7Yv6dwfPTscFWMQWayOB7t_eURWWYCGxPGzWsV71KQ,798
+fabricks-3.0.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+fabricks-3.0.13.dist-info/RECORD,,
fabricks-3.0.11.dist-info/WHEEL → fabricks-3.0.13.dist-info/WHEEL RENAMED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any
fabricks/context/config/__init__.py DELETED
@@ -1,92 +0,0 @@
-import logging
-import os
-from typing import Final
-
-from fabricks.context.config.utils import get_config_from_file
-from fabricks.utils.path import Path
-from fabricks.utils.spark import spark
-
-file_path, file_config = get_config_from_file()
-
-runtime = os.environ.get("FABRICKS_RUNTIME", "none")
-runtime = None if runtime.lower() == "none" else runtime
-if runtime is None:
-    if runtime := file_config.get("runtime"):
-        assert file_path is not None
-        runtime = file_path.joinpath(runtime)
-
-if runtime is None:
-    if file_path is not None:
-        runtime = file_path
-    else:
-        raise ValueError(
-            "could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
-        )
-
-path_runtime = Path(runtime, assume_git=True)
-PATH_RUNTIME: Final[Path] = path_runtime
-
-notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
-notebooks = None if notebooks.lower() == "none" else notebooks
-if notebooks is None:
-    if notebooks := file_config.get("notebooks"):
-        assert file_path is not None
-        notebooks = file_path.joinpath(notebooks)
-
-notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
-PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
-
-is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
-if is_job_config_from_yaml is None:
-    assert file_path is not None
-    is_job_config_from_yaml = file_config.get("job_config_from_yaml")
-
-IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
-
-is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
-if is_debugmode is None:
-    is_debugmode = file_config.get("debugmode")
-
-IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
-
-is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
-if is_devmode is None:
-    is_devmode = file_config.get("devmode")
-
-IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
-
-loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
-if loglevel is None:
-    loglevel = file_config.get("loglevel")
-
-loglevel = loglevel.upper() if loglevel else "INFO"
-if loglevel == "DEBUG":
-    _loglevel = logging.DEBUG
-elif loglevel == "INFO":
-    _loglevel = logging.INFO
-elif loglevel == "WARNING":
-    _loglevel = logging.WARNING
-elif loglevel == "ERROR":
-    _loglevel = logging.ERROR
-elif loglevel == "CRITICAL":
-    _loglevel = logging.CRITICAL
-else:
-    raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
-
-LOGLEVEL = _loglevel
-
-path_config = os.environ.get("FABRICKS_CONFIG")
-if path_config is None:
-    if path_config := file_config.get("config"):
-        assert file_path is not None
-        path_config = file_path.joinpath(path_config)
-else:
-    path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None
-
-if not path_config:
-    path_config = PATH_RUNTIME.joinpath(
-        "fabricks",
-        f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
-    ).string
-
-PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
fabricks/context/config/utils.py DELETED
@@ -1,53 +0,0 @@
-def get_config_from_toml():
-    import os
-    import pathlib
-    import sys
-
-    if sys.version_info >= (3, 11):
-        import tomllib
-    else:
-        import tomli as tomllib  # type: ignore
-
-    path = pathlib.Path(os.getcwd())
-    while path is not None and not (path / "pyproject.toml").exists():
-        if path == path.parent:
-            break
-        path = path.parent
-
-    if (path / "pyproject.toml").exists():
-        with open((path / "pyproject.toml"), "rb") as f:
-            config = tomllib.load(f)
-        return path, config.get("tool", {}).get("fabricks", {})
-
-    return None, {}
-
-
-def get_config_from_json():
-    import json
-    import os
-    import pathlib
-
-    path = pathlib.Path(os.getcwd())
-    while path is not None and not (path / "fabricksconfig.json").exists():
-        if path == path.parent:
-            break
-        path = path.parent
-
-    if (path / "fabricksconfig.json").exists():
-        with open((path / "fabricksconfig.json"), "r") as f:
-            config = json.load(f)
-        return path, config
-
-    return None, {}
-
-
-def get_config_from_file():
-    json_path, json_config = get_config_from_json()
-    if json_config:
-        return json_path, json_config
-
-    pyproject_path, pyproject_config = get_config_from_toml()
-    if pyproject_config:
-        return pyproject_path, pyproject_config
-
-    return None, {}