fabricks-3.0.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +11 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +27 -0
- fabricks/api/core.py +4 -0
- fabricks/api/deploy.py +3 -0
- fabricks/api/exceptions.py +19 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/job_schema.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/masks.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/initialize.py +42 -0
- fabricks/api/notebooks/process.py +54 -0
- fabricks/api/notebooks/run.py +59 -0
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +31 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/schedules.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/api/version.py +3 -0
- fabricks/api/views.py +6 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/_types.py +10 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +223 -0
- fabricks/cdc/base/generator.py +177 -0
- fabricks/cdc/base/merger.py +110 -0
- fabricks/cdc/base/processor.py +471 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +20 -0
- fabricks/cdc/scd.py +22 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -0
- fabricks/cdc/templates/filters/final.sql.jinja +4 -0
- fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
- fabricks/cdc/templates/filters/update.sql.jinja +30 -0
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -0
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/queries/__init__.py +0 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/queries/final.sql.jinja +1 -0
- fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
- fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
- fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
- fabricks/cdc/templates/query.sql.jinja +15 -0
- fabricks/context/__init__.py +72 -0
- fabricks/context/_types.py +133 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +77 -0
- fabricks/context/runtime.py +117 -0
- fabricks/context/secret.py +103 -0
- fabricks/context/spark_session.py +82 -0
- fabricks/context/utils.py +80 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +99 -0
- fabricks/core/dags/generator.py +157 -0
- fabricks/core/dags/log.py +12 -0
- fabricks/core/dags/processor.py +228 -0
- fabricks/core/dags/run.py +39 -0
- fabricks/core/dags/terminator.py +25 -0
- fabricks/core/dags/utils.py +54 -0
- fabricks/core/extenders.py +33 -0
- fabricks/core/job_schema.py +32 -0
- fabricks/core/jobs/__init__.py +21 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/_types.py +284 -0
- fabricks/core/jobs/base/checker.py +139 -0
- fabricks/core/jobs/base/configurator.py +306 -0
- fabricks/core/jobs/base/exception.py +85 -0
- fabricks/core/jobs/base/generator.py +447 -0
- fabricks/core/jobs/base/invoker.py +206 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +249 -0
- fabricks/core/jobs/bronze.py +395 -0
- fabricks/core/jobs/get_job.py +127 -0
- fabricks/core/jobs/get_job_conf.py +152 -0
- fabricks/core/jobs/get_job_id.py +31 -0
- fabricks/core/jobs/get_jobs.py +107 -0
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +415 -0
- fabricks/core/jobs/silver.py +373 -0
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/_types.py +6 -0
- fabricks/core/parsers/base.py +95 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +26 -0
- fabricks/core/parsers/utils.py +69 -0
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +21 -0
- fabricks/core/schedules/generate.py +20 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/process.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/terminate.py +6 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/_types.py +7 -0
- fabricks/core/steps/base.py +423 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +26 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/views.py +41 -0
- fabricks/deploy/__init__.py +92 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +10 -0
- fabricks/deploy/tables.py +82 -0
- fabricks/deploy/udfs.py +19 -0
- fabricks/deploy/utils.py +36 -0
- fabricks/deploy/views.py +509 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/_types.py +65 -0
- fabricks/metastore/database.py +65 -0
- fabricks/metastore/dbobject.py +66 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/table.py +768 -0
- fabricks/metastore/utils.py +51 -0
- fabricks/metastore/view.py +53 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_queue.py +93 -0
- fabricks/utils/azure_table.py +154 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/fdict.py +240 -0
- fabricks/utils/helpers.py +228 -0
- fabricks/utils/log.py +236 -0
- fabricks/utils/mermaid.py +32 -0
- fabricks/utils/path.py +242 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +94 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/_types.py +3 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +33 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +99 -0
- fabricks/utils/spark.py +76 -0
- fabricks/utils/sqlglot.py +56 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-3.0.11.dist-info/METADATA +23 -0
- fabricks-3.0.11.dist-info/RECORD +176 -0
- fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/metastore/utils.py
ADDED
@@ -0,0 +1,51 @@

from pyspark.errors.exceptions.base import AnalysisException
from pyspark.sql import DataFrame

from fabricks.context import SPARK


def get_tables(schema: str) -> DataFrame:
    table_df = SPARK.sql(f"show tables in {schema}")
    view_df = SPARK.sql(f"show views in {schema}")

    try:
        df = SPARK.sql(
            """
            select
                database,
                concat_ws('.', database, tableName) as table,
                md5(table) as job_id
            from
                {tables}
                left anti join {views} on tableName = viewName
            """,
            tables=table_df,
            views=view_df,
        )
        return df

    except AnalysisException:
        return SPARK.sql("select null::string as database, null::string as table")


def get_views(schema: str) -> DataFrame:
    view_df = SPARK.sql(f"show views in {schema}")

    try:
        df = SPARK.sql(
            """
            select
                namespace as database,
                concat_ws('.', namespace, viewName) as view,
                md5(view) as job_id
            from
                {views}
            where
                not isTemporary
            """,
            views=view_df,
        )
        return df

    except AnalysisException:
        return SPARK.sql("select null::string as database, null::string as view")
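A minimal usage sketch for the two helpers above (the import path is taken from the file list; an active Spark session behind `fabricks.context.SPARK` is assumed and `bronze` is a placeholder schema name):

```python
# Illustrative only: "bronze" is a placeholder schema name.
from fabricks.metastore.utils import get_tables, get_views

tables = get_tables("bronze")  # columns: database, table, job_id (md5 of "database.table")
views = get_views("bronze")    # columns: database, view, job_id; temporary views are excluded

tables.show(truncate=False)
views.show(truncate=False)
```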
fabricks/metastore/view.py
ADDED
@@ -0,0 +1,53 @@

from typing import Any, Optional, Union
from uuid import uuid4

import pandas as pd
from pyspark.sql import DataFrame, SparkSession

from fabricks.context import SPARK
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.metastore.dbobject import DbObject
from fabricks.utils._types import DataFrameLike


class View(DbObject):
    @staticmethod
    def create_or_replace(
        df: Union[DataFrame, pd.DataFrame],
        *dependencies,
        spark: Optional[SparkSession] = None,
    ) -> str:
        if spark is None:
            if isinstance(df, DataFrameLike):
                spark = df.sparkSession
            else:
                spark = SPARK

        assert spark is not None

        uuid = str(uuid4().hex)
        df = spark.createDataFrame(df) if isinstance(df, pd.DataFrame) else df
        if dependencies:
            for d in dependencies:
                df = df.join(d.where("1 == 2"), how="leftanti")

        df.createOrReplaceGlobalTempView(uuid)
        return uuid


def create_or_replace_global_temp_view(
    name: str,
    df: DataFrame,
    uuid: Optional[bool] = False,
    job: Optional[Any] = None,
) -> str:
    if uuid:
        name = f"{name}__{str(uuid4().hex)}"

    if job is None:
        job = name.split("__")[0]

    DEFAULT_LOGGER.debug(f"create global temp view {name}", extra={"label": job})
    df.createOrReplaceGlobalTempView(name)

    return f"global_temp.{name}"
fabricks/utils/__init__.py
File without changes

fabricks/utils/_types.py
ADDED
fabricks/utils/azure_queue.py
ADDED
@@ -0,0 +1,93 @@

import json
from typing import TYPE_CHECKING, Optional, Union

from azure.core.exceptions import ResourceExistsError
from azure.storage.queue import QueueClient

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential


class AzureQueue:
    def __init__(
        self,
        name: str,
        storage_account: Optional[str] = None,
        access_key: Optional[str] = None,
        connection_string: Optional[str] = None,
        credential: "Optional[TokenCredential]" = None,
    ):
        self.name = name
        self.storage_account = storage_account
        if connection_string is None:
            assert storage_account
            assert access_key or credential, "Either access_key or credential must be provided"
            self.storage_account = storage_account
            self.access_key = access_key
            self.credential = credential
            connection_string = (
                f"DefaultEndpointsProtocol=https;AccountName={self.storage_account};AccountKey={self.access_key};EndpointSuffix=core.windows.net"
                if access_key
                else None
            )

        assert connection_string
        self.connection_string = connection_string
        self._queue_client = None

    @property
    def queue_client(self) -> QueueClient:
        if not self._queue_client:
            if self.connection_string is not None:
                self._queue_client = QueueClient.from_connection_string(self.connection_string, queue_name=self.name)
            else:
                assert self.storage_account and (self.access_key or self.credential), (
                    "Either access_key or credential must be provided"
                )
                self._queue_client = QueueClient(
                    account_url=f"https://{self.storage_account}.queue.core.windows.net",
                    queue_name=self.name,
                    credential=self.access_key if self.access_key else self.credential,
                )
        return self._queue_client

    def create_if_not_exists(self):
        try:
            self.queue_client.create_queue()
        except ResourceExistsError:
            pass

    @property
    def sentinel(self):
        return "SENTINEL"

    def clear(self):
        self.queue_client.clear_messages()

    def send(self, message: Union[str, dict]):
        if isinstance(message, dict):
            message = json.dumps(message)
        # print("sending ->", message)
        self.queue_client.send_message(message)

    def send_sentinel(self):
        # print("sentinel", self.sentinel)
        self.send(self.sentinel)

    def receive(self):
        msg = self.queue_client.receive_message()
        if msg:
            self.queue_client.delete_message(msg)
            # print("receiving ->", msg.content)
            return msg.content
        return None

    def delete(self):
        self.queue_client.delete_queue()

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        if self._queue_client is not None:
            self._queue_client.close()
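A hedged usage sketch for `AzureQueue` (storage account, queue name, and key are placeholders; the import path follows the file list):

```python
from fabricks.utils.azure_queue import AzureQueue

# Placeholders: replace with a real storage account and key (or pass a TokenCredential).
with AzureQueue("jobs", storage_account="mystorageaccount", access_key="<access-key>") as queue:
    queue.create_if_not_exists()
    queue.send({"step": "bronze", "job": "monitor_logs"})  # dicts are JSON-encoded
    queue.send_sentinel()                                  # marks the end of the stream

    while (message := queue.receive()) is not None:
        if message == queue.sentinel:
            break
        print("received:", message)
```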
fabricks/utils/azure_table.py
ADDED
@@ -0,0 +1,154 @@

import time
from typing import TYPE_CHECKING, List, Optional, Union

from azure.data.tables import TableClient, TableServiceClient
from pyspark.sql import DataFrame
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

from fabricks.utils._types import DataFrameLike

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential


class AzureTable:
    def __init__(
        self,
        name: str,
        storage_account: Optional[str] = None,
        access_key: Optional[str] = None,
        connection_string: Optional[str] = None,
        credential: "Optional[TokenCredential]" = None,
    ):
        self.name = name

        if connection_string is None:
            assert storage_account, "storage_account must be provided if connection_string is not set"
            assert access_key or credential, "Either access_key or credential must be provided"
            self.storage_account = storage_account
            self.access_key = access_key
            self.credential = credential
            self.storage_account = storage_account

            connection_string = (
                f"DefaultEndpointsProtocol=https;AccountName={self.storage_account};AccountKey={self.access_key};EndpointSuffix=core.windows.net"
                if access_key
                else None
            )

        assert connection_string
        self.connection_string = connection_string

        self._table_client = None

    @property
    def table_service_client(self) -> TableServiceClient:
        if not self._table_client:
            if self.connection_string is None:
                return TableServiceClient(
                    endpoint=f"https://{self.storage_account}.table.core.windows.net",
                    credential=self.credential,
                )
            self._table_client = TableServiceClient.from_connection_string(self.connection_string)
        return self._table_client

    @property
    def table(self) -> TableClient:
        return self.create_if_not_exists()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception)),
        reraise=True,
    )
    def create_if_not_exists(self) -> TableClient:
        return self.table_service_client.create_table_if_not_exists(table_name=self.name)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception)),
        reraise=True,
    )
    def drop(self):
        self.table_service_client.delete_table(self.name)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception)),
        reraise=True,
    )
    def query(self, query: str) -> List:
        return list(self.table.query_entities(query))

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception)),
        reraise=True,
    )
    def list_all(self) -> List:
        return self.query("")

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs) -> None:
        if self._table_client is not None:
            self._table_client.close()

    def submit(self, operations: List, retry: Optional[bool] = True):
        try:
            partitions = set()
            for d in operations:
                partitions.add(d[1]["PartitionKey"])

            for p in partitions:
                _operations = [d for d in operations if d[1].get("PartitionKey") == p]
                t = 50
                if len(_operations) < t:
                    self.table.submit_transaction(_operations)
                else:
                    transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
                    for transaction in transactions:
                        self.table.submit_transaction(transaction)
        except Exception as e:
            if retry:
                time.sleep(10)
                self.submit(operations, retry=False)
            else:
                raise e

    def delete(self, data: Union[List, DataFrame, dict]):
        if isinstance(data, DataFrameLike):
            data = [row.asDict() for row in data.collect()]
        elif not isinstance(data, List):
            data = [data]

        operations = [("delete", d) for d in data]
        self.submit(operations)

    def upsert(self, data: Union[List, DataFrame, dict]):
        if isinstance(data, DataFrameLike):
            data = [row.asDict() for row in data.collect()]
        elif not isinstance(data, List):
            data = [data]

        operations = [("upsert", d) for d in data]
        self.submit(operations)

    def truncate_partition(self, partition: str):
        data = self.query(f"PartitionKey eq '{partition}'")
        self.delete(data)

    def truncate_all_partitions(self):
        for p in self.list_all_partitions():
            self.truncate_partition(p)

    def list_all_partitions(self) -> List:
        partitions = set()
        for d in self.list_all():
            partitions.add(d["PartitionKey"])
        return sorted(list(partitions))
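A hedged sketch of `AzureTable` in use (account, table name, and entities are placeholders; `submit` batches operations per `PartitionKey` into transactions of at most 50 entities):

```python
from fabricks.utils.azure_table import AzureTable

# Placeholders: replace with a real storage account and key (or pass a TokenCredential).
table = AzureTable("schedules", storage_account="mystorageaccount", access_key="<access-key>")

# upsert/delete accept a dict, a list of dicts, or a Spark DataFrame of entities.
table.upsert({"PartitionKey": "gold", "RowKey": "fact_sales", "status": "ok"})

for entity in table.query("PartitionKey eq 'gold'"):
    print(entity["RowKey"], entity.get("status"))

table.truncate_partition("gold")  # deletes every entity in the partition
```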
fabricks/utils/console.py
ADDED
@@ -0,0 +1,51 @@

from typing import Optional


class formatter:
    END = "\33[0m"
    BOLD = "\33[1m"
    ITALIC = "\33[3m"
    URL = "\33[4m"
    BLINK = "\33[5m"
    SELECTED = "\33[7m"

    BLINK2 = "\33[6m"


class colors:
    BLACK = "\33[30m"
    RED = "\33[31m"
    GREEN = "\33[32m"
    YELLOW = "\33[33m"
    BLUE = "\33[34m"
    VIOLET = "\33[35m"
    BEIGE = "\33[36m"
    WHITE = "\33[37m"
    GREY = "\33[90m"
    ORANGE = "\33[33m"

    RED2 = "\33[91m"
    GREEN2 = "\33[92m"
    YELLOW2 = "\33[93m"
    BLUE2 = "\33[94m"
    VIOLET2 = "\33[95m"
    BEIGE2 = "\33[96m"
    WHITE2 = "\33[97m"

    RED3 = "\33[1;31m"


def progress_bar(progress: int = 0, width: int = 40, msg: Optional[str] = None):
    if not isinstance(progress, int):
        progress = int(progress)

    left = width * progress // 100
    right = width - left

    tags = "#" * left
    spaces = " " * right
    pct = f" {progress}%"
    if msg:
        pct = f"{pct} ({msg})"

    print("\r[", tags, spaces, "]", pct, sep="", end="", flush=True)
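An illustrative loop driving `progress_bar` (the import path follows the file list; timings are arbitrary):

```python
import time

from fabricks.utils.console import colors, formatter, progress_bar

print(f"{formatter.BOLD}{colors.GREEN}running{formatter.END}")
for i in range(0, 101, 10):
    progress_bar(i, width=40, msg=f"step {i // 10}")
    time.sleep(0.1)
print()  # the bar uses carriage returns, so finish with a newline
```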
fabricks/utils/fdict.py
ADDED
@@ -0,0 +1,240 @@

from typing import Any, Callable, Dict, List, Optional, TypeVar, Union, overload

T = TypeVar("T")


class FDict:
    """
    A flexible dictionary wrapper that provides type-safe access to nested data structures
    with convenient conversion methods.
    """

    def __init__(self, options: Union[Dict[str, Any], Any, None] = None):
        """
        Initialize FDict with a dictionary of options.

        Args:
            options: Input dictionary. If None, creates an empty dictionary.
        """
        self.options = options if options is not None else {}

    def __getitem__(self, key: str) -> Any:
        """Enable dictionary-like access with [] operator."""
        return self.options[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Enable dictionary-like value setting with [] operator."""
        self.options[key] = value

    def __contains__(self, key: str) -> bool:
        """Enable 'in' operator for membership testing."""
        return key in self.options

    def __repr__(self) -> str:
        """Return string representation of the FDict."""
        return f"FDict({self.options})"

    def to_dict(self) -> Dict[str, Any]:
        """Convert FDict to a regular dictionary."""
        return self.options

    @overload
    def get(self, key: str) -> Optional[Any]: ...

    @overload
    def get(self, key: str, default: T) -> Union[Any, T]: ...

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get a value from the dictionary with an optional default.

        Args:
            key: The key to look up
            default: Value to return if key is not found

        Returns:
            The value associated with the key or the default value
        """
        return self.options.get(key, default)

    def get_list(self, key: str, default: Optional[List[Any]] = None) -> List[Any]:
        """
        Get a value as a list, converting single items to a single-item list.

        Args:
            key: The key to look up
            default: Default value if key is not found

        Returns:
            A list containing the value(s)
        """
        values = self.options.get(key, default if default is not None else [])
        if values is None:
            return []

        return [values] if not isinstance(values, list) else values

    def get_boolean(self, key: str, default: Optional[bool] = None) -> Optional[bool]:
        """
        Get a value as a boolean, with string conversion support.

        Args:
            key: The key to look up
            default: Default value if key is not found

        Returns:
            Boolean value of the key, or default if key not found
        """
        value = self.options.get(key)

        if value is None:
            return default
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            return value.lower() in ("true", "1", "yes", "on")

        return bool(value)

    def get_dict(self, key: str, default: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Get a nested dictionary, with a default empty dict if not found.

        Args:
            key: The key to look up
            default: Default value if key is not found

        Returns:
            Dictionary value of the key, or default if key not found
        """
        return self.options.get(key, default if default is not None else {})

    def get_nested(self, *keys: str, default: Any = None) -> Any:
        """
        Access nested dictionary values using a sequence of keys.

        Args:
            *keys: Sequence of keys to traverse
            default: Default value if path not found

        Returns:
            Value at the nested path, or default if path not found
        """
        current = self.options
        for key in keys:
            if not isinstance(current, dict):
                return default
            if key not in current:
                return default
            current = current[key]

        return current

    def set_nested(self, *keys: str, value: Any) -> None:
        """
        Set a value in a nested dictionary path, creating intermediate dictionaries as needed.

        Args:
            *keys: Sequence of keys defining the path
            value: Value to set at the path
        """
        current = self.options
        for key in keys[:-1]:
            current = current.setdefault(key, {})

        current[keys[-1]] = value

    def filter(self, predicate: Callable[[str, Any], bool]) -> "FDict":
        """
        Create a new FDict with key-value pairs that satisfy the predicate function.

        Args:
            predicate: Lambda function that takes key and value as arguments and returns bool

        Returns:
            New FDict containing only the filtered key-value pairs

        Example:
            # Get all items with numeric values greater than 10
            filtered = fdict.filter(lambda k, v: isinstance(v, (int, float)) and v > 10)
        """
        filtered_dict = {k: v for k, v in self.options.items() if predicate(k, v)}
        return FDict(filtered_dict)

    def filter_keys(self, predicate: Callable[[str], bool]) -> "FDict":
        """
        Create a new FDict with keys that satisfy the predicate function.

        Args:
            predicate: Lambda function that takes key as argument and returns bool

        Returns:
            New FDict containing only the filtered keys

        Example:
            # Get all items with keys starting with 'user_'
            filtered = fdict.filter_keys(lambda k: k.startswith('user_'))
        """
        return self.filter(lambda k, _: predicate(k))

    def filter_values(self, predicate: Callable[[Any], bool]) -> "FDict":
        """
        Create a new FDict with values that satisfy the predicate function.

        Args:
            predicate: Lambda function that takes value as argument and returns bool

        Returns:
            New FDict containing only the filtered values

        Example:
            # Get all items with string values
            filtered = fdict.filter_values(lambda v: isinstance(v, str))
        """
        return self.filter(lambda _, v: predicate(v))

    def map_values(self, transform: Callable[[Any], Any]) -> "FDict":
        """
        Create a new FDict with transformed values using the provided function.

        Args:
            transform: Lambda function that takes a value and returns transformed value

        Returns:
            New FDict containing transformed values

        Example:
            # Convert all string values to uppercase
            transformed = fdict.map_values(lambda v: v.upper() if isinstance(v, str) else v)
        """
        transformed_dict = {k: transform(v) for k, v in self.options.items()}
        return FDict(transformed_dict)

    def deep_filter(self, predicate: Callable[[str, Any], bool]) -> "FDict":
        """
        Recursively filter nested dictionaries using the predicate function.

        Args:
            predicate: Lambda function that takes key and value as arguments and returns bool

        Returns:
            New FDict with filtered nested structure

        Example:
            # Filter all nested numeric values greater than 10
            filtered = fdict.deep_filter(lambda k, v:
                not isinstance(v, dict) and isinstance(v, (int, float)) and v > 10)
        """

        def filter_recursive(d: Dict[str, Any]) -> Dict[str, Any]:
            result = {}
            for k, v in d.items():
                if isinstance(v, dict):
                    filtered_nested = filter_recursive(v)
                    if filtered_nested:  # Only include non-empty nested dicts
                        result[k] = filtered_nested
                elif predicate(k, v):
                    result[k] = v
            return result

        return FDict(filter_recursive(self.options))
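A small sketch of `FDict` in use (the options dictionary is invented for the example):

```python
from fabricks.utils.fdict import FDict

options = FDict({"mode": "update", "keys": "id", "check": {"max_rows": 100, "skip": "true"}})

assert options.get("mode") == "update"
assert options.get_list("keys") == ["id"]                            # scalars are wrapped in a list
assert options.get_nested("check", "max_rows") == 100
assert FDict(options.get_dict("check")).get_boolean("skip") is True  # "true" parses to True

options.set_nested("check", "min_rows", value=1)
large = options.deep_filter(lambda k, v: isinstance(v, int) and v > 10)
print(large.to_dict())  # {'check': {'max_rows': 100}}
```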