ddeutil-workflow 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
+ __version__: str = "0.0.1"
File without changes
@@ -0,0 +1,44 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ import re
+ from re import (
+     IGNORECASE,
+     MULTILINE,
+     UNICODE,
+     VERBOSE,
+     Pattern,
+ )
+
+
+ class RegexConf:
+     """Regular expression config."""
+
+     # NOTE: Search caller
+     __re_caller: str = r"""
+         \$
+         {{
+             \s*(?P<caller>
+                 [a-zA-Z0-9_.\s'\"\[\]\(\)\-\{}]+?
+             )\s*
+         }}
+     """
+     RE_CALLER: Pattern = re.compile(
+         __re_caller, MULTILINE | IGNORECASE | UNICODE | VERBOSE
+     )
+
+     # NOTE: Search task
+     __re_task_fmt: str = r"""
+         ^
+         (?P<path>[^/@]+)
+         /
+         (?P<func>[^@]+)
+         @
+         (?P<tag>.+)
+         $
+     """
+     RE_TASK_FMT: Pattern = re.compile(
+         __re_task_fmt, MULTILINE | IGNORECASE | UNICODE | VERBOSE
+     )
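Note: a minimal usage sketch of the two patterns above, assuming ``RegexConf`` is imported from this module (the template and task strings are hypothetical):

    if m := RegexConf.RE_CALLER.search("run at ${{ params.run_date }}"):
        print(m.group("caller"))  # -> "params.run_date"

    if m := RegexConf.RE_TASK_FMT.match("tasks/postgres_procedure@demo"):
        print(m.group("path"), m.group("func"), m.group("tag"))  # -> tasks postgres_procedure demo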
@@ -0,0 +1,11 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ from typing import Any
+
+ TupleStr = tuple[str, ...]
+ DictData = dict[str, Any]
@@ -0,0 +1,235 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Iterator
+ from pathlib import Path
+ from typing import Annotated, Any, Literal, Optional, TypeVar
+
+ from ddeutil.model.conn import Conn as ConnModel
+ from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.functional_validators import field_validator
+ from pydantic.types import SecretStr
+ from typing_extensions import Self
+
+ from .__types import DictData, TupleStr
+ from .loader import Loader
+
+ EXCLUDED_EXTRAS: TupleStr = (
+     "type",
+     "url",
+ )
+
+
+ class BaseConn(BaseModel):
+     """Base Conn (Connection) Model"""
+
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+     # NOTE: These are the model fields of a connection.
+     dialect: str
+     host: Optional[str] = None
+     port: Optional[int] = None
+     user: Optional[str] = None
+     pwd: Optional[SecretStr] = None
+     endpoint: str
+     extras: Annotated[
+         DictData,
+         Field(default_factory=dict, description="Extras mapping of parameters"),
+     ]
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData,
+     ) -> Self:
+         """Construct a Connection with a Loader object using a specific config
+         name.
+
+         :param name: A name of the connection config to load.
+         :param externals: External parameters.
+         """
+         loader: Loader = Loader(name, externals=externals)
+         # NOTE: Validate that the config type matches the current connection model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+         filter_data: DictData = {
+             k: loader.data.pop(k)
+             for k in loader.data.copy()
+             if k not in cls.model_fields and k not in EXCLUDED_EXTRAS
+         }
+         if "url" in loader.data:
+             url: ConnModel = ConnModel.from_url(loader.data.pop("url"))
+             return cls(
+                 dialect=url.dialect,
+                 host=url.host,
+                 port=url.port,
+                 user=url.user,
+                 pwd=url.pwd,
+                 # NOTE:
+                 #   Replace a None endpoint with the ``memory`` value for a
+                 #   SQLite connection string.
+                 endpoint=cls.__prepare_slash_from_url(url.endpoint or "memory"),
+                 # NOTE: This merge order means externals take the top priority.
+                 extras=(url.options | filter_data | externals),
+             )
+         return cls.model_validate(
+             obj={
+                 "extras": (
+                     loader.data.pop("extras", {}) | filter_data | externals
+                 ),
+                 **loader.data,
+             }
+         )
+
+     @classmethod
+     def __prepare_slash_from_url(cls, value: str) -> str:
+         if value.startswith("/"):
+             return value[1:]
+         return value
+
+     @field_validator("endpoint")
+     def __prepare_slash(cls, value: str) -> str:
+         if value.startswith("//"):
+             return value[1:]
+         return value
+
+
+ class Conn(BaseConn):
+     """Conn (Connection) Model that implements any necessary methods. This
+     object should be the base abstraction for any connection model object.
+     """
+
+     def get_spec(self) -> str:
+         """Return the full connection URL constructed from all fields."""
+         return (
+             f"{self.dialect}://{self.user or ''}"
+             f"{f':{self.pwd}' if self.pwd else ''}"
+             # NOTE: Separate the credential part from the host with ``@``.
+             f"{'@' if self.user or self.pwd else ''}"
+             f"{self.host or ''}{f':{self.port}' if self.port else ''}"
+             f"/{self.endpoint}"
+         )
+
+     def ping(self) -> bool:
+         """Ping the connection to check that it is usable with these field
+         values.
+         """
+         raise NotImplementedError("Ping is not implemented")
+
+     def glob(self, pattern: str) -> Iterator[Any]:
+         """Return objects from the endpoint of this connection that match the
+         pattern.
+         """
+         raise NotImplementedError("Glob is not implemented")
+
+     def find_object(self, _object: str):
+         raise NotImplementedError("Find object is not implemented")
+
+
+ class FlSys(Conn):
+     """File System Connection."""
+
+     dialect: Literal["local"] = "local"
+
+     def ping(self) -> bool:
+         return Path(self.endpoint).exists()
+
+     def glob(self, pattern: str) -> Iterator[Path]:
+         yield from Path(self.endpoint).rglob(pattern=pattern)
+
+     def find_object(self, _object: str) -> bool:
+         return (Path(self.endpoint) / _object).exists()
+
+
+ class SFTP(Conn):
+     """SFTP Server Connection."""
+
+     dialect: Literal["sftp"] = "sftp"
+
+     def __client(self):
+         from .vendors.sftp_wrapped import WrapSFTP
+
+         return WrapSFTP(
+             host=self.host,
+             port=self.port,
+             user=self.user,
+             pwd=self.pwd.get_secret_value(),
+         )
+
+     def ping(self) -> bool:
+         with self.__client().simple_client():
+             return True
+
+     def glob(self, pattern: str) -> Iterator[str]:
+         yield from self.__client().walk(pattern=pattern)
+
+
+ class Db(Conn):
+     """RDBMS System Connection"""
+
+     def ping(self) -> bool:
+         from sqlalchemy import create_engine
+         from sqlalchemy.engine import URL, Engine
+         from sqlalchemy.exc import OperationalError
+
+         engine: Engine = create_engine(
+             url=URL.create(
+                 self.dialect,
+                 username=self.user,
+                 password=self.pwd.get_secret_value() if self.pwd else None,
+                 host=self.host,
+                 port=self.port,
+                 database=self.endpoint,
+                 query={},
+             ),
+             execution_options={},
+         )
+         try:
+             # NOTE: Open and close a connection to prove the engine is usable.
+             with engine.connect():
+                 return True
+         except OperationalError as err:
+             logging.warning(str(err))
+             return False
+
+
+ class SQLite(Db):
+     dialect: Literal["sqlite"]
+
+
+ class ODBC(Conn): ...
+
+
+ class Doc(Conn):
+     """No SQL System Connection"""
+
+
+ class Mongo(Doc): ...
+
+
+ class SSHCred(BaseModel):
+     ssh_host: str
+     ssh_user: str
+     ssh_password: Optional[SecretStr] = Field(default=None)
+     ssh_private_key: Optional[str] = Field(default=None)
+     ssh_private_key_pwd: Optional[SecretStr] = Field(default=None)
+     ssh_port: int = Field(default=22)
+
+
+ class S3Cred(BaseModel):
+     aws_access_key: str
+     aws_secret_access_key: SecretStr
+     region: str = Field(default="ap-southeast-1")
+     role_arn: Optional[str] = Field(default=None)
+     role_name: Optional[str] = Field(default=None)
+     mfa_serial: Optional[str] = Field(default=None)
+
+
+ class AZServPrinCred(BaseModel):
+     tenant: str
+     client_id: str
+     secret_id: SecretStr
+
+
+ class GoogleCred(BaseModel):
+     google_json_path: str
+
+
+ SubclassConn = TypeVar("SubclassConn", bound=Conn)
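Note: a hedged sketch of using the file-system connection defined above, constructed directly rather than through ``Loader`` (the directory and file names are hypothetical):

    conn = FlSys(endpoint="data", extras={})
    print(conn.ping())                 # True when the ./data directory exists
    print(conn.find_object("x.csv"))   # True when ./data/x.csv exists
    for path in conn.glob("*.csv"):    # recursive glob under ./data
        print(path)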
@@ -0,0 +1,306 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ from datetime import datetime
+ from typing import Annotated, Any, Optional
+
+ from fmtutil import Datetime, FormatterGroupType, make_group
+ from fmtutil.utils import escape_fmt_group
+ from pydantic import BaseModel, Field
+ from typing_extensions import Self
+
+ try:
+     import polars as pl
+ except ImportError:
+     raise ImportError(
+         "Please install polars package\n\t\t$ pip install polars"
+     ) from None
+
+ from .__types import DictData, TupleStr
+ from .conn import SubclassConn
+ from .loader import Loader
+
+ EXCLUDED_EXTRAS: TupleStr = ("type",)
+ OBJ_FMTS: FormatterGroupType = make_group(
+     {
+         "datetime": Datetime,
+     }
+ )
+
+
+ class BaseDataset(BaseModel):
+     """Base Dataset Model. This model implements only the loading constructor."""
+
+     conn: Annotated[SubclassConn, Field(description="Connection Model")]
+     endpoint: Annotated[
+         Optional[str],
+         Field(description="Endpoint of connection"),
+     ] = None
+     object: str
+     features: list = Field(default_factory=list)
+     extras: dict[str, Any] = Field(default_factory=dict)
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData,
+     ) -> Self:
+         """Construct a Dataset with a Loader object using a specific config
+         name.
+
+         :param name: A name of the dataset config to load from the config file.
+         :param externals: External parameters.
+         """
+         loader: Loader = Loader(name, externals=externals)
+
+         # NOTE: Validate that the config type matches the current dataset model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+         filter_data: DictData = {
+             k: loader.data.pop(k)
+             for k in loader.data.copy()
+             if k not in cls.model_fields and k not in EXCLUDED_EXTRAS
+         }
+
+         if "conn" not in loader.data:
+             raise ValueError("Dataset config does not set ``conn`` value")
+
+         # NOTE: Start loading the connection config.
+         conn_name: str = loader.data.pop("conn")
+         conn_loader: Loader = Loader(conn_name, externals=externals)
+         conn_model: SubclassConn = conn_loader.type.from_loader(
+             name=conn_name, externals=externals
+         )
+
+         # NOTE: Override the ``endpoint`` value taken from the connection data.
+         if "endpoint" in loader.data:
+             # NOTE: Update the endpoint path without the Pydantic validator.
+             conn_model.__dict__["endpoint"] = loader.data["endpoint"]
+         else:
+             loader.data.update({"endpoint": conn_model.endpoint})
+         return cls.model_validate(
+             obj={
+                 "extras": (
+                     loader.data.pop("extras", {}) | filter_data | externals
+                 ),
+                 "conn": conn_model,
+                 **loader.data,
+             }
+         )
+
+
+ class Dataset(BaseDataset):
+
+     def exists(self) -> bool:
+         raise NotImplementedError("Object exists is not implemented")
+
+     def format_object(
+         self,
+         _object: str | None = None,
+         dt: str | datetime | None = None,
+     ) -> str:
+         """Format the object value with the datetime formatter group."""
+         if dt is None:
+             dt = datetime.now()
+         dt: datetime = (
+             dt if isinstance(dt, datetime) else datetime.fromisoformat(dt)
+         )
+         return (
+             OBJ_FMTS({"datetime": dt})
+             .format(escape_fmt_group(_object or self.object))
+             .replace("\\", "")
+         )
+
+
+ class FlDataset(Dataset):
+
+     def exists(self) -> bool:
+         return self.conn.find_object(self.object)
+
+
+ class TblDataset(Dataset):
+
+     def exists(self) -> bool:
+         return self.conn.find_object(self.object)
+
+
+ class FlDataFrame(Dataset):
+
+     def exists(self) -> bool:
+         return self.conn.find_object(self.object)
+
+
+ class TblDataFrame(Dataset): ...
+
+
+ class PandasCSV: ...
+
+
+ class PandasJson: ...
+
+
+ class PandasParq: ...
+
+
+ class PandasDb: ...
+
+
+ class PandasExcel: ...
+
+
+ class PolarsCsvArgs(BaseModel):
+     """CSV files should use the RFC 4180 standard CSV format.
+
+     docs: [RFC4180](https://datatracker.ietf.org/doc/html/rfc4180)
+     """
+
+     header: bool = True
+     separator: str = ","
+     skip_rows: int = 0
+     encoding: str = "utf-8"
+
+
+ class PolarsCsv(FlDataFrame):
+     extras: PolarsCsvArgs
+
+     def load_options(self) -> dict[str, Any]:
+         return {
+             "has_header": self.extras.header,
+             "separator": self.extras.separator,
+             "skip_rows": self.extras.skip_rows,
+             "encoding": self.extras.encoding,
+         }
+
+     def load(
+         self,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+         *,
+         override: bool = False,
+     ) -> pl.DataFrame:
+         """Load a CSV file to a Polars DataFrame with the ``read_csv`` method."""
+         return pl.read_csv(
+             f"{self.conn.get_spec()}/{_object or self.object}",
+             **(
+                 (options or {})
+                 if override
+                 else (self.load_options() | (options or {}))
+             ),
+         )
+
+     def scan(
+         self,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+     ) -> pl.LazyFrame:
+         """Load a CSV file to a Polars LazyFrame with the ``scan_csv`` method."""
+         # FIXME: Scan CSV does not support the fsspec file URL.
+         return pl.scan_csv(
+             f"{self.conn.endpoint}/{_object or self.object}",
+             **(self.load_options() | (options or {})),
+         )
+
+     def save_options(self) -> dict[str, Any]:
+         return {
+             "include_header": self.extras.header,
+             "separator": self.extras.separator,
+         }
+
+     def save(
+         self,
+         df: pl.DataFrame,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+     ) -> None:
+         """Save a Polars DataFrame to a CSV file with the ``write_csv`` method."""
+         # FIXME: Save CSV does not support the fsspec file URL.
+         return df.write_csv(
+             f"{self.conn.endpoint}/{_object or self.object}",
+             **(self.save_options() | (options or {})),
+         )
+
+     def sink(
+         self,
+         df: pl.LazyFrame,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+     ) -> None:
+         """Save a Polars LazyFrame to a CSV file with the ``sink_csv`` method."""
+         # FIXME: Sink CSV does not support the fsspec file URL.
+         return df.sink_csv(
+             f"{self.conn.endpoint}/{_object or self.object}",
+             **(self.save_options() | (options or {})),
+         )
+
+
+ class PolarsJson(FlDataFrame):
+
+     def load(
+         self,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+         *,
+         dt: str | datetime | None = None,
+     ):
+         """Load a JSON file to a Polars DataFrame with the ``read_json`` method."""
+         # FIXME: Load JSON does not support the fsspec file URL.
+         return pl.read_json(
+             f"{self.conn.endpoint}/"
+             f"{self.format_object(_object or self.object, dt=dt)}",
+             **(options or {}),
+         )
+
+     def save(
+         self,
+         df: pl.DataFrame,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+     ): ...
+
+
+ class PolarsNdJson(FlDataFrame): ...
+
+
+ class PolarsParqArgs(BaseModel):
+     compression: Optional[str] = None
+     use_pyarrow: bool = False
+     pyarrow_options: dict[str, Any] = Field(default_factory=dict)
+
+
+ class PolarsParq(FlDataFrame):
+     extras: PolarsParqArgs
+
+     def save_options(self):
+         excluded: list[str] = []
+         if not self.extras.pyarrow_options:
+             excluded.append("pyarrow_options")
+         return self.extras.model_dump(exclude=excluded)
+
+     def save(
+         self,
+         df: pl.DataFrame,
+         _object: str | None = None,
+         options: dict[str, Any] | None = None,
+     ):
+         print(
+             f"Start writing parquet to "
+             f"{self.conn.endpoint}/{_object or self.object}"
+         )
+         return df.write_parquet(
+             f"{self.conn.endpoint}/{_object or self.object}",
+             **(self.save_options() | (options or {})),
+         )
+
+
+ class PostgresTbl(TblDataset): ...
+
+
+ class SqliteTbl(TblDataset): ...
+
+
+ class PolarsPostgres(TblDataFrame): ...
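Note: a hedged sketch composing the models above, building a ``PolarsCsv`` dataset on a local ``FlSys`` connection directly (file names and options are hypothetical; ``scan`` and ``save`` join ``conn.endpoint`` with the object name, as shown in the code):

    ds = PolarsCsv(
        conn=FlSys(endpoint="data", extras={}),
        object="sales.csv",
        extras=PolarsCsvArgs(separator=";", encoding="utf8"),
    )
    lf = ds.scan()  # pl.scan_csv("data/sales.csv", has_header=True, separator=";", ...)
    ds.save(lf.collect(), _object="sales_out.csv")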
@@ -0,0 +1,82 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ """
+ Define error objects for the Node package.
+ """
+ from __future__ import annotations
+
+ from typing import Union
+
+
+ class BaseError(Exception):
+     """Base error object that is used to catch any error raised by any step
+     in this source.
+     """
+
+
+ class WorkflowBaseError(BaseError):
+     """Core Base Error object"""
+
+
+ class ConfigNotFound(WorkflowBaseError):
+     """Error raised when a method cannot find the config file or data."""
+
+
+ class ConfigArgumentError(WorkflowBaseError):
+     """Error raised for a wrong configuration argument."""
+
+     def __init__(self, argument: Union[str, tuple], message: str):
+         """Main initialization that merges the argument and message input
+         values into one specific content message, like
+
+         `__class__` with `argument`, `message`
+
+         :param argument: Union[str, tuple]
+         :param message: str
+         """
+         if isinstance(argument, tuple):
+             _last_arg: str = str(argument[-1])
+             _argument: str = (
+                 (
+                     ", ".join([f"{_!r}" for _ in argument[:-1]])
+                     + f", and {_last_arg!r}"
+                 )
+                 if len(argument) > 1
+                 else f"{_last_arg!r}"
+             )
+         else:
+             _argument: str = f"{argument!r}"
+         _message: str = f"with {_argument}, {message}"
+         super().__init__(_message)
+
+
+ class ConnArgumentError(ConfigArgumentError):
+     """Error raised for a wrong connection argument when loading or parsing."""
+
+
+ class DsArgumentError(ConfigArgumentError):
+     """Error raised for a wrong catalog argument when loading or parsing."""
+
+
+ class NodeArgumentError(ConfigArgumentError):
+     """Error raised for a wrong node argument when loading or parsing."""
+
+
+ class ScdlArgumentError(ConfigArgumentError):
+     """Error raised for a wrong schedule argument when loading or parsing."""
+
+
+ class PipeArgumentError(ConfigArgumentError):
+     """Error raised for a wrong pipeline argument when loading or parsing."""
+
+
+ class PyException(Exception): ...
+
+
+ class ShellException(Exception): ...
+
+
+ class TaskException(Exception): ...
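Note: a small sketch of the message that ``ConfigArgumentError`` builds from a tuple argument (the argument names are hypothetical):

    try:
        raise ConfigArgumentError(("host", "port"), "it should not be empty.")
    except ConfigArgumentError as err:
        print(err)  # with 'host', and 'port', it should not be empty.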
@@ -0,0 +1,9 @@
+ from typing import Any
+
+ from ddeutil.core import lazy
+
+ registries: dict[str, Any] = {
+     "postgres-proc": {
+         "pysycopg": lazy("ddeutil.workflow.tasks._postgres.postgres_procedure"),
+     },
+ }
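Note: a hedged sketch of resolving a task from this registry, assuming ``ddeutil.core.lazy`` defers the import and returns the dotted target when the stored callable is invoked (that behaviour is not shown in this diff):

    task = registries["postgres-proc"]["pysycopg"]()  # assumed: resolves postgres_procedure lazily
    task()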
@@ -0,0 +1,2 @@
+ def postgres_procedure():
+     return