ddeutil-workflow 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/PKG-INFO +19 -15
  2. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/README.md +18 -14
  3. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/pyproject.toml +4 -2
  4. ddeutil_workflow-0.0.3/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__types.py +1 -0
  6. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/conn.py +13 -10
  7. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/exceptions.py +0 -20
  8. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/loader.py +39 -11
  9. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/pipeline.py +183 -147
  10. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/schedule.py +7 -7
  11. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/_pandas.py +1 -1
  12. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/_polars.py +10 -2
  13. ddeutil_workflow-0.0.3/src/ddeutil/workflow/utils.py +180 -0
  14. ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/__dataset.py +127 -0
  15. ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/az.py +0 -0
  16. ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pd.py +13 -0
  17. ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pg.py +11 -0
  18. ddeutil_workflow-0.0.2/src/ddeutil/workflow/dataset.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pl.py +3 -133
  19. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/PKG-INFO +19 -15
  20. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/SOURCES.txt +9 -5
  21. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_conn.py +8 -9
  22. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_dataset.py +7 -5
  23. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline.py +1 -1
  24. ddeutil_workflow-0.0.3/tests/test_pipeline_matrix.py +29 -0
  25. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline_run.py +5 -3
  26. ddeutil_workflow-0.0.3/tests/test_pipeline_task.py +80 -0
  27. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_schedule.py +2 -2
  28. ddeutil_workflow-0.0.2/src/ddeutil/workflow/__about__.py +0 -1
  29. ddeutil_workflow-0.0.2/src/ddeutil/workflow/utils.py +0 -65
  30. ddeutil_workflow-0.0.2/tests/test_loader_simple.py +0 -84
  31. ddeutil_workflow-0.0.2/tests/test_pipeline_task.py +0 -21
  32. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/LICENSE +0 -0
  33. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/setup.cfg +0 -0
  34. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__init__.py +0 -0
  35. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__regex.py +0 -0
  36. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/__init__.py +0 -0
  37. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__dict.py +0 -0
  38. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__init__.py +0 -0
  39. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__schedule.py +0 -0
  40. ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/aws_warpped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/aws.py +0 -0
  41. ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/minio_warpped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/minio.py +0 -0
  42. ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/sftp_wrapped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/sftp.py +0 -0
  43. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  44. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/requires.txt +0 -0
  45. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  46. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_data.py +0 -0
  47. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_local_and_global.py +0 -0
  48. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_regex.py +0 -0
  49. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_loader.py +0 -0
  50. {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline_params.py +0 -0
--- ddeutil_workflow-0.0.2/PKG-INFO
+++ ddeutil_workflow-0.0.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.2
+Version: 0.0.3
 Summary: Data Developer & Engineer Workflow Utility Objects
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -83,7 +83,13 @@ Out of you data that want to use in pipeline of workflow. Some of this component
 is similar component of the **Airflow** because I like it concepts.

 The main feature of this project is the `Pipeline` object that can call any
-registried function.
+registries function. The pipeline can handle everything that you want to do, it
+will passing parameters and catching the output for re-use it to next step.
+
+> [!IMPORTANT]
+> In the future of this project, I will drop the connection and dataset to
+> dynamic registries instead of main features because it have a lot of maintain
+> vendor codes and deps. (I do not have time to handle this features)

 ### Connection

@@ -104,7 +110,9 @@ assert conn.ping()

 ### Dataset

-The dataset is define any objects on the connection.
+The dataset is define any objects on the connection. This feature was implemented
+on `/vendors` because it has a lot of tools that can interact with any data systems
+in the data tool stacks.

 ```yaml
 ds_postgres_customer_tbl:
@@ -116,7 +124,7 @@ ds_postgres_customer_tbl:
 ```

 ```python
-from ddeutil.workflow.dataset import PostgresTbl
+from ddeutil.workflow.vendors.pg import PostgresTbl

 dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
 assert dataset.exists()
@@ -126,14 +134,14 @@ assert dataset.exists()

 ```yaml
 schd_for_node:
-  type: schedule.Scdl
+  type: schedule.Schedule
   cron: "*/5 * * * *"
 ```

 ```python
-from ddeutil.workflow.schedule import Scdl
+from ddeutil.workflow.schedule import Schedule

-scdl = Scdl.from_loader(name='schd_for_node', externals={})
+scdl = Schedule.from_loader(name='schd_for_node', externals={})
 assert '*/5 * * * *' == str(scdl.cronjob)

 cron_iterate = scdl.generate('2022-01-01 00:00:00')
@@ -234,14 +242,10 @@ pipe_el_pg_to_lake:
 pipe_hook_mssql_proc:
   type: ddeutil.workflow.pipe.Pipeline
   params:
-    run_date:
-      type: datetime
-    sp_name:
-      type: str
-    source_name:
-      type: str
-    target_name:
-      type: str
+    run_date: datetime
+    sp_name: str
+    source_name: str
+    target_name: str
   jobs:
     transform:
       stages:
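The params block in 0.0.3 maps each parameter name directly to its type, replacing the nested `type:` mapping of 0.0.2. As a hedged sketch of how such a pipeline might be driven from Python, assuming a `Pipeline.from_loader` constructor in the same style as the `Conn` and `Schedule` examples in this README, and a hypothetical `execute(params=...)` method in `ddeutil.workflow.pipeline` (neither signature is confirmed by this diff; the YAML `type` string says `pipe.Pipeline`):

```python
from datetime import datetime

# Hypothetical usage sketch only: `from_loader` mirrors the documented
# constructors for Conn/Schedule, and `execute` plus its signature are assumed.
from ddeutil.workflow.pipeline import Pipeline

pipeline = Pipeline.from_loader(name="pipe_hook_mssql_proc", externals={})
pipeline.execute(
    params={
        # Keys match the names declared under `params:` in the YAML above;
        # the values are made up for illustration.
        "run_date": datetime(2024, 1, 1),
        "sp_name": "proc_customer_sync",
        "source_name": "src_table",
        "target_name": "tgt_table",
    }
)
```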
--- ddeutil_workflow-0.0.2/README.md
+++ ddeutil_workflow-0.0.3/README.md
@@ -48,7 +48,13 @@ Out of you data that want to use in pipeline of workflow. Some of this component
 is similar component of the **Airflow** because I like it concepts.

 The main feature of this project is the `Pipeline` object that can call any
-registried function.
+registries function. The pipeline can handle everything that you want to do, it
+will passing parameters and catching the output for re-use it to next step.
+
+> [!IMPORTANT]
+> In the future of this project, I will drop the connection and dataset to
+> dynamic registries instead of main features because it have a lot of maintain
+> vendor codes and deps. (I do not have time to handle this features)

 ### Connection

@@ -69,7 +75,9 @@ assert conn.ping()

 ### Dataset

-The dataset is define any objects on the connection.
+The dataset is define any objects on the connection. This feature was implemented
+on `/vendors` because it has a lot of tools that can interact with any data systems
+in the data tool stacks.

 ```yaml
 ds_postgres_customer_tbl:
@@ -81,7 +89,7 @@ ds_postgres_customer_tbl:
 ```

 ```python
-from ddeutil.workflow.dataset import PostgresTbl
+from ddeutil.workflow.vendors.pg import PostgresTbl

 dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
 assert dataset.exists()
@@ -91,14 +99,14 @@ assert dataset.exists()

 ```yaml
 schd_for_node:
-  type: schedule.Scdl
+  type: schedule.Schedule
   cron: "*/5 * * * *"
 ```

 ```python
-from ddeutil.workflow.schedule import Scdl
+from ddeutil.workflow.schedule import Schedule

-scdl = Scdl.from_loader(name='schd_for_node', externals={})
+scdl = Schedule.from_loader(name='schd_for_node', externals={})
 assert '*/5 * * * *' == str(scdl.cronjob)

 cron_iterate = scdl.generate('2022-01-01 00:00:00')
@@ -199,14 +207,10 @@ pipe_el_pg_to_lake:
 pipe_hook_mssql_proc:
   type: ddeutil.workflow.pipe.Pipeline
   params:
-    run_date:
-      type: datetime
-    sp_name:
-      type: str
-    source_name:
-      type: str
-    target_name:
-      type: str
+    run_date: datetime
+    sp_name: str
+    source_name: str
+    target_name: str
   jobs:
     transform:
       stages:
--- ddeutil_workflow-0.0.2/pyproject.toml
+++ ddeutil_workflow-0.0.3/pyproject.toml
@@ -79,6 +79,10 @@ addopts = [
     "--strict-markers",
 ]
 filterwarnings = ["error"]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"

 [tool.black]
 line-length = 80
@@ -97,7 +101,6 @@ exclude = """
     | build
     | dist
     | venv
-    | __legacy
   )/
 )
 """
@@ -113,7 +116,6 @@ exclude = [
     "build",
     "dist",
     "venv",
-    "__legacy",
 ]

 [tool.ruff.lint]
--- /dev/null
+++ ddeutil_workflow-0.0.3/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+__version__: str = "0.0.3"
--- ddeutil_workflow-0.0.2/src/ddeutil/workflow/__types.py
+++ ddeutil_workflow-0.0.3/src/ddeutil/workflow/__types.py
@@ -9,3 +9,4 @@ from typing import Any

 TupleStr = tuple[str, ...]
 DictData = dict[str, Any]
+DictStr = dict[str, str]
--- ddeutil_workflow-0.0.2/src/ddeutil/workflow/conn.py
+++ ddeutil_workflow-0.0.3/src/ddeutil/workflow/conn.py
@@ -43,8 +43,14 @@ class BaseConn(BaseModel):
     ]

     @classmethod
-    def from_dict(cls, values: DictData):
-        """Construct Connection with dict of data"""
+    def from_dict(cls, values: DictData) -> Self:
+        """Construct Connection Model from dict data. This construct is
+        different with ``.model_validate()`` because it will prepare the values
+        before using it if the data dose not have 'url'.
+
+        :param values: A dict data that use to construct this model.
+        """
+        # NOTE: filter out the fields of this model.
         filter_data: DictData = {
             k: values.pop(k)
             for k in values.copy()
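The rest of `from_dict` is cut off in this hunk, but the visible idiom — popping keys out of the original dict while iterating a copy of it — is worth unpacking. A self-contained sketch under the assumption that the (unshown) predicate separates non-model keys; `host` and `port` are stand-ins, not real `BaseConn` fields:

```python
from typing import Any

DictData = dict[str, Any]

# Stand-ins for declared model fields; the real filter predicate is cut
# off at the end of the hunk above, so this condition is an assumption.
model_fields: set[str] = {"host", "port"}

values: DictData = {"host": "localhost", "port": 5432, "options": {"a": 1}}
filter_data: DictData = {
    k: values.pop(k)
    for k in values.copy()  # iterate a copy, because pop() mutates `values`
    if k not in model_fields
}

assert values == {"host": "localhost", "port": 5432}
assert filter_data == {"options": {"a": 1}}
```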
@@ -73,15 +79,11 @@ class BaseConn(BaseModel):
         )

     @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: DictData,
-    ) -> Self:
+    def from_loader(cls, name: str, externals: DictData) -> Self:
         """Construct Connection with Loader object with specific config name.

-        :param name:
-        :param externals:
+        :param name: A config name.
+        :param externals: A external data that want to adding to extras.
         """
         loader: Loader = Loader(name, externals=externals)
         # NOTE: Validate the config type match with current connection model
@@ -96,6 +98,7 @@ class BaseConn(BaseModel):

     @field_validator("endpoint")
     def __prepare_slash(cls, value: str) -> str:
+        """Prepare slash character that map double form URL model loading."""
         if value.startswith("//"):
             return value[1:]
         return value
@@ -148,7 +151,7 @@ class SFTP(Conn):
     dialect: Literal["sftp"] = "sftp"

     def __client(self):
-        from .vendors.sftp_wrapped import WrapSFTP
+        from .vendors.sftp import WrapSFTP

         return WrapSFTP(
             host=self.host,
--- ddeutil_workflow-0.0.2/src/ddeutil/workflow/exceptions.py
+++ ddeutil_workflow-0.0.3/src/ddeutil/workflow/exceptions.py
@@ -9,24 +9,4 @@ Define Errors Object for Node package
 from __future__ import annotations


-class BaseError(Exception):
-    """Base Error Object that use for catch any errors statement of
-    all step in this src
-    """
-
-
-class WorkflowBaseError(BaseError):
-    """Core Base Error object"""
-
-
-class ConfigNotFound(WorkflowBaseError):
-    """Error raise for a method not found the config file or data."""
-
-
-class PyException(Exception): ...
-
-
-class ShellException(Exception): ...
-
-
 class TaskException(Exception): ...
--- ddeutil_workflow-0.0.2/src/ddeutil/workflow/loader.py
+++ ddeutil_workflow-0.0.3/src/ddeutil/workflow/loader.py
@@ -6,7 +6,7 @@
 from __future__ import annotations

 from functools import cached_property
-from typing import Any, TypeVar
+from typing import Any, ClassVar, TypeVar

 from ddeutil.core import (
     getdot,
@@ -14,12 +14,12 @@ from ddeutil.core import (
     import_string,
 )
 from ddeutil.io import (
-    ConfigNotFound,
-    Params,
+    PathData,
     PathSearch,
     YamlEnvFl,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator

 from .__regex import RegexConf
 from .__types import DictData
@@ -29,6 +29,25 @@ BaseModelType = type[BaseModel]
 AnyModel = TypeVar("AnyModel", bound=BaseModel)


+class Engine(BaseModel):
+    """Engine Model"""
+
+    paths: PathData = Field(default_factory=PathData)
+    registry: list[str] = Field(default_factory=lambda: ["ddeutil.workflow"])
+
+    @model_validator(mode="before")
+    def __prepare_registry(cls, values: DictData) -> DictData:
+        if (_regis := values.get("registry")) and isinstance(_regis, str):
+            values["registry"] = [_regis]
+        return values
+
+
+class Params(BaseModel):
+    """Params Model"""
+
+    engine: Engine = Field(default_factory=Engine)
+
+
 class SimLoad:
     """Simple Load Object that will search config data by name.

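The `Engine` and `Params` models above replace the `Params` class that 0.0.2 imported from `ddeutil.io`. A small check of the behaviour encoded by the `__prepare_registry` before-validator, exactly as defined in the hunk (the registry name `my_pkg.tasks` is made up for illustration):

```python
from ddeutil.workflow.loader import Params

# A scalar `registry` string is coerced into a one-element list by the
# mode="before" validator on Engine.
params = Params.model_validate({"engine": {"registry": "my_pkg.tasks"}})
assert params.engine.registry == ["my_pkg.tasks"]

# With no input at all, every field falls back to its default_factory.
assert Params().engine.registry == ["ddeutil.workflow"]
```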
@@ -36,13 +55,11 @@ class SimLoad:
     :param params: A Params model object.
     :param externals: An external parameters

-    Note:
+    Noted:
         The config data should have ``type`` key for engine can know what is
         config should to do next.
     """

-    import_prefix: str = "ddeutil.workflow"
-
     def __init__(
         self,
         name: str,
@@ -56,7 +73,7 @@ class SimLoad:
     ):
         self.data = data
         if not self.data:
-            raise ConfigNotFound(f"Config {name!r} does not found on conf path")
+            raise ValueError(f"Config {name!r} does not found on conf path")
         self.__conf_params: Params = params
         self.externals: DictData = externals

@@ -75,6 +92,11 @@ class SimLoad:
             # NOTE: Auto adding module prefix if it does not set
             return import_string(f"ddeutil.workflow.{_typ}")
         except ModuleNotFoundError:
+            for registry in self.conf_params.engine.registry:
+                try:
+                    return import_string(f"{registry}.{_typ}")
+                except ModuleNotFoundError:
+                    continue
             return import_string(f"{_typ}")

     def load(self) -> AnyModel:
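This fallback loop is the payoff of the new `registry` setting: a config's `type` string is resolved first under the fixed `ddeutil.workflow.` prefix, then under each configured registry in order, and finally as a bare dotted path. A standalone sketch of the same resolution chain, using the `import_string` helper the module already imports from `ddeutil.core`:

```python
from typing import Any

from ddeutil.core import import_string


def resolve_type(_typ: str, registries: list[str]) -> Any:
    """Resolve a config ``type`` string the way SimLoad does after this hunk."""
    try:
        # 1) The hard-coded package prefix wins when it imports cleanly.
        return import_string(f"ddeutil.workflow.{_typ}")
    except ModuleNotFoundError:
        # 2) Otherwise walk each registry from workflows-conf.yaml in order.
        for registry in registries:
            try:
                return import_string(f"{registry}.{_typ}")
            except ModuleNotFoundError:
                continue
        # 3) Last resort: treat the value as a full import path.
        return import_string(_typ)
```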
@@ -82,12 +104,14 @@ class SimLoad:


 class Loader(SimLoad):
-    """Main Loader Object.
+    """Main Loader Object that get the config `yaml` file from current path.

     :param name: A name of config data that will read by Yaml Loader object.
     :param externals: An external parameters
     """

+    conf_name: ClassVar[str] = "workflows-conf"
+
     def __init__(
         self,
         name: str,
@@ -106,12 +130,16 @@ class Loader(SimLoad):
     def config(cls, path: str | None = None) -> Params:
         """Load Config data from ``workflows-conf.yaml`` file."""
         return Params.model_validate(
-            YamlEnvFl(path or "./workflows-conf.yaml").read()
+            YamlEnvFl(path or f"./{cls.conf_name}.yaml").read()
         )


 def map_params(value: Any, params: dict[str, Any]) -> Any:
-    """Map caller value that found from ``RE_CALLER`` regex.
+    """Map caller value that found from ``RE_CALLER`` regular expression.
+
+    :param value: A value that want to mapped with an params
+    :param params: A parameter value that getting with matched regular
+        expression.

     :rtype: Any
     :returns: An any getter value from the params input.
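Because `conf_name` is a `ClassVar`, the YAML file behind `Loader.config()` can now be redirected by subclassing instead of passing a `path` on every call. A hypothetical sketch (`StagingLoader` and its file name are invented for illustration):

```python
from typing import ClassVar

from ddeutil.workflow.loader import Loader, Params


class StagingLoader(Loader):
    # Hypothetical override: config() now reads ./workflows-conf-stg.yaml
    # instead of the default ./workflows-conf.yaml.
    conf_name: ClassVar[str] = "workflows-conf-stg"


params: Params = StagingLoader.config()
```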