ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/utils.py CHANGED
@@ -6,20 +6,105 @@
  from __future__ import annotations
 
  import inspect
+ import os
  import stat
  from abc import ABC, abstractmethod
+ from collections.abc import Iterator
+ from dataclasses import dataclass, field
  from datetime import date, datetime
  from functools import wraps
+ from hashlib import md5
  from importlib import import_module
+ from itertools import product
  from pathlib import Path
  from typing import Any, Callable, Literal, Optional, Protocol, Union
+ from zoneinfo import ZoneInfo
 
- from ddeutil.core import lazy
+ from ddeutil.core import getdot, hasdot, lazy
+ from ddeutil.io import PathData
  from ddeutil.io.models.lineage import dt_now
  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self
 
+ from .__types import DictData, Matrix, Re
+
+
+ class Engine(BaseModel):
+     """Engine Model"""
+
+     paths: PathData = Field(default_factory=PathData)
+     registry: list[str] = Field(
+         default_factory=lambda: [
+             "ddeutil.workflow",
+         ],
+     )
+
+     @model_validator(mode="before")
+     def __prepare_registry(cls, values: DictData) -> DictData:
+         """Prepare a registry value that is passed with a string type by
+         converting it to a list of strings.
+         """
+         if (_regis := values.get("registry")) and isinstance(_regis, str):
+             values["registry"] = [_regis]
+         return values
+
+
+ class ConfParams(BaseModel):
+     """Params Model"""
+
+     engine: Engine = Field(
+         default_factory=Engine,
+         description="An engine mapping values.",
+     )
+
+
+ def config() -> ConfParams:
+     """Load config data from the ``workflows-conf.yaml`` file."""
+     root_path: str = os.getenv("WORKFLOW_ROOT_PATH", ".")
+
+     regis: list[str] = []
+     if regis_env := os.getenv("WORKFLOW_CORE_REGISTRY"):
+         regis = [r.strip() for r in regis_env.split(",")]
+
+     conf_path: str | None = (
+         f"{root_path}/{conf_env}"
+         if (conf_env := os.getenv("WORKFLOW_CORE_PATH_CONF"))
+         else None
+     )
+     return ConfParams.model_validate(
+         obj={
+             "engine": {
+                 "registry": regis,
+                 "paths": {
+                     "root": root_path,
+                     "conf": conf_path,
+                 },
+             },
+         }
+     )
+
+
+ def gen_id(value: Any, *, sensitive: bool = True, unique: bool = False) -> str:
+     """Generate a running ID for tracking. This generation process uses the
+     `md5` function.
+
+     :param value: A value that is used to generate the running ID.
+     :param sensitive: A flag that keeps the case of the value if it is true;
+         otherwise the value is lowercased first.
+     :param unique: A flag that appends the current timestamp for making the
+         ID unique.
+     :rtype: str
+     """
+     if not isinstance(value, str):
+         value: str = str(value)
+
+     tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+     return md5(
+         (
+             f"{(value if sensitive else value.lower())}"
+             + (f"{datetime.now(tz=tz):%Y%m%d%H%M%S%f}" if unique else "")
+         ).encode()
+     ).hexdigest()
+
 
  class TagFunc(Protocol):
      """Tag Function Protocol"""
@@ -30,50 +115,68 @@ class TagFunc:
      def __call__(self, *args, **kwargs): ...
 
 
- def tag(tag_value: str, name: str | None = None):
+ def tag(value: str, name: str | None = None):
      """Tag decorator function that sets the ``tag`` and ``name`` attributes
      on a function for making registry variables.
 
-     :param: tag_value: A tag value for make different use-case of a function.
+     :param value: A tag value for making a different use-case of a function.
      :param name: A name that is kept in the registries.
      """
 
-     def func_internal(func: TagFunc):
-         func.tag = tag_value
+     def func_internal(func: callable) -> TagFunc:
+         func.tag = value
          func.name = name or func.__name__.replace("_", "-")
 
          @wraps(func)
          def wrapped(*args, **kwargs):
              return func(*args, **kwargs)
 
+         # TODO: pass the result from wrapped to the Result model
+         # >>> return Result.model_validate(obj=wrapped)
          return wrapped
 
      return func_internal
 
 
- def make_registry(module: str) -> dict[str, dict[str, Callable[[], TagFunc]]]:
-     """Return registries of all functions that able to called with task."""
-     rs: dict[str, dict[str, Callable[[], Callable]]] = {}
-     for fstr, func in inspect.getmembers(
-         import_module(module), inspect.isfunction
-     ):
-         if not hasattr(func, "tag"):
+ Registry = dict[str, Callable[[], TagFunc]]
+
+
+ def make_registry(submodule: str) -> dict[str, Registry]:
+     """Return registries of all functions that are able to be called with
+     a task.
+
+     :param submodule: A submodule prefix that want to import the registry from.
+     """
+     rs: dict[str, Registry] = {}
+     for module in config().engine.registry:
+         # NOTE: try to sequentially import task functions
+         try:
+             importer = import_module(f"{module}.{submodule}")
+         except ModuleNotFoundError:
              continue
 
-         if func.name in rs:
+         for fstr, func in inspect.getmembers(importer, inspect.isfunction):
+             # NOTE: check the function attribute that was already tagged by
+             # the ``utils.tag`` decorator.
+             if not hasattr(func, "tag"):
+                 continue
+
+             # NOTE: create a new register name if it does not exist
+             if func.name not in rs:
+                 rs[func.name] = {func.tag: lazy(f"{module}.{submodule}.{fstr}")}
+                 continue
+
              if func.tag in rs[func.name]:
                  raise ValueError(
-                     f"The tag {func.tag!r} already exists on module {module}"
+                     f"The tag {func.tag!r} already exists on "
+                     f"{module}.{submodule}, you should change this tag name "
+                     f"or change its function name."
                  )
-         rs[func.name][func.tag] = lazy(f"{module}.{fstr}")
-         continue
+             rs[func.name][func.tag] = lazy(f"{module}.{submodule}.{fstr}")
 
-         # NOTE: Create new register name if it not exists
-         rs[func.name] = {func.tag: lazy(f"{module}.{fstr}")}
      return rs
 
 
- class BaseParams(BaseModel, ABC):
+ class BaseParam(BaseModel, ABC):
      """Base Parameter that is used to make a Params Model."""
 
      desc: Optional[str] = None
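With the registry now resolved through `config().engine.registry`, a tagged task only has to live in a `<module>.<submodule>` path that the engine knows about. A sketch under the assumption of a hypothetical `my_project/tasks.py` listed via `WORKFLOW_CORE_REGISTRY=my_project`:

```python
# my_project/tasks.py -- hypothetical module on the registry path.
from ddeutil.workflow.utils import tag

@tag("polars", name="postgres-to-delta")
def postgres_to_delta_with_polars(source: dict, sink: dict) -> dict:
    # Toy body; a real task would move data here.
    return {"records": 0}


# Caller side: build the registry for the ``tasks`` submodule.
from ddeutil.workflow.utils import make_registry

registry = make_registry("tasks")
# Keys are ``func.name`` then ``func.tag``; values are zero-argument
# callables (per the Callable[[], TagFunc] annotation) that import lazily.
func = registry["postgres-to-delta"]["polars"]()
assert func.tag == "polars" and func.name == "postgres-to-delta"
```

Registering a second function with the same name and tag in the same submodule raises the `ValueError` shown in the hunk above.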
@@ -87,7 +190,7 @@ class BaseParams(BaseModel, ABC):
      )
 
 
- class DefaultParams(BaseParams):
+ class DefaultParam(BaseParam):
      """Default Parameter that will check the default value if it is required."""
 
      default: Optional[str] = None
@@ -107,7 +210,7 @@ class DefaultParams(BaseParams):
          return self
 
 
- class DatetimeParams(DefaultParams):
+ class DatetimeParam(DefaultParam):
      """Datetime parameter."""
 
      type: Literal["datetime"] = "datetime"
@@ -130,7 +233,7 @@ class DatetimeParams(DefaultParams):
          return datetime.fromisoformat(value)
 
 
- class StrParams(DefaultParams):
+ class StrParam(DefaultParam):
      """String parameter."""
 
      type: Literal["str"] = "str"
@@ -141,7 +244,7 @@ class StrParams(DefaultParams):
          return str(value)
 
 
- class IntParams(DefaultParams):
+ class IntParam(DefaultParam):
      """Integer parameter."""
 
      type: Literal["int"] = "int"
@@ -160,7 +263,7 @@ class IntParams(DefaultParams):
          return value
 
 
- class ChoiceParams(BaseParams):
+ class ChoiceParam(BaseParam):
      type: Literal["choice"] = "choice"
      options: list[str]
@@ -175,13 +278,101 @@ class ChoiceParams(BaseParams):
          return value
 
 
- Params = Union[
-     ChoiceParams,
-     DatetimeParams,
-     StrParams,
+ Param = Union[
+     ChoiceParam,
+     DatetimeParam,
+     StrParam,
  ]
 
 
+ @dataclass
+ class Result:
+     """Result dataclass object for passing parameters to and receiving
+     output from the pipeline execution.
+     """
+
+     status: int = field(default=2)
+     context: DictData = field(default_factory=dict)
+
+
  def make_exec(path: str | Path):
+     """Change the mode of a file to be executable."""
      f: Path = Path(path) if isinstance(path, str) else path
      f.chmod(f.stat().st_mode | stat.S_IEXEC)
+
+
+ def param2template(
+     value: Any,
+     params: dict[str, Any],
+     *,
+     repr_flag: bool = False,
+ ) -> Any:
+     """Pass params to a template string that is searched with the
+     ``RE_CALLER`` regular expression.
+
+     :param value: A value that want to map with the params.
+     :param params: A parameter value that is looked up with the matched
+         regular expression.
+     :param repr_flag: A repr flag for using repr instead of str if it is
+         set to true.
+
+     :rtype: Any
+     :returns: An any getter value from the params input.
+     """
+     if isinstance(value, dict):
+         return {
+             k: param2template(value[k], params, repr_flag=repr_flag)
+             for k in value
+         }
+     elif isinstance(value, (list, tuple, set)):
+         return type(value)(
+             [param2template(i, params, repr_flag=repr_flag) for i in value]
+         )
+     elif not isinstance(value, str):
+         return value
+
+     if not Re.RE_CALLER.search(value):
+         return value
+
+     for found in Re.RE_CALLER.finditer(value):
+
+         # NOTE: get the caller value that is set inside ``${{ <caller-value> }}``
+         caller: str = found.group("caller")
+         if not hasdot(caller, params):
+             raise ValueError(f"params does not set caller: {caller!r}")
+
+         getter: Any = getdot(caller, params)
+
+         # NOTE: check the type of the getter value
+         if isinstance(getter, (str, int)):
+             value: str = value.replace(
+                 found.group(0), (repr(getter) if repr_flag else str(getter)), 1
+             )
+             continue
+
+         # NOTE:
+         # If the type of the getter caller cannot be formatted to a string,
+         # it will return the origin value from the ``getdot`` function.
+         if value.replace(found.group(0), "", 1) != "":
+             raise ValueError(
+                 "Caller variable should not have other content outside "
+                 "${{ ... }}"
+             )
+         return getter
+     return value
+
+
+ def dash2underscore(
+     key: str,
+     values: DictData,
+     *,
+     fixed: str | None = None,
+ ) -> DictData:
+     """Change a key name that has a dash to an underscore."""
+     if key in values:
+         values[(fixed or key.replace("-", "_"))] = values.pop(key)
+     return values
+
+
+ def cross_product(matrix: Matrix) -> Iterator:
+     """Iterator of product values from a matrix."""
+     yield from (
+         {_k: _v for e in mapped for _k, _v in e.items()}
+         for mapped in product(
+             *[[{k: v} for v in vs] for k, vs in matrix.items()]
+         )
+     )
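The template and matrix helpers at the end of the file are pure functions, so they are easiest to pin down by example. A small sketch, assuming the `Re.RE_CALLER` pattern strips the padding whitespace inside `${{ ... }}`, as the README examples below suggest:

```python
from ddeutil.workflow.utils import cross_product, dash2underscore, param2template

params = {"params": {"name": "Local Workflow", "tags": ["demo", "local"]}}

# A template embedded in a longer string is substituted via str().
assert param2template("run by ${{ params.name }}", params) == "run by Local Workflow"

# A template that is the whole string may return a non-string getter as-is.
assert param2template("${{ params.tags }}", params) == ["demo", "local"]

# dash2underscore renames a single key in place.
assert dash2underscore("run-date", {"run-date": "2024-01-01"}) == {"run_date": "2024-01-01"}

# cross_product expands a matrix mapping into every key/value combination.
matrix = {"python": ["3.9", "3.10"], "os": ["linux"]}
assert list(cross_product(matrix)) == [
    {"python": "3.9", "os": "linux"},
    {"python": "3.10", "os": "linux"},
]
```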
ddeutil_workflow-0.0.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ddeutil-workflow
- Version: 0.0.4
+ Version: 0.0.6
  Summary: Data Developer & Engineer Workflow Utility Objects
  Author-email: ddeutils <korawich.anu@gmail.com>
  License: MIT
@@ -9,7 +9,7 @@ Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
  Keywords: data,workflow,utility,pipeline
  Classifier: Topic :: Utilities
  Classifier: Natural Language :: English
- Classifier: Development Status :: 3 - Alpha
+ Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python
@@ -23,35 +23,33 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: fmtutil
  Requires-Dist: ddeutil-io
- Requires-Dist: python-dotenv
- Provides-Extra: test
- Requires-Dist: sqlalchemy ==2.0.30 ; extra == 'test'
- Requires-Dist: paramiko ==3.4.0 ; extra == 'test'
- Requires-Dist: sshtunnel ==0.4.0 ; extra == 'test'
- Requires-Dist: boto3 ==1.34.117 ; extra == 'test'
- Requires-Dist: fsspec ==2024.5.0 ; extra == 'test'
- Requires-Dist: polars ==0.20.31 ; extra == 'test'
- Requires-Dist: pyarrow ==16.1.0 ; extra == 'test'
-
- # Data Utility: _Workflow_
+ Requires-Dist: python-dotenv ==1.0.1
+ Provides-Extra: app
+ Requires-Dist: fastapi ==0.112.0 ; extra == 'app'
+ Requires-Dist: apscheduler[sqlalchemy] ==3.10.4 ; extra == 'app'
+
+ # Workflow
 
  [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
  [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
  [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
 
  **Table of Contents**:
 
  - [Installation](#installation)
  - [Getting Started](#getting-started)
-   - [Connection](#connection)
-   - [Dataset](#dataset)
-   - [Schedule](#schedule)
- - [Examples](#examples)
-   - [Python](#python)
-   - [Tasks (EL)](#tasks-extract--load)
-   - [Hooks (T)](#hooks-transform)
-
- This **Utility Workflow** objects was created for easy to make a simple metadata
+   - [Core Features](#core-features)
+     - [On](#on)
+     - [Pipeline](#pipeline)
+ - [Usage](#usage)
+   - [Python & Bash](#python--bash)
+   - [Hook (EL)](#hook-extract--load)
+   - [Hook (T)](#hook-transform)
+ - [Configuration](#configuration)
+ - [Deployment](#deployment)
+
+ This **Workflow** object was created to make it easy to build a simple metadata
  driven pipeline that is able to **ETL, T, EL, or ELT** by a `.yaml` file.
 
  I think we should not create multiple pipelines per use-case if we are able to
@@ -74,13 +72,18 @@ pipeline.
  pip install ddeutil-workflow
  ```
 
- This project need `ddeutil-io`, `ddeutil-model` extension namespace packages.
+ This project needs the `ddeutil-io` extension namespace package. If you want to
+ install this package with the application add-ons, you should add `app` to the
+ installation:
+
+ ```shell
+ pip install ddeutil-workflow[app]
+ ```
 
  ## Getting Started
 
  As the first step, you should create the connections and datasets for the inputs
  and outputs of the data that you want to use in a workflow pipeline. Some of these components
- is similar component of the **Airflow** because I like it concepts.
+ are similar to components of **Airflow** because I like its orchestration concepts.
 
  The main feature of this project is the `Pipeline` object that can call any
  registry function. The pipeline can handle everything that you want to do; it
@@ -91,88 +94,84 @@ will pass parameters and catch the output for re-use in the next step.
  > dynamic registries instead of main features because it has a lot of vendor
  > code and deps to maintain. (I do not have time to handle these features.)
 
- ### Connection
+ ### On
 
- The connection for worker able to do any thing.
+ The **On** is a schedule object.
 
  ```yaml
- conn_postgres_data:
-   type: conn.Postgres
-   url: 'postgres//username:${ENV_PASS}@hostname:port/database?echo=True&time_out=10'
+ on_every_5_min:
+   type: on.On
+   cron: "*/5 * * * *"
  ```
 
  ```python
- from ddeutil.workflow.conn import Conn
+ from ddeutil.workflow.on import On
 
- conn = Conn.from_loader(name='conn_postgres_data', externals={})
- assert conn.ping()
- ```
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+ assert '*/5 * * * *' == str(schedule.cronjob)
 
- ### Dataset
-
- The dataset is define any objects on the connection. This feature was implemented
- on `/vendors` because it has a lot of tools that can interact with any data systems
- in the data tool stacks.
-
- ```yaml
- ds_postgres_customer_tbl:
-   type: dataset.PostgresTbl
-   conn: 'conn_postgres_data'
-   features:
-     id: serial primary key
-     name: varchar( 100 ) not null
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+ assert '2022-01-01 00:05:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:10:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:15:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:20:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
  ```
 
- ```python
- from ddeutil.workflow.vendors.pg import PostgresTbl
-
- dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
- assert dataset.exists()
- ```
+ ### Pipeline
 
- ### Schedule
+ The **Pipeline** object is the core feature of this project.
 
  ```yaml
- schd_for_node:
-   type: schedule.Schedule
-   cron: "*/5 * * * *"
+ run_py_local:
+   type: ddeutil.workflow.pipeline.Pipeline
+   on: 'on_every_5_min'
+   params:
+     author-run:
+       type: str
+     run-date:
+       type: datetime
  ```
 
  ```python
- from ddeutil.workflow.schedule import Schedule
-
- scdl = Schedule.from_loader(name='schd_for_node', externals={})
- assert '*/5 * * * *' == str(scdl.cronjob)
+ from ddeutil.workflow.pipeline import Pipeline
 
- cron_iterate = scdl.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+ pipe = Pipeline.from_loader(name='run_py_local', externals={})
+ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  ```
 
- ## Examples
+ > [!NOTE]
+ > The above parameters are set with the full declarative statement. You can
+ > instead pass a parameter type as the value of the parameter name key:
+ > ```yaml
+ > params:
+ >   author-run: str
+ >   run-date: datetime
+ > ```
+ >
+ > And for the type, you can remove the `ddeutil.workflow` prefix because it can
+ > be found by a looping search over the `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage
 
  These are examples that use a workflow file for running common Data Engineering
  use-cases.
 
- ### Python
+ > [!IMPORTANT]
+ > I recommend you to use the `task` stage for all actions that you want to do
+ > with the pipeline object.
 
- The state of doing lists that worker should to do. It be collection of the stage.
+ ### Python & Bash
 
  ```yaml
  run_py_local:
-   type: ddeutil.workflow.pipe.Pipeline
+   type: pipeline.Pipeline
    params:
-     author-run:
-       type: str
-     run-date:
-       type: datetime
+     author-run: str
+     run-date: datetime
    jobs:
      first-job:
        stages:
-         - name: Printing Information
+         - name: "Printing Information"
            id: define-func
            run: |
              x = '${{ params.author-run }}'
@@ -181,7 +180,7 @@ run_py_local:
              def echo(name: str):
                print(f'Hello {name}')
 
-         - name: Run Sequence and use var from Above
+         - name: "Run Sequence and use var from Above"
            vars:
              x: ${{ params.author-run }}
            run: |
@@ -189,11 +188,17 @@ run_py_local:
              # Change x value
              x: int = 1
 
-         - name: Call Function
+         - name: "Call Function"
            vars:
              echo: ${{ stages.define-func.outputs.echo }}
            run: |
              echo('Caller')
+     second-job:
+       stages:
+         - name: "Echo Bash Script"
+           id: shell-echo
+           bash: |
+             echo "Hello World from Shell"
  ```
 
  ```python
@@ -207,24 +212,23 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  > Hello Local Workflow
  > Receive x from above with Local Workflow
  > Hello Caller
+ > Hello World from Shell
  ```
 
- ### Tasks (Extract & Load)
+ ### Hook (Extract & Load)
 
  ```yaml
  pipe_el_pg_to_lake:
-   type: ddeutil.workflow.pipe.Pipeline
+   type: pipeline.Pipeline
    params:
-     run-date:
-       type: datetime
-     author-email:
-       type: str
+     run-date: datetime
+     author-email: str
    jobs:
      extract-load:
        stages:
          - name: "Extract Load from Postgres to Lake"
            id: extract-load
-           task: tasks/postgres-to-delta@polars
+           uses: tasks/postgres-to-delta@polars
            with:
              source:
                conn: conn_postgres_url
@@ -236,11 +240,11 @@ pipe_el_pg_to_lake:
              endpoint: "/${{ params.name }}"
  ```
 
- ### Tasks (Transform)
+ ### Hook (Transform)
 
  ```yaml
- pipe_hook_mssql_proc:
-   type: ddeutil.workflow.pipe.Pipeline
+ pipeline_hook_mssql_proc:
+   type: pipeline.Pipeline
    params:
      run_date: datetime
      sp_name: str
@@ -251,7 +255,7 @@ pipe_hook_mssql_proc:
        stages:
          - name: "Transform Data in MS SQL Server"
            id: transform
-           task: tasks/mssql-proc@odbc
+           uses: tasks/mssql-proc@odbc
            with:
              exec: ${{ params.sp_name }}
              params:
@@ -261,6 +265,30 @@ pipe_hook_mssql_proc:
              target: ${{ params.target_name }}
  ```
 
- ## License
+ ## Configuration
 
- This project was licensed under the terms of the [MIT license](LICENSE).
+ ```bash
+ export WORKFLOW_ROOT_PATH=.
+ export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+ export WORKFLOW_CORE_PATH_CONF=conf
+ ```
+
+ Application config:
+
+ ```bash
+ export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+ export WORKFLOW_APP_INTERVAL=10
+ ```
+
+ ## Deployment
+
+ This package is able to run as an application service that receives manual
+ triggers from the master node via a REST API.
+
+ > [!WARNING]
+ > This feature has not started yet because I am still researching and finding
+ > the best tool to provision an app service, like `starlette`, `fastapi`,
+ > `apscheduler`.
+
+ ```shell
+ (venv) $ workflow start -p 7070
+ ```