ddeutil-workflow 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,40 +6,58 @@
6
6
  from __future__ import annotations
7
7
 
8
8
  import inspect
9
+ import logging
9
10
  import subprocess
11
+ from abc import ABC, abstractmethod
12
+ from datetime import date, datetime
10
13
  from inspect import Parameter
11
14
  from subprocess import CompletedProcess
12
- from typing import Any, Callable, Optional, Union
15
+ from typing import Any, Callable, Literal, Optional, Union
13
16
 
17
+ from ddeutil.io.models.lineage import dt_now
14
18
  from pydantic import BaseModel, Field
19
+ from pydantic.functional_validators import model_validator
15
20
  from typing_extensions import Self
16
21
 
17
22
  from .__regex import RegexConf
18
23
  from .__types import DictData
19
- from .exceptions import PipeArgumentError, PyException, TaskException
20
- from .loader import Loader, map_caller
24
+ from .exceptions import PyException, TaskException
25
+ from .loader import Loader, map_params
26
+ from .utils import make_registry
21
27
 
22
28
 
23
- class StageResult(BaseModel): ...
29
+ class BaseStage(BaseModel, ABC):
30
+ """Base Stage Model."""
24
31
 
32
+ id: Optional[str] = None
33
+ name: str
25
34
 
26
- class JobResult(BaseModel): ...
35
+ @abstractmethod
36
+ def execute(self, params: DictData) -> DictData:
37
+ raise NotImplementedError("Stage should implement ``execute`` method.")
27
38
 
39
+ def set_outputs(self, rs: DictData, params: DictData) -> DictData:
40
+ """Set outputs to params"""
41
+ if self.id is None:
42
+ return params
28
43
 
29
- class PipeResult(BaseModel): ...
44
+ if "stages" not in params:
45
+ params["stages"] = {}
30
46
 
47
+ params["stages"][self.id] = {"outputs": rs}
48
+ return params
31
49
 
32
- class EmptyStage(BaseModel):
33
- """Empty stage that is doing nothing and logging the name of stage only."""
34
50
 
35
- id: Optional[str] = None
36
- name: str
51
+ class EmptyStage(BaseStage):
52
+ """Empty stage that is doing nothing and logging the name of stage only."""
37
53
 
38
- def execute(self, params: dict[str, Any]) -> dict[str, Any]:
54
+ def execute(self, params: DictData) -> DictData:
55
+ """Execute for the Empty stage that do only logging out."""
56
+ logging.info(f"Execute: {self.name!r}")
39
57
  return params
40
58
 
41
59
 
42
- class ShellStage(EmptyStage):
60
+ class ShellStage(BaseStage):
43
61
  """Shell statement stage."""
44
62
 
45
63
  shell: str
@@ -50,9 +68,7 @@ class ShellStage(EmptyStage):
50
68
  """Prepare shell statement string that include newline"""
51
69
  return shell.replace("\n", ";")
52
70
 
53
- def set_outputs(
54
- self, rs: CompletedProcess, params: dict[str, Any]
55
- ) -> dict[str, Any]:
71
+ def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
56
72
  """Set outputs to params"""
57
73
  # NOTE: skipping set outputs of stage execution when id does not set.
58
74
  if self.id is None:
@@ -71,7 +87,7 @@ class ShellStage(EmptyStage):
71
87
  }
72
88
  return params
73
89
 
74
- def execute(self, params: dict[str, Any]) -> dict[str, Any]:
90
+ def execute(self, params: DictData) -> DictData:
75
91
  """Execute the Shell & Powershell statement with the Python build-in
76
92
  ``subprocess`` package.
77
93
  """
@@ -92,24 +108,22 @@ class ShellStage(EmptyStage):
92
108
  return params
93
109
 
94
110
 
95
- class PyStage(EmptyStage):
111
+ class PyStage(BaseStage):
96
112
  """Python executor stage that running the Python statement that receive
97
113
  globals and additional variables.
98
114
  """
99
115
 
100
116
  run: str
101
- vars: dict[str, Any] = Field(default_factory=dict)
117
+ vars: DictData = Field(default_factory=dict)
102
118
 
103
- def get_var(self, params: dict[str, Any]) -> dict[str, Any]:
119
+ def get_var(self, params: DictData) -> DictData:
104
120
  """Return variables"""
105
121
  rs = self.vars.copy()
106
122
  for p, v in self.vars.items():
107
- rs[p] = map_caller(v, params)
123
+ rs[p] = map_params(v, params)
108
124
  return rs
109
125
 
110
- def set_outputs(
111
- self, lc: dict[str, Any], params: dict[str, Any]
112
- ) -> dict[str, Any]:
126
+ def set_outputs(self, rs: DictData, params: DictData) -> DictData:
113
127
  """Set outputs to params"""
114
128
  # NOTE: skipping set outputs of stage execution when id does not set.
115
129
  if self.id is None:
@@ -120,25 +134,25 @@ class PyStage(EmptyStage):
120
134
 
121
135
  params["stages"][self.id] = {
122
136
  # NOTE: The output will filter unnecessary keys from ``_locals``.
123
- "outputs": {k: lc[k] for k in lc if k != "__annotations__"},
137
+ "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
124
138
  }
125
139
  return params
126
140
 
127
- def execute(self, params: dict[str, Any]) -> dict[str, Any]:
141
+ def execute(self, params: DictData) -> DictData:
128
142
  """Execute the Python statement that pass all globals and input params
129
143
  to globals argument on ``exec`` build-in function.
130
144
 
131
145
  :param params: A parameter that want to pass before run any statement.
132
- :type params: dict[str, Any]
146
+ :type params: DictData
133
147
 
134
- :rtype: dict[str, Any]
148
+ :rtype: DictData
135
149
  :returns: A parameters from an input that was mapped output if the stage
136
150
  ID was set.
137
151
  """
138
- _globals: dict[str, Any] = globals() | params | self.get_var(params)
139
- _locals: dict[str, Any] = {}
152
+ _globals: DictData = globals() | params | self.get_var(params)
153
+ _locals: DictData = {}
140
154
  try:
141
- exec(map_caller(self.run, params), _globals, _locals)
155
+ exec(map_params(self.run, params), _globals, _locals)
142
156
  except Exception as err:
143
157
  raise PyException(
144
158
  f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
@@ -151,14 +165,18 @@ class PyStage(EmptyStage):
151
165
 
152
166
 
153
167
  class TaskSearch(BaseModel):
168
+ """Task Search Model"""
169
+
154
170
  path: str
155
171
  func: str
156
172
  tag: str
157
173
 
158
174
 
159
- class TaskStage(EmptyStage):
175
+ class TaskStage(BaseStage):
176
+ """Task executor stage that running the Python function."""
177
+
160
178
  task: str
161
- args: dict[str, Any]
179
+ args: DictData
162
180
 
163
181
  @staticmethod
164
182
  def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
@@ -167,27 +185,15 @@ class TaskStage(EmptyStage):
167
185
  raise ValueError("Task does not match with task format regex.")
168
186
  tasks = TaskSearch(**found.groupdict())
169
187
 
170
- from ddeutil.core import import_string
171
-
172
- try:
173
- rgt = import_string(f"ddeutil.workflow.{tasks.path}.registries")
174
- if tasks.func not in rgt:
175
- raise NotImplementedError(
176
- f"ddeutil.workflow.{tasks.path}.registries does not "
177
- f"implement registry: {tasks.func}."
178
- )
179
- except ImportError:
180
-
181
- # NOTE: Try to import this task function fom target module.
182
- try:
183
- return import_string(
184
- f"ddeutil.workflow.{tasks.path}.{tasks.func}"
185
- )
186
- except ImportError:
187
- raise NotImplementedError(
188
- f"ddeutil.workflow.{tasks.path} does not implement "
189
- f"registries or {tasks.func}."
190
- ) from None
188
+ # NOTE: Registry object should implement on this package only.
189
+ # TODO: This prefix value to search registry should dynamic with
190
+ # config file.
191
+ rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
192
+ if tasks.func not in rgt:
193
+ raise NotImplementedError(
194
+ f"ddeutil.workflow.{tasks.path}.registries does not "
195
+ f"implement registry: {tasks.func}."
196
+ )
191
197
 
192
198
  if tasks.tag not in rgt[tasks.func]:
193
199
  raise NotImplementedError(
@@ -197,7 +203,7 @@ class TaskStage(EmptyStage):
197
203
  )
198
204
  return rgt[tasks.func][tasks.tag]
199
205
 
200
- def execute(self, params: dict[str, Any]) -> dict[str, Any]:
206
+ def execute(self, params: DictData) -> DictData:
201
207
  """Execute the Task function."""
202
208
  task_caller = self.extract_task(self.task)()
203
209
  if not callable(task_caller):
@@ -215,17 +221,11 @@ class TaskStage(EmptyStage):
215
221
  f"does not set to args"
216
222
  )
217
223
  try:
218
- rs = task_caller(**self.args)
224
+ rs = task_caller(**map_params(self.args, params))
219
225
  except Exception as err:
220
226
  raise TaskException(f"{err.__class__.__name__}: {err}") from err
221
- return {"output": rs}
222
-
223
-
224
- class HookStage(EmptyStage):
225
- hook: str
226
- args: dict[str, Any]
227
-
228
- def execute(self, params: dict[str, Any]) -> dict[str, Any]: ...
227
+ self.set_outputs(rs, params)
228
+ return params
229
229
 
230
230
 
231
231
  # NOTE: Order of parsing stage data
@@ -233,14 +233,24 @@ Stage = Union[
233
233
  PyStage,
234
234
  ShellStage,
235
235
  TaskStage,
236
- HookStage,
237
236
  EmptyStage,
238
237
  ]
239
238
 
240
239
 
240
+ class Strategy(BaseModel):
241
+ """Strategy Model"""
242
+
243
+ matrix: list[str] = Field(default_factory=list)
244
+ include: list[str] = Field(default_factory=list)
245
+ exclude: list[str] = Field(default_factory=list)
246
+
247
+
241
248
  class Job(BaseModel):
249
+ """Job Model"""
250
+
242
251
  stages: list[Stage] = Field(default_factory=list)
243
252
  needs: list[str] = Field(default_factory=list)
253
+ strategy: Strategy = Field(default_factory=Strategy)
244
254
 
245
255
  def stage(self, stage_id: str) -> Stage:
246
256
  for stage in self.stages:
@@ -248,7 +258,8 @@ class Job(BaseModel):
248
258
  return stage
249
259
  raise ValueError(f"Stage ID {stage_id} does not exists")
250
260
 
251
- def execute(self, params: dict[str, Any] | None = None) -> dict[str, Any]:
261
+ def execute(self, params: DictData | None = None) -> DictData:
262
+ """Execute job with passing dynamic parameters from the pipeline."""
252
263
  for stage in self.stages:
253
264
  # NOTE:
254
265
  # I do not use below syntax because `params` dict be the
@@ -259,45 +270,148 @@ class Job(BaseModel):
259
270
  return params
260
271
 
261
272
 
262
- class Strategy(BaseModel):
263
- matrix: list[str]
264
- include: list[str]
265
- exclude: list[str]
273
+ class BaseParams(BaseModel, ABC):
274
+ """Base Parameter that use to make Params Model."""
266
275
 
276
+ desc: Optional[str] = None
277
+ required: bool = True
278
+ type: str
267
279
 
268
- class JobStrategy(Job):
269
- """Strategy job"""
280
+ @abstractmethod
281
+ def receive(self, value: Optional[Any] = None) -> Any:
282
+ raise ValueError(
283
+ "Receive value and validate typing before return valid value."
284
+ )
285
+
286
+
287
class DefaultParams(BaseParams):
    """Parameter base that carries an optional ``default`` value.

    After model construction, ``check_default`` rejects a parameter that is
    marked as not required but has no default value to fall back on.
    """

    default: Optional[str] = None

    @abstractmethod
    def receive(self, value: Optional[Any] = None) -> Any:
        """Validate and return the received value (implemented by subclasses).

        :param value: A raw value that was passed for this parameter.

        :raises ValueError: Always, when this base implementation is called.
        """
        message = "Receive value and validate typing before return valid value."
        raise ValueError(message)

    @model_validator(mode="after")
    def check_default(self) -> Self:
        """Ensure an optional parameter always has a default value.

        :raises ValueError: If ``required`` is false and ``default`` is None.

        :rtype: Self
        """
        # Guard clause: a required parameter, or one with a default, is fine.
        if self.required or self.default is not None:
            return self
        raise ValueError(
            "Default should set when this parameter does not required."
        )
305
+
306
+
307
class DatetimeParams(DefaultParams):
    """Datetime parameter.

    The parameter is optional by default and its ``default`` value is produced
    by the ``dt_now`` factory when it was not set explicitly.
    """

    type: Literal["datetime"] = "datetime"
    required: bool = False
    default: datetime = Field(default_factory=dt_now)

    def receive(self, value: str | datetime | date | None = None) -> datetime:
        """Convert the received value to a ``datetime`` object.

        :param value: A datetime, a date, an ISO format string, or None.

        :raises ValueError: If the value is not one of the supported types.

        :rtype: datetime
        :returns: The default when the value is None, the value itself when it
            already is a datetime, a midnight datetime for a date, or the
            result of ``datetime.fromisoformat`` for a string.
        """
        if value is None:
            return self.default

        # NOTE: ``datetime`` is a subclass of ``date`` so it must be checked
        #   before the ``date`` branch.
        if isinstance(value, datetime):
            return value

        if isinstance(value, date):
            return datetime(value.year, value.month, value.day)

        if isinstance(value, str):
            return datetime.fromisoformat(value)

        raise ValueError(
            f"Value that want to convert to datetime does not support for "
            f"type: {type(value)}"
        )
328
+
329
+
330
+ class StrParams(DefaultParams):
331
+ """String parameter."""
332
+
333
+ type: Literal["str"] = "str"
270
334
 
271
- strategy: Strategy
335
+ def receive(self, value: Optional[str] = None) -> str | None:
336
+ if value is None:
337
+ return self.default
338
+ return str(value)
339
+
340
+
341
class IntParams(DefaultParams):
    """Integer parameter."""

    type: Literal["int"] = "int"

    def receive(self, value: Optional[int] = None) -> int | None:
        """Convert the received value to an integer.

        :param value: An integer, or any object whose string form can be
            parsed by ``int``. None falls back to the default value.

        :raises ValueError: If the value cannot be converted to an integer.

        :rtype: int | None
        :returns: The default when the value is None, otherwise the integer
            value.
        """
        if value is None:
            # NOTE(review): ``default`` is declared as ``Optional[str]`` on
            #   ``DefaultParams``, so this may return a string rather than an
            #   int -- confirm whether the default should be converted too.
            return self.default

        if isinstance(value, int):
            return value

        try:
            return int(str(value))
        except (TypeError, ValueError) as err:
            # NOTE: ``int`` raises ``ValueError`` (not ``TypeError``) for a
            #   string that is not an integer literal, so both are handled to
            #   surface the same descriptive error.
            raise ValueError(
                f"Value that want to convert to integer does not support "
                f"for type: {type(value)}"
            ) from err
358
+
359
+
360
class ChoiceParams(BaseParams):
    """Choice parameter that accepts only a value listed in ``options``."""

    type: Literal["choice"] = "choice"
    options: list[str]

    def receive(self, value: Optional[str] = None) -> str:
        """Receive value that match with options.

        :param value: A value that should be one of ``options``.

        :raises ValueError: If the value is not listed in ``options``.
        :raises IndexError: If no value was passed and ``options`` is empty.

        :rtype: str
        :returns: The first option when the value is None, otherwise the value
            itself.
        """
        # NOTE:
        #   Return the first value in options if does not pass any input value
        if value is None:
            return self.options[0]

        # NOTE: A plain membership test is required here; wrapping the boolean
        #   result with ``any`` raises ``TypeError`` because a bool is not
        #   iterable.
        if value not in self.options:
            raise ValueError(f"{value} does not match any value in options")
        return value
373
+
374
+
375
# NOTE: ``IntParams`` is appended last so that inputs which were already
#   accepted by an earlier member of the union keep resolving to that member.
Params = Union[
    ChoiceParams,
    DatetimeParams,
    StrParams,
    IntParams,
]
272
380
 
273
381
 
274
382
  class Pipeline(BaseModel):
275
383
  """Pipeline Model"""
276
384
 
277
- params: dict[str, Any] = Field(default_factory=dict)
385
+ params: dict[str, Params] = Field(default_factory=dict)
278
386
  jobs: dict[str, Job]
279
387
 
280
388
  @classmethod
281
389
  def from_loader(
282
390
  cls,
283
391
  name: str,
284
- externals: DictData,
392
+ externals: Optional[DictData] = None,
285
393
  ) -> Self:
286
- loader: Loader = Loader(name, externals=externals)
394
+ loader: Loader = Loader(name, externals=(externals or {}))
287
395
  if "jobs" not in loader.data:
288
- raise PipeArgumentError("jobs", "Config does not set ``jobs``")
396
+ raise ValueError("Config does not set ``jobs`` value")
289
397
  return cls(
290
398
  jobs=loader.data["jobs"],
291
- params=loader.params(),
399
+ params=loader.data["params"],
292
400
  )
293
401
 
294
402
  def job(self, name: str) -> Job:
295
- """Return Job model that exists on this pipeline."""
403
+ """Return Job model that exists on this pipeline.
404
+
405
+ :param name: A job name that want to get from a mapping of job models.
406
+ :type name: str
407
+
408
+ :rtype: Job
409
+ """
296
410
  if name not in self.jobs:
297
411
  raise ValueError(f"Job {name} does not exists")
298
412
  return self.jobs[name]
299
413
 
300
- def execute(self, params: dict[str, Any] | None = None):
414
+ def execute(self, params: DictData | None = None) -> DictData:
301
415
  """Execute pipeline with passing dynamic parameters.
302
416
 
303
417
  See Also:
@@ -312,18 +426,22 @@ class Pipeline(BaseModel):
312
426
  ... "<job-name>.stages.<stage-id>.outputs.<key>"
313
427
 
314
428
  """
315
- params: dict[str, Any] = params or {}
429
+ params: DictData = params or {}
316
430
  check_key = tuple(f"{k!r}" for k in self.params if k not in params)
317
431
  if check_key:
318
432
  raise ValueError(
319
433
  f"Parameters that needed on pipeline does not pass: "
320
434
  f"{', '.join(check_key)}."
321
435
  )
322
- params: dict[str, Any] = {
436
+
437
+ if any(p not in params for p in self.params if self.params[p].required):
438
+ raise ValueError("Required parameter does not pass")
439
+
440
+ params: DictData = {
323
441
  "params": (
324
442
  params
325
443
  | {
326
- k: self.params[k](params[k])
444
+ k: self.params[k].receive(params[k])
327
445
  for k in params
328
446
  if k in self.params
329
447
  }
@@ -331,7 +449,7 @@ class Pipeline(BaseModel):
331
449
  }
332
450
  for job_id in self.jobs:
333
451
  print(f"[PIPELINE]: Start execute the job: {job_id!r}")
334
- job = self.jobs[job_id]
452
+ job: Job = self.jobs[job_id]
335
453
  # TODO: Condition on ``needs`` of this job was set. It should create
336
454
  # multithreading process on this step.
337
455
  job.execute(params=params)
@@ -9,15 +9,13 @@ from datetime import datetime
9
9
  from typing import Annotated
10
10
  from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
11
11
 
12
- from ddeutil.io import Params
13
12
  from ddeutil.workflow.vendors.__schedule import CronJob, CronRunner
14
13
  from pydantic import BaseModel, ConfigDict, Field
15
14
  from pydantic.functional_validators import field_validator
16
15
  from typing_extensions import Self
17
16
 
18
17
  from .__types import DictData
19
- from .exceptions import ScdlArgumentError
20
- from .loader import SimLoad
18
+ from .loader import Loader
21
19
 
22
20
 
23
21
  class BaseScdl(BaseModel):
@@ -37,14 +35,11 @@ class BaseScdl(BaseModel):
37
35
  def from_loader(
38
36
  cls,
39
37
  name: str,
40
- params: Params,
41
38
  externals: DictData,
42
39
  ) -> Self:
43
- loader: SimLoad = SimLoad(name, params=params, externals=externals)
40
+ loader: Loader = Loader(name, externals=externals)
44
41
  if "cronjob" not in loader.data:
45
- raise ScdlArgumentError(
46
- "cronjob", "Config does not set ``cronjob``"
47
- )
42
+ raise ValueError("Config does not set ``cronjob`` value")
48
43
  return cls(cronjob=loader.data["cronjob"], extras=externals)
49
44
 
50
45
  @field_validator("tz")
@@ -1,10 +1,6 @@
1
- from typing import Any
2
-
3
- from ddeutil.core import lazy
4
-
5
- registries: dict[str, Any] = {
6
- "el-csv-to-parquet": {
7
- "polars": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet"),
8
- "polars-dir": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet_dir"),
9
- },
10
- }
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from ._polars import *
@@ -0,0 +1,54 @@
1
+ import logging
2
+ import math
3
+
4
# NOTE: pandas is an optional dependency; fail at import time with a clear
#   message instead of a bare ``ModuleNotFoundError``.
try:
    import pandas as pd

    logging.debug(f"Pandas version: {pd.__version__}")
except ImportError as err:
    raise ImportError(
        "``split_iterable`` function want to use pandas package that does "
        "not install on your interpreter."
    ) from err
13
+
14
+
15
def _iter_chunks(iterable, chunk_size):
    """Yield consecutive ``chunk_size``-sized slices of a sized iterable."""
    # NOTE: A pandas DataFrame must be sliced by position with ``iloc``.
    is_frame = isinstance(iterable, pd.DataFrame)
    for start in range(0, len(iterable), chunk_size):
        stop = start + chunk_size
        yield iterable.iloc[start:stop] if is_frame else iterable[start:stop]


def split_iterable(iterable, chunk_size=None, generator_flag: bool = True):
    """Split a sliceable iterable into mini batches of ``chunk_size`` items.

    A pandas DataFrame is supported and will be split by row position.

    Usage:
        >>> for i in split_iterable([1, 2, 3, 4, 5], chunk_size=2):
        ...     print(i)
        [1, 2]
        [3, 4]
        [5]

        >>> split_iterable([1, 2, 3, 4, 5], chunk_size=2, generator_flag=False)
        [[1, 2], [3, 4], [5]]

    :param iterable: An object that supports ``len`` and slicing, or a pandas
        DataFrame.
    :param chunk_size: A number of items per batch. It uses 25000 when this
        value does not set (or is zero).
    :param generator_flag: Return a lazy generator of batches if true,
        otherwise return a list of all batches.

    :returns: A generator of batches, or a list of batches when
        ``generator_flag`` is false.
    """
    size: int = chunk_size or 25000

    # NOTE: The generator lives in a helper function because any function
    #   that contains ``yield`` always returns a generator, which would make
    #   the list result of the non-generator mode unreachable for the caller.
    batches = _iter_chunks(iterable, size)
    return batches if generator_flag else list(batches)
49
+
50
+
51
def chunks(dataframe: pd.DataFrame, n: int):
    """Yield successive n-sized chunks from dataframe.

    :param dataframe: A pandas DataFrame that will be sliced by row position.
    :param n: A number of rows per chunk; it must be a positive integer.

    :raises ValueError: If ``n`` is not a positive integer (raised when the
        generator is first iterated).
    """
    # NOTE: A zero step makes ``range`` fail with an unrelated message and a
    #   negative step silently yields nothing, so reject both explicitly.
    if n <= 0:
        raise ValueError(f"Chunk size must be a positive integer, got {n!r}.")

    for start in range(0, len(dataframe), n):
        yield dataframe.iloc[start : start + n]
@@ -11,23 +11,41 @@ from uuid import uuid4
11
11
  import polars as pl
12
12
  import pyarrow.parquet as pq
13
13
  from ddeutil.workflow.dataset import PolarsCsv, PolarsParq
14
+ from ddeutil.workflow.utils import tag
14
15
 
15
16
 
17
def polars_dtype():
    """Return a new mapping of short type names to Polars data types."""
    dtype_by_name = {"str": pl.Utf8, "int": pl.Int32}
    return dtype_by_name
22
+
23
+
24
+ @tag("polars-dir", name="el-csv-to-parquet")
16
25
  def csv_to_parquet_dir(
17
26
  source: str,
18
27
  sink: str,
19
28
  conversion: dict[str, Any] | None = None,
20
- ):
29
+ ) -> dict[str, int]:
30
+ """Extract Load data from CSV to Parquet file.
31
+
32
+ :param source:
33
+ :param sink:
34
+ :param conversion:
35
+ """
21
36
  print("Start EL for CSV to Parquet with Polars Engine")
22
37
  print("---")
23
38
  # STEP 01: Read the source data to Polars.
24
39
  src_dataset: PolarsCsv = PolarsCsv.from_loader(name=source, externals={})
25
- src_df = src_dataset.load()
40
+ src_df: pl.DataFrame = src_dataset.load()
26
41
  print(src_df)
27
42
 
28
43
  # STEP 02: Schema conversion on Polars DataFrame.
29
44
  conversion: dict[str, Any] = conversion or {}
30
45
  if conversion:
46
+ src_df = src_df.with_columns(
47
+ *[pl.col(c).cast(col.type).alias(col.name) for c, col in conversion]
48
+ )
31
49
  print("Start Schema Conversion ...")
32
50
 
33
51
  # STEP 03: Write data to parquet file format.
@@ -39,3 +57,28 @@ def csv_to_parquet_dir(
39
57
  basename_template=f"{sink.object}-{uuid4().hex}-{{i}}.snappy.parquet",
40
58
  )
41
59
  return {"records": src_df.select(pl.len()).item()}
60
+
61
+
62
@tag("polars-dir-scan", name="el-csv-to-parquet")
def csv_to_parquet_dir_scan(
    source: str,
    sink: str,
    conversion: dict[str, Any] | None = None,
) -> dict[str, int]:
    """Extract Load data from CSV to Parquet files with a lazy Polars scan.

    :param source: A name of the source dataset that will be loaded with
        ``PolarsCsv.from_loader``.
    :param sink: A name of the sink dataset that will be loaded with
        ``PolarsParq.from_loader``.
    :param conversion: A schema conversion mapping. It does not apply on this
        function yet.

    :rtype: dict[str, int]
    :returns: A mapping with the ``records`` key that keeps the number of rows
        that were written.
    """
    print("Start EL for CSV to Parquet with Polars Engine")
    print("---")
    # STEP 01: Read the source data to Polars.
    src_dataset: PolarsCsv = PolarsCsv.from_loader(name=source, externals={})
    src_lf: pl.LazyFrame = src_dataset.scan()

    # STEP 02: Schema conversion on Polars LazyFrame.
    if conversion:
        # TODO: The schema conversion does not implement for the lazy scan.
        ...

    # STEP 03: Write data to parquet file format.
    # NOTE: Keep the ``sink`` argument untouched and bind the loaded dataset
    #   to its own name instead of rebinding a ``str`` parameter.
    sink_dataset: PolarsParq = PolarsParq.from_loader(name=sink, externals={})

    # NOTE: Collect the lazy plan only once. Collecting again for the record
    #   count would execute the whole scan a second time and could count a
    #   different number of rows than was written if the source changes.
    src_df: pl.DataFrame = src_lf.collect()
    pq.write_to_dataset(
        table=src_df.to_arrow(),
        root_path=f"{sink_dataset.conn.endpoint}/{sink_dataset.object}",
        compression="snappy",
        basename_template=(
            f"{sink_dataset.object}-{uuid4().hex}-{{i}}.snappy.parquet"
        ),
    )
    return {"records": src_df.height}