FlowerPower 0.11.3__tar.gz → 0.11.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {flowerpower-0.11.3/src/FlowerPower.egg-info → flowerpower-0.11.5}/PKG-INFO +1 -1
  2. {flowerpower-0.11.3 → flowerpower-0.11.5}/pyproject.toml +1 -1
  3. {flowerpower-0.11.3 → flowerpower-0.11.5/src/FlowerPower.egg-info}/PKG-INFO +1 -1
  4. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/fs/base.py +9 -1
  5. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/manager.py +5 -1
  6. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/base.py +78 -53
  7. {flowerpower-0.11.3 → flowerpower-0.11.5}/LICENSE +0 -0
  8. {flowerpower-0.11.3 → flowerpower-0.11.5}/README.md +0 -0
  9. {flowerpower-0.11.3 → flowerpower-0.11.5}/setup.cfg +0 -0
  10. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
  11. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
  12. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/FlowerPower.egg-info/entry_points.txt +0 -0
  13. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/FlowerPower.egg-info/requires.txt +0 -0
  14. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/FlowerPower.egg-info/top_level.txt +0 -0
  15. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/__init__.py +0 -0
  16. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/__init__.py +0 -0
  17. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/base.py +0 -0
  18. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
  19. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
  20. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/pipeline/run.py +0 -0
  21. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/pipeline/schedule.py +0 -0
  22. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/project/__init__.py +0 -0
  23. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/project/adapter.py +0 -0
  24. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cfg/project/job_queue.py +0 -0
  25. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/__init__.py +0 -0
  26. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/cfg.py +0 -0
  27. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/job_queue.py +0 -0
  28. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/mqtt.py +0 -0
  29. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/pipeline.py +0 -0
  30. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/cli/utils.py +0 -0
  31. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/flowerpower.py +0 -0
  32. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/fs/__init__.py +0 -0
  33. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/fs/ext.py +0 -0
  34. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/fs/storage_options.py +0 -0
  35. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/__init__.py +0 -0
  36. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/__init__.py +0 -0
  37. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -0
  38. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -0
  39. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/manager.py +0 -0
  40. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/setup.py +0 -0
  41. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/trigger.py +0 -0
  42. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/apscheduler/utils.py +0 -0
  43. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/base.py +0 -0
  44. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/__init__.py +0 -0
  45. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/_trigger.py +0 -0
  46. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -0
  47. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -0
  48. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/manager.py +0 -0
  49. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/setup.py +0 -0
  50. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/job_queue/rq/utils.py +0 -0
  51. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/mqtt.py +0 -0
  52. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/__init__.py +0 -0
  53. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/base.py +0 -0
  54. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/io.py +0 -0
  55. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/job_queue.py +0 -0
  56. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/registry.py +0 -0
  57. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/runner.py +0 -0
  58. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/pipeline/visualizer.py +0 -0
  59. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/helpers/datetime.py +0 -0
  60. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/helpers/polars.py +0 -0
  61. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/helpers/sql.py +0 -0
  62. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/__init__.py +0 -0
  63. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/csv.py +0 -0
  64. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/deltatable.py +0 -0
  65. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/duckdb.py +0 -0
  66. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/json.py +0 -0
  67. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/mqtt.py +0 -0
  68. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/mssql.py +0 -0
  69. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/mysql.py +0 -0
  70. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/oracle.py +0 -0
  71. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/parquet.py +0 -0
  72. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/postgres.py +0 -0
  73. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/pydala.py +0 -0
  74. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/loader/sqlite.py +0 -0
  75. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/metadata.py +0 -0
  76. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/__init__.py +0 -0
  77. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/csv.py +0 -0
  78. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/deltatable.py +0 -0
  79. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/duckdb.py +0 -0
  80. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/json.py +0 -0
  81. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/mqtt.py +0 -0
  82. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/mssql.py +0 -0
  83. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/mysql.py +0 -0
  84. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/oracle.py +0 -0
  85. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/parquet.py +0 -0
  86. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/postgres.py +0 -0
  87. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/pydala.py +0 -0
  88. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/io/saver/sqlite.py +0 -0
  89. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/mqtt/__init__.py +0 -0
  90. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/mqtt/cfg.py +0 -0
  91. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/plugins/mqtt/manager.py +0 -0
  92. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/__init__.py +0 -0
  93. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/backend.py +0 -0
  94. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/executor.py +0 -0
  95. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/general.py +0 -0
  96. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/hamilton.py +0 -0
  97. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/job_queue.py +0 -0
  98. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/logging.py +0 -0
  99. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/settings/retry.py +0 -0
  100. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/callback.py +0 -0
  101. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/logging.py +0 -0
  102. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/misc.py +0 -0
  103. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/monkey.py +0 -0
  104. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/open_telemetry.py +0 -0
  105. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/scheduler.py +0 -0
  106. {flowerpower-0.11.3 → flowerpower-0.11.5}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.3
3
+ Version: 0.11.5
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
4
4
  authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.11"
7
- version = "0.11.3"
7
+ version = "0.11.5"
8
8
  keywords = [
9
9
  "hamilton",
10
10
  "workflow",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.3
3
+ Version: 0.11.5
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -608,7 +608,15 @@ def get_filesystem(
608
608
  return DirFileSystem(path=path, fs=fs)
609
609
 
610
610
  pp = infer_storage_options(str(path) if isinstance(path, Path) else path)
611
- protocol = pp.get("protocol")
611
+ protocol = (
612
+ storage_options_kwargs.get("protocol", None)
613
+ or (
614
+ storage_options.get("protocol", None)
615
+ if isinstance(storage_options, dict)
616
+ else getattr(storage_options, "protocol", None)
617
+ )
618
+ or pp.get("protocol", "file")
619
+ )
612
620
 
613
621
  if protocol == "file" or protocol == "local":
614
622
  fs = filesystem(protocol)
@@ -145,7 +145,11 @@ class PipelineManager:
145
145
  cache_storage=cache_storage,
146
146
  )
147
147
  self._fs = fs
148
- self._storage_options = storage_options or fs.storage_options
148
+ self._storage_options = (
149
+ storage_options or fs.storage_options
150
+ if fs.protocol != "dir"
151
+ else fs.fs.storage_options
152
+ )
149
153
 
150
154
  # Store overrides for ProjectConfig loading
151
155
  self._cfg_dir = cfg_dir
@@ -1,4 +1,5 @@
1
1
  import importlib
2
+ import os
2
3
  import posixpath
3
4
  from typing import Any, Generator
4
5
 
@@ -22,8 +23,9 @@ from sqlalchemy import create_engine, text
22
23
  from ...fs import get_filesystem
23
24
  from ...fs.ext import _dict_to_dataframe, path_to_glob
24
25
  from ...fs.storage_options import (AwsStorageOptions, AzureStorageOptions,
25
- GcsStorageOptions, GitHubStorageOptions,
26
- GitLabStorageOptions, StorageOptions)
26
+ BaseStorageOptions, GcsStorageOptions,
27
+ GitHubStorageOptions, GitLabStorageOptions,
28
+ StorageOptions)
27
29
  from ...utils.misc import convert_large_types_to_standard, to_pyarrow_table
28
30
  from .helpers.polars import pl
29
31
  from .helpers.sql import sql2polars_filter, sql2pyarrow_filter
@@ -75,67 +77,80 @@ class BaseFileIO(msgspec.Struct, gc=False):
75
77
  ) = field(default=None)
76
78
  fs: AbstractFileSystem | None = field(default=None)
77
79
  format: str | None = None
78
- _raw_path: str | list[str] | None = field(default=None)
80
+ # _base_path: str | list[str] | None = field(default=None)
81
+ # _full_path: str | list[str] | None = field(default=None)
82
+ # _rel_path: str | list[str] | None = field(default=None)
83
+ # _glob_path
79
84
  _metadata: dict[str, Any] | None = field(default=None)
80
85
 
81
86
  def __post_init__(self):
82
- self._raw_path = self.path
83
- if isinstance(self.storage_options, dict):
84
- if "protocol" not in self.storage_options:
85
- self.storage_options["protocol"] = get_protocol(self.path)
86
- self.storage_options = StorageOptions(
87
- **self.storage_options
88
- ).storage_options
89
- if isinstance(self.storage_options, StorageOptions):
90
- self.storage_options = self.storage_options.storage_options
87
+ # self._base_path = self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
91
88
 
92
89
  if self.fs is None:
93
90
  self.fs = get_filesystem(
94
- path=self.path if isinstance(self.path, str) else self.path[0],
91
+ path=self._base_path,
95
92
  storage_options=self.storage_options,
96
93
  fs=self.fs,
97
94
  dirfs=True,
98
95
  )
96
+ self.storage_options = (
97
+ self.storage_options or self.fs.storage_options
98
+ if self.protocol != "dir"
99
+ else self.fs.fs.storage_options
100
+ )
99
101
 
100
- if hasattr(self.storage_options, "protocol"):
101
- protocol = self.storage_options.protocol
102
- else:
103
- protocol = self.fs.protocol
104
- if protocol == "dir":
105
- protocol = (
106
- self.fs.fs.protocol
107
- if isinstance(self.fs.fs.protocol, str)
108
- else self.fs.fs.protocol[0]
109
- )
110
- if isinstance(protocol, list | tuple):
111
- protocol = protocol[0]
112
-
113
- if isinstance(self.path, str):
114
- self.path = (
115
- self.path.replace(protocol + "://", "")
116
- .replace(f"**/*.{self.format}", "")
117
- .replace("**", "")
118
- .replace("*", "")
119
- .rstrip("/")
120
- )
102
+ # self.path = (
103
+ # self._raw_path.replace(protocol + "://", "")
104
+ # .replace(f"**/*.{self.format}", "")
105
+ # .replace("**", "")
106
+ # .replace("*", "")
107
+ # .rstrip("/")
108
+ # )
109
+
110
+ @property
111
+ def protocol(self):
112
+ """Get the protocol of the filesystem."""
113
+ protocol = (
114
+ self.fs.protocol if self.fs.protocol != "dir" else self.fs.fs.protocol
115
+ )
116
+ if isinstance(protocol, list | tuple):
117
+ protocol = protocol[0]
118
+ return protocol
119
+
120
+ @property
121
+ def _base_path(self) -> str:
122
+ """Get the base path for the filesystem."""
123
+
124
+ path = (
125
+ self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
126
+ )
127
+ return path
121
128
 
122
129
  @property
123
- def _path(self):
130
+ def _path(self) -> str | list[str]:
124
131
  if self.fs.protocol == "dir":
125
132
  if isinstance(self.path, list):
126
133
  return [
127
- p.replace(self.fs.path.lstrip("/"), "").lstrip("/")
134
+ p.replace(self._base_path.lstrip("/"), "").lstrip("/")
128
135
  for p in self.path
129
136
  ]
130
137
  else:
131
- return self.path.replace(self.fs.path.lstrip("/"), "").lstrip("/")
138
+ return self.path.replace(self._base_path.lstrip("/"), "").lstrip("/")
132
139
  return self.path
133
140
 
134
141
  @property
135
- def _glob_path(self):
142
+ def _glob_path(self) -> str | list[str]:
143
+ if isinstance(self._path, list):
144
+ return self._path
136
145
  return path_to_glob(self._path, self.format)
137
146
 
138
- def list_files(self):
147
+ @property
148
+ def _root_path(self) -> str:
149
+ if self.fs.protocol == "dir":
150
+ return self._base_path.replace(self.fs.path, "")
151
+ return self._base_path
152
+
153
+ def list_files(self) -> list[str]:
139
154
  if isinstance(self._path, list):
140
155
  return self._path
141
156
 
@@ -276,7 +291,7 @@ class BaseFileReader(BaseFileIO, gc=False):
276
291
  return df
277
292
 
278
293
  def iter_pandas(
279
- self, batch_size: int = 1, reload: bool = False, **kwargs
294
+ self, reload: bool = False, **kwargs
280
295
  ) -> Generator[pd.DataFrame, None, None]:
281
296
  """Iterate over Pandas DataFrames.
282
297
 
@@ -287,7 +302,10 @@ class BaseFileReader(BaseFileIO, gc=False):
287
302
  Returns:
288
303
  Generator[pd.DataFrame, None, None]: Generator of Pandas DataFrames.
289
304
  """
290
- self._load(batch_size=batch_size, reload=reload, **kwargs)
305
+ if self.batch_size is None and "batch_size" not in kwargs:
306
+ self.batch_size = 1
307
+
308
+ self._load(reload=reload, **kwargs)
291
309
  if isinstance(self._data, list | Generator):
292
310
  for df in self._data:
293
311
  yield df if isinstance(df, pd.DataFrame) else df.to_pandas()
@@ -324,14 +342,17 @@ class BaseFileReader(BaseFileIO, gc=False):
324
342
  return df
325
343
 
326
344
  def _iter_polars_dataframe(
327
- self, batch_size: int = 1, reload: bool = False, **kwargs
345
+ self, reload: bool = False, **kwargs
328
346
  ) -> Generator[pl.DataFrame, None, None]:
329
347
  """Iterate over Polars DataFrames.
330
348
 
331
349
  Returns:
332
350
  Generator[pl.DataFrame, None, None]: Generator of Polars DataFrames.
333
351
  """
334
- self._load(batch_size=batch_size, reload=reload, **kwargs)
352
+ if self.batch_size is None and "batch_size" not in kwargs:
353
+ self.batch_size = 1
354
+
355
+ self._load(reload=reload, **kwargs)
335
356
  if isinstance(self._data, list | Generator):
336
357
  for df in self._data:
337
358
  yield df if isinstance(df, pl.DataFrame) else pl.from_arrow(df)
@@ -361,7 +382,7 @@ class BaseFileReader(BaseFileIO, gc=False):
361
382
  return df
362
383
 
363
384
  def _iter_polars_lazyframe(
364
- self, batch_size: int = 1, reload: bool = False, **kwargs
385
+ self, reload: bool = False, **kwargs
365
386
  ) -> Generator[pl.LazyFrame, None, None]:
366
387
  """Iterate over Polars LazyFrames.
367
388
 
@@ -372,7 +393,9 @@ class BaseFileReader(BaseFileIO, gc=False):
372
393
  Returns:
373
394
  Generator[pl.LazyFrame, None, None]: Generator of Polars LazyFrames.
374
395
  """
375
- self._load(batch_size=batch_size, reload=reload, **kwargs)
396
+ if self.batch_size is None and "batch_size" not in kwargs:
397
+ self.batch_size = 1
398
+ self._load(reload=reload, **kwargs)
376
399
  if isinstance(self._data, list | Generator):
377
400
  for df in self._data:
378
401
  yield (
@@ -420,12 +443,11 @@ class BaseFileReader(BaseFileIO, gc=False):
420
443
  def iter_polars(
421
444
  self,
422
445
  lazy: bool = False,
423
- batch_size: int = 1,
424
446
  **kwargs,
425
447
  ) -> Generator[pl.DataFrame | pl.LazyFrame, None, None]:
426
448
  if lazy:
427
- yield from self._iter_polars_lazyframe(batch_size=batch_size, **kwargs)
428
- yield from self._iter_polars_dataframe(batch_size=batch_size, **kwargs)
449
+ yield from self._iter_polars_lazyframe(**kwargs)
450
+ yield from self._iter_polars_dataframe(**kwargs)
429
451
 
430
452
  def to_pyarrow_table(
431
453
  self, metadata: bool = False, reload: bool = False, **kwargs
@@ -459,14 +481,17 @@ class BaseFileReader(BaseFileIO, gc=False):
459
481
  return df
460
482
 
461
483
  def iter_pyarrow_table(
462
- self, batch_size: int = 1, reload: bool = False, **kwargs
484
+ self, reload: bool = False, **kwargs
463
485
  ) -> Generator[pa.Table, None, None]:
464
486
  """Iterate over PyArrow Tables.
465
487
 
466
488
  Returns:
467
489
  Generator[pa.Table, None, None]: Generator of PyArrow Tables.
468
490
  """
469
- self._load(batch_size=batch_size, reload=reload, **kwargs)
491
+ if self.batch_size is None and "batch_size" not in kwargs:
492
+ self.batch_size = 1
493
+
494
+ self._load(reload=reload, **kwargs)
470
495
  if isinstance(self._data, list | Generator):
471
496
  for df in self._data:
472
497
  yield df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
@@ -752,7 +777,7 @@ class BaseDatasetReader(BaseFileReader, gc=False):
752
777
  Returns:
753
778
  pds.Dataset: PyArrow Dataset.
754
779
  """
755
- if hasattr(self, "_dataset") and not reload:
780
+ if self._dataset is not None and not reload:
756
781
  if metadata:
757
782
  return self._dataset, self._metadata
758
783
  return self._dataset
@@ -769,9 +794,9 @@ class BaseDatasetReader(BaseFileReader, gc=False):
769
794
  self._dataset, path=self.path, format=self.format
770
795
  )
771
796
  elif self.format == "parquet":
772
- if self.fs.exists(posixpath.join(self._path, "_metadata")):
797
+ if self.fs.exists(posixpath.join(self._root_path, "_metadata")):
773
798
  self._dataset = self.fs.parquet_dataset(
774
- self._path,
799
+ posixpath.join(self._root_path, "_metadata"),
775
800
  schema=self.schema_,
776
801
  partitioning=self.partitioning,
777
802
  **kwargs,
File without changes
File without changes
File without changes