FlowerPower 0.11.6__py3-none-any.whl → 0.11.6.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
flowerpower/fs/ext.py CHANGED
@@ -193,6 +193,7 @@ def _read_json(
  as_dataframe: (bool, optional) If True, return a DataFrame. Defaults to True.
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -247,8 +248,8 @@ def _read_json(
  data = [opt_dtype_pl(df, strict=False) for df in data]
  if concat:
  result = pl.concat(data, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  return result
  return data

@@ -280,6 +281,7 @@ def _read_json_batches(
  concat: Combine files within each batch
  use_threads: Enable parallel file reading within batches
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments for DataFrame conversion

  Yields:
@@ -354,10 +356,16 @@ def _read_json_batches(
  ][0]
  for _data in batch_data
  ]
-
+ if opt_dtypes:
+ batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
  if concat and len(batch_dfs) > 1:
- yield pl.concat(batch_dfs, how="diagonal_relaxed")
+ batch_df = pl.concat(batch_dfs, how="diagonal_relaxed")
+ # if opt_dtypes:
+ # batch_df = opt_dtype_pl(batch_df, strict=False)
+ yield batch_df
  else:
+ # if opt_dtypes:
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
  yield batch_dfs
  else:
  yield batch_data
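Both the JSON and the CSV code paths now run `opt_dtype_pl(df, strict=False)` over each per-file DataFrame and leave the former post-concat pass commented out. The helper's implementation is not part of this diff; the snippet below is only a rough, plain-Polars approximation of what such a dtype-optimization pass does (column names and casts are made up for illustration):

```python
import polars as pl

# Hypothetical input: everything arrives as strings, as often happens with JSON/CSV.
df = pl.DataFrame({"id": ["1", "2", "3"], "flag": ["true", "false", "true"]})

# Rough approximation of an opt_dtypes pass: cast columns to narrower types where
# every value converts cleanly (strict=False mirrors the non-strict call in the diff).
optimized = df.with_columns(
    pl.col("id").cast(pl.Int64, strict=False),
    (pl.col("flag").str.to_lowercase() == "true").alias("flag"),
)
print(optimized.dtypes)  # [Int64, Boolean]
```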
@@ -403,6 +411,7 @@ def read_json(
  concat: Combine multiple files/batches into single result
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes for performance
  **kwargs: Additional arguments passed to DataFrame conversion

  Returns:
@@ -486,6 +495,7 @@ def _read_csv_file(
  path: Path to CSV file
  self: Filesystem instance to use for reading
  include_file_path: Add source filepath as a column
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments passed to pl.read_csv()

  Returns:
@@ -544,6 +554,7 @@ def _read_csv(
  use_threads: (bool, optional) If True, read files in parallel. Defaults to True.
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -587,8 +598,8 @@ def _read_csv(
  )
  if concat:
  result = pl.concat(dfs, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  return result
  return dfs

@@ -616,6 +627,7 @@ def _read_csv_batches(
  concat: Combine files within each batch
  use_threads: Enable parallel file reading within batches
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments passed to pl.read_csv()

  Yields:
@@ -667,23 +679,28 @@ def _read_csv_batches(
  n_jobs=-1,
  backend="threading",
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  else:
  batch_dfs = [
  _read_csv_file(
- p, self=self, include_file_path=include_file_path, **kwargs
+ p,
+ self=self,
+ include_file_path=include_file_path,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  for p in batch_paths
  ]

- if opt_dtypes:
- batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
+ # if opt_dtypes:
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]

  if concat and len(batch_dfs) > 1:
  result = pl.concat(batch_dfs, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  yield result
  else:
  yield batch_dfs
@@ -766,6 +783,7 @@ def read_csv(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return _read_csv(
@@ -775,6 +793,7 @@ def read_csv(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )

@@ -858,9 +877,7 @@ def _read_parquet(
  if not include_file_path and concat:
  if isinstance(path, str):
  path = path.replace("**", "").replace("*.parquet", "")
- table = pq.read_table(path, filesystem=self, **kwargs)
- if opt_dtypes:
- table = opt_dtype_pa(table, strict=False)
+ table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
  return table
  else:
  if isinstance(path, str):
@@ -907,12 +924,12 @@ def _read_parquet(
  unified_schema = unify_schemas_pa(schemas)
  tables = [cast_schema(t, unified_schema) for t in tables]
  result = pa.concat_tables(tables, promote_options="permissive")
- if opt_dtypes:
- result = opt_dtype_pa(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pa(result, strict=False)
  return result
  elif isinstance(tables, pa.Table):
- if opt_dtypes:
- tables = opt_dtype_pa(tables, strict=False)
+ # if opt_dtypes:
+ # tables = opt_dtype_pa(tables, strict=False)
  return tables
  else:
  return pa.concat_tables(tables, promote_options="permissive")
@@ -981,9 +998,9 @@ def _read_parquet_batches(
  if not include_file_path and concat and batch_size is None:
  if isinstance(path, str):
  path = path.replace("**", "").replace("*.parquet", "")
- table = pq.read_table(path, filesystem=self, **kwargs)
- if opt_dtypes:
- table = opt_dtype_pa(table, strict=False)
+ table = _read_parquet_file(
+ path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
+ )
  yield table
  return

@@ -994,7 +1011,11 @@ def _read_parquet_batches(

  if not isinstance(path, list):
  yield _read_parquet_file(
- path=path, self=self, include_file_path=include_file_path, **kwargs
+ path=path,
+ self=self,
+ include_file_path=include_file_path,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  return

@@ -1032,12 +1053,12 @@ def _read_parquet_batches(
  unified_schema = unify_schemas_pa(schemas)
  batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
  result = pa.concat_tables(batch_tables, promote_options="permissive")
- if opt_dtypes:
- result = opt_dtype_pa(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pa(result, strict=False)
  yield result
  else:
- if opt_dtypes and isinstance(batch_tables, list):
- batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
+ # if opt_dtypes and isinstance(batch_tables, list):
+ # batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
  yield batch_tables
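The parquet paths keep the schema-unification step (`unify_schemas_pa` plus `cast_schema`, both package helpers not shown in this diff) and delegate dtype optimization to `_read_parquet_file`; the concatenation itself is plain PyArrow. A small standalone sketch of the PyArrow call these hunks rely on (the example tables are made up):

```python
import pyarrow as pa

t1 = pa.table({"a": [1, 2]})
t2 = pa.table({"a": [3], "b": ["x"]})

# promote_options="permissive" (PyArrow >= 14) unifies differing schemas and
# null-fills missing columns, which is what lets per-file tables be combined.
combined = pa.concat_tables([t1, t2], promote_options="permissive")
print(combined.schema)    # a: int64, b: string
print(combined.num_rows)  # 3
```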

@@ -1077,6 +1098,7 @@ def read_parquet(
  concat: Combine multiple files/batches into single Table
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize Table dtypes for performance
  **kwargs: Additional arguments passed to pq.read_table()

  Returns:
@@ -1119,6 +1141,7 @@ def read_parquet(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return _read_parquet(
@@ -1128,6 +1151,7 @@ def read_parquet(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )

@@ -1142,6 +1166,7 @@ def read_files(
  jsonlines: bool = False,
  use_threads: bool = True,
  verbose: bool = False,
+ opt_dtypes: bool = False,
  **kwargs: Any,
  ) -> (
  pl.DataFrame
@@ -1175,6 +1200,7 @@ def read_files(
  jsonlines: For JSON format, whether to read as JSON Lines
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame/Arrow Table dtypes for performance
  **kwargs: Additional format-specific arguments

  Returns:
@@ -1224,6 +1250,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_json(
@@ -1234,6 +1261,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  elif format == "csv":
@@ -1246,6 +1274,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_csv(
@@ -1255,6 +1284,7 @@ def read_files(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  elif format == "parquet":
@@ -1267,6 +1297,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_parquet(
@@ -1276,6 +1307,7 @@ def read_files(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
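Net effect of the ext.py changes: `opt_dtypes` is threaded from `read_files`/`read_json`/`read_csv`/`read_parquet` down to the per-file readers, and the second optimization pass after concatenation is commented out. A hedged sketch of how the flag might be passed from calling code; how the filesystem object is obtained is an assumption, since these helpers are attached to a filesystem instance (base.py calls them as `self.fs.read_files(...)`):

```python
# Sketch only: `fs` stands for whatever filesystem object FlowerPower attaches
# these extension methods to. Obtaining it via plain fsspec is an assumption,
# not the package's documented API.
from fsspec import filesystem

fs = filesystem("file")

# With opt_dtypes=True, dtype optimization now happens per file inside the
# readers; the extra pass after pl.concat()/pa.concat_tables() is disabled.
df = fs.read_files(
    path="data/*.csv",
    format="csv",
    concat=True,
    use_threads=True,
    verbose=False,
    opt_dtypes=True,
)
```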

flowerpower/plugins/io/base.py CHANGED
@@ -185,74 +185,135 @@ class BaseFileReader(BaseFileIO, gc=False):
  include_file_path: bool = field(default=False)
  concat: bool = field(default=True)
  batch_size: int | None = field(default=None)
+ opt_dtypes: bool = field(default=True)
+ use_threads: bool = field(default=True)
  conn: duckdb.DuckDBPyConnection | None = field(default=None)
  ctx: datafusion.SessionContext | None = field(default=None)
  jsonlines: bool | None = field(default=None)
  partitioning: str | list[str] | pds.Partitioning | None = field(default=None)
+ verbose: bool | None = field(default=None)
  _data: Any | None = field(default=None)

- def _load(self, reload: bool = False, **kwargs):
- if "include_file_path" in kwargs:
- if self.include_file_path != kwargs["include_file_path"]:
+ def _load(
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
+ ):
+ if batch_size is not None:
+ if self.batch_size != batch_size:
  reload = True
- self.include_file_path = kwargs.pop("include_file_path")
- else:
- kwargs.pop("include_file_path")
+ self.batch_size = batch_size

- if "concat" in kwargs:
- if self.concat != kwargs["concat"]:
+ if include_file_path is not None:
+ if self.include_file_path != include_file_path:
  reload = True
- self.concat = kwargs.pop("concat")
- else:
- kwargs.pop("concat")
+ self.include_file_path = include_file_path

- if "batch_size" in kwargs:
- if self.batch_size != kwargs["batch_size"]:
+ if concat is not None:
+ if self.concat != concat:
  reload = True
- self.batch_size = kwargs.pop("batch_size")
- else:
- kwargs.pop("batch_size")
+ self.concat = concat
+
+ if use_threads is not None:
+ if self.use_threads != use_threads:
+ reload = True
+ self.use_threads = use_threads
+
+ if verbose is not None:
+ if self.verbose != verbose:
+ reload = True
+ self.verbose = verbose
+
+ if opt_dtypes is not None:
+ if self.opt_dtypes != opt_dtypes:
+ reload = True
+ self.opt_dtypes = opt_dtypes

  if "partitioning" in kwargs:
  if self.partitioning != kwargs["partitioning"]:
  reload = True
  self.partitioning = kwargs.pop("partitioning")
- else:
- kwargs.pop("partitioning")

  if not hasattr(self, "_data") or self._data is None or reload:
  self._data = self.fs.read_files(
  path=self._glob_path,
  format=self.format,
- include_file_path=True,
+ include_file_path=True if metadata or self.include_file_path else False,
  concat=self.concat,
  jsonlines=self.jsonlines or None,
  batch_size=self.batch_size,
  partitioning=self.partitioning,
+ opt_dtypes=self.opt_dtypes,
+ verbose=self.verbose,
+ use_threads=self.use_threads,
  **kwargs,
  )
- if not isinstance(self._data, Generator):
- self._metadata = get_dataframe_metadata(
- df=self._data,
- path=self.path,
- format=self.format,
- # num_files=pl.from_arrow(self._data.select(["file_path"])).select(
- # pl.n_unique("file_path")
- # )[0, 0],
- )
- if not self.include_file_path:
- if isinstance(self._data, pa.Table):
+ if metadata:
+ if isinstance(self._data, tuple | list):
+ self._metadata = [
+ get_dataframe_metadata(
+ df=df,
+ path=self.path,
+ format=self.format,
+ num_files=pl.from_arrow(df.select(["file_path"])).select(
+ pl.n_unique("file_path")
+ )[0, 0]
+ if isinstance(df, pa.Table)
+ else df.select(pl.n_unique("file_path"))[0, 0],
+ )
+ for df in self._data
+ ]
+ if not self.include_file_path:
+ self._data = [df.drop("file_path") for df in self._data]
+
+ elif isinstance(self._data, pa.Table):
+ self._metadata = get_dataframe_metadata(
+ df=self._data,
+ path=self.path,
+ format=self.format,
+ num_files=pl.from_arrow(
+ self._data.select(pl.n_unique("file_path"))
+ )[0, 0],
+ )
+ if not self.include_file_path:
+ self._data = self._data.drop("file_path")
+
+ elif isinstance(self._data, pl.DataFrame | pl.LazyFrame):
+ self._metadata = get_dataframe_metadata(
+ df=self._data,
+ path=self.path,
+ format=self.format,
+ num_files=self._data.select(pl.n_unique("file_path"))[0, 0]
+ if isinstance(self._data, pl.DataFrame)
+ else self._data.select(pl.n_unique("file_path")).collect()[
+ 0, 0
+ ],
+ )
+
+ if not self.include_file_path:
  self._data = self._data.drop("file_path")
- elif isinstance(self._data, list | tuple):
- self._data = [
- df.drop("file_path") if isinstance(df, pa.Table) else df
- for df in self._data
- ]
+ else:
+ metadata = {}
  else:
  self._metadata = {}

  def to_pandas(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pd.DataFrame | list[pd.DataFrame], dict[str, Any]]
  | pd.DataFrame
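The rewritten `_load` drops the `kwargs.pop(...)` bookkeeping in favor of explicit keyword arguments, each compared against the stored field: any option that differs flips `reload`, so the data is read again with the new settings. Note the new reader fields default to `opt_dtypes=True` and `use_threads=True`. A compressed, standalone illustration of that reload-on-change pattern (the class and field names below are hypothetical, not FlowerPower API):

```python
# Standalone illustration of the reload-on-change pattern used by the new _load();
# the Options class and field names here are made up for the example.
from dataclasses import dataclass

@dataclass
class Options:
    batch_size: int | None = None
    concat: bool = True
    opt_dtypes: bool = True

def should_reload(opts: Options, *, batch_size=None, concat=None, opt_dtypes=None) -> bool:
    reload = False
    for name, new in (("batch_size", batch_size), ("concat", concat), ("opt_dtypes", opt_dtypes)):
        if new is not None and getattr(opts, name) != new:
            setattr(opts, name, new)   # remember the new setting
            reload = True              # cached data is stale, read again
    return reload
```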
@@ -263,12 +324,28 @@ class BaseFileReader(BaseFileIO, gc=False):
  Args:
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  tuple[pd.DataFrame | list[pd.DataFrame], dict[str, Any]] | pd.DataFrame | list[pd.DataFrame]: Pandas
  DataFrame or list of DataFrames and optional metadata.
  """
- self._load(reload=reload, **kwargs)
+ self._load(
+ reload=reload,
+ metadata=metadata,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df if isinstance(df, pd.DataFrame) else df.to_pandas()
@@ -282,26 +359,49 @@ class BaseFileReader(BaseFileIO, gc=False):
  else self._data.to_pandas()
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def iter_pandas(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pd.DataFrame, None, None]:
  """Iterate over Pandas DataFrames.

  Args:
  batch_size (int, optional): Batch size for iteration. Default is 1.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  Generator[pd.DataFrame, None, None]: Generator of Pandas DataFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

- self._load(reload=reload, **kwargs)
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df if isinstance(df, pd.DataFrame) else df.to_pandas()
@@ -313,13 +413,47 @@ class BaseFileReader(BaseFileIO, gc=False):
  )

  def _to_polars_dataframe(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pl.DataFrame | list[pl.DataFrame], dict[str, Any]]
  | pl.DataFrame
  | list[pl.DataFrame]
  ):
- self._load(reload=reload, **kwargs)
+ """Convert data to Polars DataFrame(s).
+
+ Args:
+ metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ tuple[pl.DataFrame | list[pl.DataFrame], dict[str, Any]] | pl.DataFrame | list[pl.DataFrame]: Polars
+ DataFrame or list of DataFrames and optional metadata.
+ """
+ self._load(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df if isinstance(self._data, pl.DataFrame) else pl.from_arrow(df)
@@ -333,22 +467,48 @@ class BaseFileReader(BaseFileIO, gc=False):
  else pl.from_arrow(self._data)
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def _iter_polars_dataframe(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pl.DataFrame, None, None]:
  """Iterate over Polars DataFrames.

+ Args:
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
  Returns:
  Generator[pl.DataFrame, None, None]: Generator of Polars DataFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
-
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df if isinstance(df, pl.DataFrame) else pl.from_arrow(df)
@@ -360,38 +520,95 @@ class BaseFileReader(BaseFileIO, gc=False):
  )

  def _to_polars_lazyframe(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pl.LazyFrame | list[pl.LazyFrame], dict[str, Any]]
  | pl.LazyFrame
  | list[pl.LazyFrame]
  ):
- self._load(reload=reload, **kwargs)
+ """Convert data to Polars LazyFrame(s).
+
+ Args:
+ metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ tuple[pl.LazyFrame | list[pl.LazyFrame], dict[str, Any]] | pl.LazyFrame | list[pl.LazyFrame]: Polars
+ LazyFrame or list of LazyFrames and optional metadata.
+ """
+ self._load(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if not self.concat:
  df = [df.lazy() for df in self._to_polars_dataframe()]

  else:
  df = self._to_polars_dataframe().lazy()
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def _iter_polars_lazyframe(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pl.LazyFrame, None, None]:
  """Iterate over Polars LazyFrames.

  Args:
  batch_size (int, optional): Batch size for iteration. Default is 1.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  Generator[pl.LazyFrame, None, None]: Generator of Polars LazyFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield (
@@ -410,6 +627,12 @@ class BaseFileReader(BaseFileIO, gc=False):
  self,
  lazy: bool = False,
  metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> (
  pl.DataFrame
@@ -426,6 +649,14 @@ class BaseFileReader(BaseFileIO, gc=False):
  Args:
  lazy (bool, optional): Return a LazyFrame if True, else a DataFrame.
  metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  pl.DataFrame | pl.LazyFrame | list[pl.DataFrame] | list[pl.LazyFrame] | tuple[pl.DataFrame | pl.LazyFrame
@@ -433,32 +664,115 @@ class BaseFileReader(BaseFileIO, gc=False):
  metadata.
  """
  if lazy:
- return self._to_polars_lazyframe(**kwargs)
- return self._to_polars_dataframe(**kwargs)
+ return self._to_polars_lazyframe(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
+ return self._to_polars_dataframe(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

  def iter_polars(
  self,
  lazy: bool = False,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> Generator[pl.DataFrame | pl.LazyFrame, None, None]:
+ """Iterate over Polars DataFrames or LazyFrames.
+
+ Args:
+ lazy (bool, optional): Return a LazyFrame if True, else a DataFrame. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ Generator[pl.DataFrame | pl.LazyFrame, None, None]: Generator of Polars DataFrames or LazyFrames.
+ """
  if lazy:
- yield from self._iter_polars_lazyframe(**kwargs)
- yield from self._iter_polars_dataframe(**kwargs)
+ yield from self._iter_polars_lazyframe(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
+ yield from self._iter_polars_dataframe(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

  def to_pyarrow_table(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> pa.Table | list[pa.Table] | tuple[pa.Table | list[pa.Table], dict[str, Any]]:
  """Convert data to PyArrow Table(s).

  Args:
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  pa.Table | list[pa.Table] | tuple[pa.Table | list[pa.Table], dict[str, Any]]: PyArrow Table or list of
  Tables and optional metadata.
  """
- self._load(reload=reload, **kwargs)
+ self._load(
+ reload=reload,
+ metadata=metadata,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=None,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
@@ -472,22 +786,48 @@ class BaseFileReader(BaseFileIO, gc=False):
  else self._data
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def iter_pyarrow_table(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pa.Table, None, None]:
  """Iterate over PyArrow Tables.

+ Args:
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single Table. Default is True.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
  Returns:
  Generator[pa.Table, None, None]: Generator of PyArrow Tables.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
-
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
@@ -503,6 +843,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  conn: duckdb.DuckDBPyConnection | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> duckdb.DuckDBPyRelation | tuple[duckdb.DuckDBPyRelation, dict[str, Any]]:
  """Convert data to DuckDB relation.
@@ -511,6 +855,11 @@ class BaseFileReader(BaseFileIO, gc=False):
  conn (duckdb.DuckDBPyConnection, optional): DuckDB connection instance.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  duckdb.DuckDBPyRelation | tuple[duckdb.DuckDBPyRelation, dict[str, Any]]: DuckDB relation and optional
@@ -523,10 +872,27 @@ class BaseFileReader(BaseFileIO, gc=False):

  if metadata:
  return self._conn.from_arrow(
- self.to_pyarrow_table(concat=True, reload=reload, **kwargs),
+ self.to_pyarrow_table(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ se_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ ),
  ), self._metadata
  return self._conn.from_arrow(
- self.to_pyarrow_table(concat=True, reload=reload, **kwargs)
+ self.to_pyarrow_table(
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  )

  def register_in_duckdb(
@@ -535,6 +901,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> duckdb.DuckDBPyConnection | tuple[duckdb.DuckDBPyConnection, dict[str, Any]]:
  """Register data in DuckDB.
@@ -544,6 +914,11 @@ class BaseFileReader(BaseFileIO, gc=False):
  name (str, optional): Name for the DuckDB table.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  duckdb.DuckDBPyConnection | tuple[duckdb.DuckDBPyConnection, dict[str, Any]]: DuckDB connection instance
@@ -558,7 +933,16 @@ class BaseFileReader(BaseFileIO, gc=False):
  self._conn = conn

  self._conn.register(
- name, self.to_pyarrow_table(concat=True, reload=reload, **kwargs)
+ name,
+ self.to_pyarrow_table(
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ ),
  )
  if metadata:
  return self._conn, self._metadata
@@ -571,6 +955,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> (
  duckdb.DuckDBPyRelation
@@ -586,6 +974,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name (str, optional): Name for the DuckDB table.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -596,10 +988,25 @@ class BaseFileReader(BaseFileIO, gc=False):
  """
  if as_relation:
  return self.to_duckdb_relation(
- conn=conn, metadata=metadata, reload=reload, **kwargs
+ conn=conn,
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  return self.register_in_duckdb(
- conn=conn, name=name, metadata=metadata, reload=reload, **kwargs
+ conn=conn,
+ name=name,
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )

  def register_in_datafusion(
@@ -608,6 +1015,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> datafusion.SessionContext | tuple[datafusion.SessionContext, dict[str, Any]]:
  """Register data in DataFusion.
@@ -632,11 +1043,18 @@ class BaseFileReader(BaseFileIO, gc=False):

  self._ctx.register_record_batches(
  name,
- [self.to_pyarrow_table(concat=True, reload=reload, **kwargs).to_batches()],
+ [
+ self.to_pyarrow_table(
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ opt_dtypes=opt_dtypes**kwargs,
+ ).to_batches()
+ ],
  )
  if metadata:
  return self._ctx, self._metadata
- return ctx
+ return self._ctx

  def filter(
  self, filter_expr: str | pl.Expr | pa.compute.Expression
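All of the conversion methods above (`to_pandas`, `to_polars`, `to_pyarrow_table`, `to_duckdb_relation`, `register_in_duckdb`, `register_in_datafusion`) now accept the same per-call options and forward them to `_load` explicitly. A hedged sketch of the resulting call pattern; `ParquetFileReader` is a placeholder name for whichever concrete `BaseFileReader` subclass the io plugin provides, and its constructor arguments are assumptions:

```python
# Placeholder subclass name and constructor args: adjust to the actual reader
# classes exposed by flowerpower.plugins.io.
reader = ParquetFileReader(path="data/", format="parquet")

# Per-call options are now explicit keywords that _load() compares against the
# stored settings, reloading only when something actually changed.
df, meta = reader.to_polars(metadata=True, opt_dtypes=True, use_threads=True)

# Batched iteration: batch_size falls back to the stored value, then to 1.
for table in reader.iter_pyarrow_table(batch_size=2, opt_dtypes=True):
    print(table.num_rows)
```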
flowerpower-0.11.6.dist-info/METADATA → flowerpower-0.11.6.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: FlowerPower
- Version: 0.11.6
+ Version: 0.11.6.2
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
  Author-email: "Volker L." <ligno.blades@gmail.com>
  Project-URL: Homepage, https://github.com/legout/flowerpower
flowerpower-0.11.6.dist-info/RECORD → flowerpower-0.11.6.2.dist-info/RECORD CHANGED
@@ -18,7 +18,7 @@ flowerpower/cli/pipeline.py,sha256=60P6u_QOSgp0jJXEMxazEEo5Sh7-SWFo-Kkuaz21YuI,3
  flowerpower/cli/utils.py,sha256=nDSSj_1nlYlMmj252kRZeohhFqHv9yvdgDEduQCyWOc,5152
  flowerpower/fs/__init__.py,sha256=uZaPXErEfQqQRbKRIjkB9yiygd45X5_psYn9-VVrBTQ,910
  flowerpower/fs/base.py,sha256=TqgqBsaFj13O1NpAr8kHuGJ9CTlaSWViMB8Ai_iuCjs,22761
- flowerpower/fs/ext.py,sha256=zTZO-j__O6Om7gbOpXJL7uDo2Cki6hOdlx_GDJ-Xujw,67625
+ flowerpower/fs/ext.py,sha256=2NmhSbCIL0qnONMRNPHcPUuR39bGjWpxJE4hNHU5Rvw,69044
  flowerpower/fs/storage_options.py,sha256=msq5TpxAU8tcE_Bxjw6SyxaFa75UjdYnR4-O9U2wmbk,48034
  flowerpower/job_queue/__init__.py,sha256=a25hIqv2xoFKb4JZlyUukS0ppZ9-2sJKH3XAvbk3rlk,10788
  flowerpower/job_queue/base.py,sha256=YwLunDQSyqkSU_vJ69C5SSybJeJP1bAiZ3teUtOchxA,13640
@@ -44,7 +44,7 @@ flowerpower/pipeline/manager.py,sha256=KVpOclUEUAETUNJamJJGuKt3oxCaLitQgxWxkE1q0
  flowerpower/pipeline/registry.py,sha256=6ngmHyKyQsxvIO4qRYxljedY0BE1wE3lpfksEGOzjNs,18963
  flowerpower/pipeline/runner.py,sha256=dsSVYixFXqlxFk8EJfT4wV_7IwgkXq0ErwH_yf_NGS8,25654
  flowerpower/pipeline/visualizer.py,sha256=amjMrl5NetErE198HzZBPWVZBi_t5jj9ydxWpuNLoTI,5013
- flowerpower/plugins/io/base.py,sha256=tyFbvx8Ij8gTKP8p8GfwpP5dpIWNncGJfcuK_hPCPN0,79383
+ flowerpower/plugins/io/base.py,sha256=d3U5L--SpmowOpXyLTpnvbpaVCeyzoxyiqBbSk2h_K4,96685
  flowerpower/plugins/io/metadata.py,sha256=PCrepLilXRWKDsB5BKFF_-OFs712s1zBeitW-84lDLQ,7005
  flowerpower/plugins/io/helpers/datetime.py,sha256=1WBUg2ywcsodJQwoF6JiIGc9yhVobvE2IErWp4i95m4,10649
  flowerpower/plugins/io/helpers/polars.py,sha256=346DBHG-HvoGZWF-DWxgz7H3KlZu8bFylKIqMOnVJSk,27031
@@ -94,9 +94,9 @@ flowerpower/utils/monkey.py,sha256=VPl3yimoWhwD9kI05BFsjNvtyQiDyLfY4Q85Bb6Ma0w,2
  flowerpower/utils/open_telemetry.py,sha256=fQWJWbIQFtKIxMBjAWeF12NGnqT0isO3A3j-DSOv_vE,949
  flowerpower/utils/scheduler.py,sha256=2zJ_xmLXpvXUQNF1XS2Gqm3Ogo907ctZ50GtvQB_rhE,9354
  flowerpower/utils/templates.py,sha256=ouyEeSDqa9PjW8c32fGpcINlpC0WToawRFZkMPtwsLE,1591
- flowerpower-0.11.6.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
- flowerpower-0.11.6.dist-info/METADATA,sha256=5Lg0RYLDvqAzJw05z2Qp-OmldA5Cy_FA7XTXlOa2Oos,21610
- flowerpower-0.11.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- flowerpower-0.11.6.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
- flowerpower-0.11.6.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
- flowerpower-0.11.6.dist-info/RECORD,,
+ flowerpower-0.11.6.2.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
+ flowerpower-0.11.6.2.dist-info/METADATA,sha256=ftcXBLIRI60sqhug1BnV6KRZY66H0_a65hlamW3COz0,21612
+ flowerpower-0.11.6.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ flowerpower-0.11.6.2.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
+ flowerpower-0.11.6.2.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
+ flowerpower-0.11.6.2.dist-info/RECORD,,