FlowerPower 0.11.6__py3-none-any.whl → 0.11.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flowerpower/fs/ext.py CHANGED
@@ -193,6 +193,7 @@ def _read_json(
  as_dataframe: (bool, optional) If True, return a DataFrame. Defaults to True.
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -247,8 +248,8 @@ def _read_json(
  data = [opt_dtype_pl(df, strict=False) for df in data]
  if concat:
  result = pl.concat(data, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  return result
  return data

@@ -280,6 +281,7 @@ def _read_json_batches(
  concat: Combine files within each batch
  use_threads: Enable parallel file reading within batches
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments for DataFrame conversion

  Yields:
@@ -354,10 +356,16 @@ def _read_json_batches(
  ][0]
  for _data in batch_data
  ]
-
+ if opt_dtypes:
+ batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
  if concat and len(batch_dfs) > 1:
- yield pl.concat(batch_dfs, how="diagonal_relaxed")
+ batch_df = pl.concat(batch_dfs, how="diagonal_relaxed")
+ # if opt_dtypes:
+ # batch_df = opt_dtype_pl(batch_df, strict=False)
+ yield batch_df
  else:
+ # if opt_dtypes:
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
  yield batch_dfs
  else:
  yield batch_data
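Note: opt_dtype_pl itself is not part of this diff. As a rough, illustrative sketch only (names and behaviour assumed, not FlowerPower's actual implementation), a per-DataFrame dtype optimizer in Polars usually amounts to shrinking each column to the smallest dtype that still fits the data, which is the step applied per file above before batches are concatenated:

import polars as pl

def shrink_numeric_dtypes(df: pl.DataFrame) -> pl.DataFrame:
    # shrink_dtype() downcasts numeric columns, e.g. Int64 -> Int8, when the values allow it
    return df.with_columns(pl.all().shrink_dtype())

df = pl.DataFrame({"id": [1, 2, 3], "value": [0.5, 1.5, 2.5]})
print(shrink_numeric_dtypes(df).schema)  # smaller integer/float widths where possible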
@@ -403,6 +411,7 @@ def read_json(
  concat: Combine multiple files/batches into single result
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes for performance
  **kwargs: Additional arguments passed to DataFrame conversion

  Returns:
@@ -486,6 +495,7 @@ def _read_csv_file(
  path: Path to CSV file
  self: Filesystem instance to use for reading
  include_file_path: Add source filepath as a column
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments passed to pl.read_csv()

  Returns:
@@ -544,6 +554,7 @@ def _read_csv(
  use_threads: (bool, optional) If True, read files in parallel. Defaults to True.
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -587,8 +598,8 @@ def _read_csv(
  )
  if concat:
  result = pl.concat(dfs, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  return result
  return dfs

@@ -616,6 +627,7 @@ def _read_csv_batches(
  concat: Combine files within each batch
  use_threads: Enable parallel file reading within batches
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame dtypes
  **kwargs: Additional arguments passed to pl.read_csv()

  Yields:
@@ -667,23 +679,28 @@ def _read_csv_batches(
  n_jobs=-1,
  backend="threading",
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  else:
  batch_dfs = [
  _read_csv_file(
- p, self=self, include_file_path=include_file_path, **kwargs
+ p,
+ self=self,
+ include_file_path=include_file_path,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  for p in batch_paths
  ]

- if opt_dtypes:
- batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
+ # if opt_dtypes:
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]

  if concat and len(batch_dfs) > 1:
  result = pl.concat(batch_dfs, how="diagonal_relaxed")
- if opt_dtypes:
- result = opt_dtype_pl(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pl(result, strict=False)
  yield result
  else:
  yield batch_dfs
@@ -766,6 +783,7 @@ def read_csv(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return _read_csv(
@@ -775,6 +793,7 @@ def read_csv(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )

@@ -858,9 +877,7 @@ def _read_parquet(
  if not include_file_path and concat:
  if isinstance(path, str):
  path = path.replace("**", "").replace("*.parquet", "")
- table = pq.read_table(path, filesystem=self, **kwargs)
- if opt_dtypes:
- table = opt_dtype_pa(table, strict=False)
+ table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
  return table
  else:
  if isinstance(path, str):
@@ -907,12 +924,12 @@ def _read_parquet(
  unified_schema = unify_schemas_pa(schemas)
  tables = [cast_schema(t, unified_schema) for t in tables]
  result = pa.concat_tables(tables, promote_options="permissive")
- if opt_dtypes:
- result = opt_dtype_pa(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pa(result, strict=False)
  return result
  elif isinstance(tables, pa.Table):
- if opt_dtypes:
- tables = opt_dtype_pa(tables, strict=False)
+ # if opt_dtypes:
+ # tables = opt_dtype_pa(tables, strict=False)
  return tables
  else:
  return pa.concat_tables(tables, promote_options="permissive")
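For context, the permissive concatenation used here can be reproduced with plain PyArrow; unify_schemas_pa and cast_schema are FlowerPower helpers that are not shown in this diff, and the tables below are made-up examples:

import pyarrow as pa

t1 = pa.table({"a": [1, 2], "b": ["x", "y"]})
t2 = pa.table({"a": [3], "c": [True]})

# "permissive" unifies the schemas, null-filling columns missing from either
# table and promoting types where needed (PyArrow >= 14).
combined = pa.concat_tables([t1, t2], promote_options="permissive")
print(combined.schema)    # a: int64, b: string, c: bool
print(combined.num_rows)  # 3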
@@ -981,9 +998,9 @@ def _read_parquet_batches(
  if not include_file_path and concat and batch_size is None:
  if isinstance(path, str):
  path = path.replace("**", "").replace("*.parquet", "")
- table = pq.read_table(path, filesystem=self, **kwargs)
- if opt_dtypes:
- table = opt_dtype_pa(table, strict=False)
+ table = _read_parquet_file(
+ path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
+ )
  yield table
  return

@@ -994,7 +1011,11 @@ def _read_parquet_batches(

  if not isinstance(path, list):
  yield _read_parquet_file(
- path=path, self=self, include_file_path=include_file_path, **kwargs
+ path=path,
+ self=self,
+ include_file_path=include_file_path,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  return

@@ -1032,12 +1053,12 @@ def _read_parquet_batches(
  unified_schema = unify_schemas_pa(schemas)
  batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
  result = pa.concat_tables(batch_tables, promote_options="permissive")
- if opt_dtypes:
- result = opt_dtype_pa(result, strict=False)
+ # if opt_dtypes:
+ # result = opt_dtype_pa(result, strict=False)
  yield result
  else:
- if opt_dtypes and isinstance(batch_tables, list):
- batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
+ # if opt_dtypes and isinstance(batch_tables, list):
+ # batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
  yield batch_tables


@@ -1077,6 +1098,7 @@ def read_parquet(
  concat: Combine multiple files/batches into single Table
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize Table dtypes for performance
  **kwargs: Additional arguments passed to pq.read_table()

  Returns:
@@ -1119,6 +1141,7 @@ def read_parquet(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return _read_parquet(
@@ -1128,6 +1151,7 @@ def read_parquet(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )

@@ -1142,6 +1166,7 @@ def read_files(
  jsonlines: bool = False,
  use_threads: bool = True,
  verbose: bool = False,
+ opt_dtypes: bool = False,
  **kwargs: Any,
  ) -> (
  pl.DataFrame
@@ -1175,6 +1200,7 @@ def read_files(
  jsonlines: For JSON format, whether to read as JSON Lines
  use_threads: Enable parallel file reading
  verbose: Print progress information
+ opt_dtypes: Optimize DataFrame/Arrow Table dtypes for performance
  **kwargs: Additional format-specific arguments

  Returns:
@@ -1224,6 +1250,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_json(
@@ -1234,6 +1261,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  elif format == "csv":
@@ -1246,6 +1274,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_csv(
@@ -1255,6 +1284,7 @@ def read_files(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  elif format == "parquet":
@@ -1267,6 +1297,7 @@ def read_files(
  concat=concat,
  use_threads=use_threads,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )
  return read_parquet(
@@ -1276,6 +1307,7 @@ def read_files(
  use_threads=use_threads,
  concat=concat,
  verbose=verbose,
+ opt_dtypes=opt_dtypes,
  **kwargs,
  )

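Taken together, the ext.py changes thread a new opt_dtypes flag from read_files/read_json/read_csv/read_parquet down into the per-file readers. A hypothetical usage sketch, assuming an fsspec-style filesystem object that exposes these helpers as methods; the get_filesystem import and the paths are illustrative, not taken from this diff:

from flowerpower.fs import get_filesystem  # assumed helper; adjust to your setup

fs = get_filesystem("data/")  # any fsspec-compatible filesystem
df = fs.read_files(
    path="events/*.jsonl",
    format="json",
    jsonlines=True,
    concat=True,
    use_threads=True,
    opt_dtypes=True,  # keyword added in 0.11.6.1: optimize dtypes per file
)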
flowerpower/plugins/io/base.py CHANGED
@@ -185,74 +185,134 @@ class BaseFileReader(BaseFileIO, gc=False):
  include_file_path: bool = field(default=False)
  concat: bool = field(default=True)
  batch_size: int | None = field(default=None)
+ opt_dtypes: bool = field(default=True)
+ use_threads: bool = field(default=True)
  conn: duckdb.DuckDBPyConnection | None = field(default=None)
  ctx: datafusion.SessionContext | None = field(default=None)
  jsonlines: bool | None = field(default=None)
  partitioning: str | list[str] | pds.Partitioning | None = field(default=None)
  _data: Any | None = field(default=None)

- def _load(self, reload: bool = False, **kwargs):
- if "include_file_path" in kwargs:
- if self.include_file_path != kwargs["include_file_path"]:
+ def _load(
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
+ ):
+ if batch_size is not None:
+ if self.batch_size != batch_size:
  reload = True
- self.include_file_path = kwargs.pop("include_file_path")
- else:
- kwargs.pop("include_file_path")
+ self.batch_size = batch_size

- if "concat" in kwargs:
- if self.concat != kwargs["concat"]:
+ if include_file_path is not None:
+ if self.include_file_path != include_file_path:
  reload = True
- self.concat = kwargs.pop("concat")
- else:
- kwargs.pop("concat")
+ self.include_file_path = include_file_path

- if "batch_size" in kwargs:
- if self.batch_size != kwargs["batch_size"]:
+ if concat is not None:
+ if self.concat != concat:
  reload = True
- self.batch_size = kwargs.pop("batch_size")
- else:
- kwargs.pop("batch_size")
+ self.concat = concat
+
+ if use_threads is not None:
+ if self.use_threads != use_threads:
+ reload = True
+ self.use_threads = use_threads
+
+ if verbose is not None:
+ if self.fs.verbose != verbose:
+ reload = True
+ self.fs.verbose = verbose
+
+ if opt_dtypes is not None:
+ if self.opt_dtypes != opt_dtypes:
+ reload = True
+ self.opt_dtypes = opt_dtypes

  if "partitioning" in kwargs:
  if self.partitioning != kwargs["partitioning"]:
  reload = True
  self.partitioning = kwargs.pop("partitioning")
- else:
- kwargs.pop("partitioning")

  if not hasattr(self, "_data") or self._data is None or reload:
  self._data = self.fs.read_files(
  path=self._glob_path,
  format=self.format,
- include_file_path=True,
+ include_file_path=True if metadata or self.include_file_path else False,
  concat=self.concat,
  jsonlines=self.jsonlines or None,
  batch_size=self.batch_size,
  partitioning=self.partitioning,
+ opt_dtypes=self.opt_dtypes,
+ verbose=self.verbose,
+ use_threads=self.use_threads,
  **kwargs,
  )
- if not isinstance(self._data, Generator):
- self._metadata = get_dataframe_metadata(
- df=self._data,
- path=self.path,
- format=self.format,
- # num_files=pl.from_arrow(self._data.select(["file_path"])).select(
- # pl.n_unique("file_path")
- # )[0, 0],
- )
- if not self.include_file_path:
- if isinstance(self._data, pa.Table):
+ if metadata:
+ if isinstance(self._data, tuple | list):
+ self._metadata = [
+ get_dataframe_metadata(
+ df=df,
+ path=self.path,
+ format=self.format,
+ num_files=pl.from_arrow(df.select(["file_path"])).select(
+ pl.n_unique("file_path")
+ )[0, 0]
+ if isinstance(df, pa.Table)
+ else df.select(pl.n_unique("file_path"))[0, 0],
+ )
+ for df in self._data
+ ]
+ if not self.include_file_path:
+ self._data = [df.drop("file_path") for df in self._data]
+
+ elif isinstance(self._data, pa.Table):
+ self._metadata = get_dataframe_metadata(
+ df=self._data,
+ path=self.path,
+ format=self.format,
+ num_files=pl.from_arrow(
+ self._data.select(pl.n_unique("file_path"))
+ )[0, 0],
+ )
+ if not self.include_file_path:
+ self._data = self._data.drop("file_path")
+
+ elif isinstance(self._data, pl.DataFrame | pl.LazyFrame):
+ self._metadata = get_dataframe_metadata(
+ df=self._data,
+ path=self.path,
+ format=self.format,
+ num_files=self._data.select(pl.n_unique("file_path"))[0, 0]
+ if isinstance(self._data, pl.DataFrame)
+ else self._data.select(pl.n_unique("file_path")).collect()[
+ 0, 0
+ ],
+ )
+
+ if not self.include_file_path:
  self._data = self._data.drop("file_path")
- elif isinstance(self._data, list | tuple):
- self._data = [
- df.drop("file_path") if isinstance(df, pa.Table) else df
- for df in self._data
- ]
+ else:
+ metadata = {}
  else:
  self._metadata = {}

  def to_pandas(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pd.DataFrame | list[pd.DataFrame], dict[str, Any]]
  | pd.DataFrame
@@ -263,12 +323,28 @@ class BaseFileReader(BaseFileIO, gc=False):
  Args:
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  tuple[pd.DataFrame | list[pd.DataFrame], dict[str, Any]] | pd.DataFrame | list[pd.DataFrame]: Pandas
  DataFrame or list of DataFrames and optional metadata.
  """
- self._load(reload=reload, **kwargs)
+ self._load(
+ reload=reload,
+ metadata=metadata,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df if isinstance(df, pd.DataFrame) else df.to_pandas()
@@ -282,26 +358,49 @@ class BaseFileReader(BaseFileIO, gc=False):
  else self._data.to_pandas()
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def iter_pandas(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pd.DataFrame, None, None]:
  """Iterate over Pandas DataFrames.

  Args:
  batch_size (int, optional): Batch size for iteration. Default is 1.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  Generator[pd.DataFrame, None, None]: Generator of Pandas DataFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

- self._load(reload=reload, **kwargs)
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df if isinstance(df, pd.DataFrame) else df.to_pandas()
@@ -313,13 +412,47 @@ class BaseFileReader(BaseFileIO, gc=False):
  )

  def _to_polars_dataframe(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pl.DataFrame | list[pl.DataFrame], dict[str, Any]]
  | pl.DataFrame
  | list[pl.DataFrame]
  ):
- self._load(reload=reload, **kwargs)
+ """Convert data to Polars DataFrame(s).
+
+ Args:
+ metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ tuple[pl.DataFrame | list[pl.DataFrame], dict[str, Any]] | pl.DataFrame | list[pl.DataFrame]: Polars
+ DataFrame or list of DataFrames and optional metadata.
+ """
+ self._load(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df if isinstance(self._data, pl.DataFrame) else pl.from_arrow(df)
@@ -333,22 +466,48 @@ class BaseFileReader(BaseFileIO, gc=False):
  else pl.from_arrow(self._data)
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def _iter_polars_dataframe(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pl.DataFrame, None, None]:
  """Iterate over Polars DataFrames.

+ Args:
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
  Returns:
  Generator[pl.DataFrame, None, None]: Generator of Polars DataFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
-
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df if isinstance(df, pl.DataFrame) else pl.from_arrow(df)
@@ -360,38 +519,95 @@ class BaseFileReader(BaseFileIO, gc=False):
  )

  def _to_polars_lazyframe(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> (
  tuple[pl.LazyFrame | list[pl.LazyFrame], dict[str, Any]]
  | pl.LazyFrame
  | list[pl.LazyFrame]
  ):
- self._load(reload=reload, **kwargs)
+ """Convert data to Polars LazyFrame(s).
+
+ Args:
+ metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ tuple[pl.LazyFrame | list[pl.LazyFrame], dict[str, Any]] | pl.LazyFrame | list[pl.LazyFrame]: Polars
+ LazyFrame or list of LazyFrames and optional metadata.
+ """
+ self._load(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if not self.concat:
  df = [df.lazy() for df in self._to_polars_dataframe()]

  else:
  df = self._to_polars_dataframe().lazy()
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def _iter_polars_lazyframe(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pl.LazyFrame, None, None]:
  """Iterate over Polars LazyFrames.

  Args:
  batch_size (int, optional): Batch size for iteration. Default is 1.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  Generator[pl.LazyFrame, None, None]: Generator of Polars LazyFrames.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield (
@@ -410,6 +626,12 @@ class BaseFileReader(BaseFileIO, gc=False):
  self,
  lazy: bool = False,
  metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> (
  pl.DataFrame
@@ -426,6 +648,14 @@ class BaseFileReader(BaseFileIO, gc=False):
  Args:
  lazy (bool, optional): Return a LazyFrame if True, else a DataFrame.
  metadata (bool, optional): Include metadata in the output. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  pl.DataFrame | pl.LazyFrame | list[pl.DataFrame] | list[pl.LazyFrame] | tuple[pl.DataFrame | pl.LazyFrame
@@ -433,32 +663,115 @@ class BaseFileReader(BaseFileIO, gc=False):
  metadata.
  """
  if lazy:
- return self._to_polars_lazyframe(**kwargs)
- return self._to_polars_dataframe(**kwargs)
+ return self._to_polars_lazyframe(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
+ return self._to_polars_dataframe(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

  def iter_polars(
  self,
  lazy: bool = False,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> Generator[pl.DataFrame | pl.LazyFrame, None, None]:
+ """Iterate over Polars DataFrames or LazyFrames.
+
+ Args:
+ lazy (bool, optional): Return a LazyFrame if True, else a DataFrame. Default is False.
+ reload (bool, optional): Reload data if True. Default is False.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single DataFrame. Default is True.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
+ Returns:
+ Generator[pl.DataFrame | pl.LazyFrame, None, None]: Generator of Polars DataFrames or LazyFrames.
+ """
  if lazy:
- yield from self._iter_polars_lazyframe(**kwargs)
- yield from self._iter_polars_dataframe(**kwargs)
+ yield from self._iter_polars_lazyframe(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
+ yield from self._iter_polars_dataframe(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )

  def to_pyarrow_table(
- self, metadata: bool = False, reload: bool = False, **kwargs
+ self,
+ metadata: bool = False,
+ reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> pa.Table | list[pa.Table] | tuple[pa.Table | list[pa.Table], dict[str, Any]]:
  """Convert data to PyArrow Table(s).

  Args:
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  pa.Table | list[pa.Table] | tuple[pa.Table | list[pa.Table], dict[str, Any]]: PyArrow Table or list of
  Tables and optional metadata.
  """
- self._load(reload=reload, **kwargs)
+ self._load(
+ reload=reload,
+ metadata=metadata,
+ batch_size=None,
+ include_file_path=include_file_path,
+ concat=None,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list):
  df = [
  df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
@@ -472,22 +785,48 @@ class BaseFileReader(BaseFileIO, gc=False):
  else self._data
  )
  if metadata:
- metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
- return df, metadata
+ # metadata = get_dataframe_metadata(df, path=self.path, format=self.format)
+ return df, self._metadata
  return df

  def iter_pyarrow_table(
- self, reload: bool = False, **kwargs
+ self,
+ reload: bool = False,
+ batch_size: int | None = None,
+ include_file_path: bool = False,
+ concat: bool | None = None,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
+ **kwargs,
  ) -> Generator[pa.Table, None, None]:
  """Iterate over PyArrow Tables.

+ Args:
+ reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ concat (bool, optional): Concatenate multiple files into a single Table. Default is True.
+ batch_size (int, optional): Batch size for iteration. Default is 1.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.
+
  Returns:
  Generator[pa.Table, None, None]: Generator of PyArrow Tables.
  """
- if self.batch_size is None and "batch_size" not in kwargs:
- self.batch_size = 1
-
- self._load(reload=reload, **kwargs)
+ batch_size = batch_size or self.batch_size or 1
+
+ self._load(
+ reload=reload,
+ batch_size=batch_size,
+ include_file_path=include_file_path,
+ concat=concat,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  if isinstance(self._data, list | Generator):
  for df in self._data:
  yield df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
@@ -503,6 +842,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  conn: duckdb.DuckDBPyConnection | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> duckdb.DuckDBPyRelation | tuple[duckdb.DuckDBPyRelation, dict[str, Any]]:
  """Convert data to DuckDB relation.
@@ -511,6 +854,11 @@ class BaseFileReader(BaseFileIO, gc=False):
  conn (duckdb.DuckDBPyConnection, optional): DuckDB connection instance.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  duckdb.DuckDBPyRelation | tuple[duckdb.DuckDBPyRelation, dict[str, Any]]: DuckDB relation and optional
@@ -523,10 +871,27 @@ class BaseFileReader(BaseFileIO, gc=False):

  if metadata:
  return self._conn.from_arrow(
- self.to_pyarrow_table(concat=True, reload=reload, **kwargs),
+ self.to_pyarrow_table(
+ metadata=metadata,
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ se_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ ),
  ), self._metadata
  return self._conn.from_arrow(
- self.to_pyarrow_table(concat=True, reload=reload, **kwargs)
+ self.to_pyarrow_table(
+ reload=reload,
+ batch_size=None,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ )
  )

  def register_in_duckdb(
@@ -535,6 +900,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> duckdb.DuckDBPyConnection | tuple[duckdb.DuckDBPyConnection, dict[str, Any]]:
  """Register data in DuckDB.
@@ -544,6 +913,11 @@ class BaseFileReader(BaseFileIO, gc=False):
  name (str, optional): Name for the DuckDB table.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
+ kwargs: Additional keyword arguments.

  Returns:
  duckdb.DuckDBPyConnection | tuple[duckdb.DuckDBPyConnection, dict[str, Any]]: DuckDB connection instance
@@ -558,7 +932,16 @@ class BaseFileReader(BaseFileIO, gc=False):
  self._conn = conn

  self._conn.register(
- name, self.to_pyarrow_table(concat=True, reload=reload, **kwargs)
+ name,
+ self.to_pyarrow_table(
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
+ ),
  )
  if metadata:
  return self._conn, self._metadata
@@ -571,6 +954,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> (
  duckdb.DuckDBPyRelation
@@ -586,6 +973,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name (str, optional): Name for the DuckDB table.
  metadata (bool, optional): Include metadata in the output. Default is False.
  reload (bool, optional): Reload data if True. Default is False.
+ include_file_path (bool, optional): Include file path in the output. Default is False.
+ use_threads (bool, optional): Use threads for reading data. Default is True.
+ verbose (bool, optional): Verbose output. Default is None.
+ opt_dtypes (bool, optional): Optimize data types. Default is True.
  **kwargs: Additional keyword arguments.

  Returns:
@@ -596,10 +987,25 @@ class BaseFileReader(BaseFileIO, gc=False):
  """
  if as_relation:
  return self.to_duckdb_relation(
- conn=conn, metadata=metadata, reload=reload, **kwargs
+ conn=conn,
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )
  return self.register_in_duckdb(
- conn=conn, name=name, metadata=metadata, reload=reload, **kwargs
+ conn=conn,
+ name=name,
+ metadata=metadata,
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ verbose=verbose,
+ opt_dtypes=opt_dtypes,
+ **kwargs,
  )

  def register_in_datafusion(
@@ -608,6 +1014,10 @@ class BaseFileReader(BaseFileIO, gc=False):
  name: str | None = None,
  metadata: bool = False,
  reload: bool = False,
+ include_file_path: bool = False,
+ use_threads: bool | None = None,
+ verbose: bool | None = None,
+ opt_dtypes: bool | None = None,
  **kwargs,
  ) -> datafusion.SessionContext | tuple[datafusion.SessionContext, dict[str, Any]]:
  """Register data in DataFusion.
@@ -632,11 +1042,18 @@ class BaseFileReader(BaseFileIO, gc=False):

  self._ctx.register_record_batches(
  name,
- [self.to_pyarrow_table(concat=True, reload=reload, **kwargs).to_batches()],
+ [
+ self.to_pyarrow_table(
+ reload=reload,
+ include_file_path=include_file_path,
+ use_threads=use_threads,
+ opt_dtypes=opt_dtypes**kwargs,
+ ).to_batches()
+ ],
  )
  if metadata:
  return self._ctx, self._metadata
- return ctx
+ return self._ctx

  def filter(
  self, filter_expr: str | pl.Expr | pa.compute.Expression
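The BaseFileReader changes above replace the old kwargs-popping in _load() with explicit keyword arguments that every conversion method forwards. A hypothetical usage sketch; the reader class name, import path, and constructor arguments are assumptions, only the keyword parameters come from the signatures shown above:

from flowerpower.plugins.io.loader import ParquetFileReader  # assumed import path

reader = ParquetFileReader(path="s3://bucket/events/", format="parquet")  # assumed constructor

# Eager Polars DataFrame plus the metadata collected during _load()
df, meta = reader.to_polars(metadata=True, opt_dtypes=True)

# Batched iteration now takes batch_size directly instead of via **kwargs
for batch in reader.iter_pandas(batch_size=4):
    print(len(batch))

# DuckDB relation built from the PyArrow table
rel = reader.to_duckdb_relation(opt_dtypes=True)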
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: FlowerPower
- Version: 0.11.6
+ Version: 0.11.6.1
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
  Author-email: "Volker L." <ligno.blades@gmail.com>
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -18,7 +18,7 @@ flowerpower/cli/pipeline.py,sha256=60P6u_QOSgp0jJXEMxazEEo5Sh7-SWFo-Kkuaz21YuI,3
  flowerpower/cli/utils.py,sha256=nDSSj_1nlYlMmj252kRZeohhFqHv9yvdgDEduQCyWOc,5152
  flowerpower/fs/__init__.py,sha256=uZaPXErEfQqQRbKRIjkB9yiygd45X5_psYn9-VVrBTQ,910
  flowerpower/fs/base.py,sha256=TqgqBsaFj13O1NpAr8kHuGJ9CTlaSWViMB8Ai_iuCjs,22761
- flowerpower/fs/ext.py,sha256=zTZO-j__O6Om7gbOpXJL7uDo2Cki6hOdlx_GDJ-Xujw,67625
+ flowerpower/fs/ext.py,sha256=2NmhSbCIL0qnONMRNPHcPUuR39bGjWpxJE4hNHU5Rvw,69044
  flowerpower/fs/storage_options.py,sha256=msq5TpxAU8tcE_Bxjw6SyxaFa75UjdYnR4-O9U2wmbk,48034
  flowerpower/job_queue/__init__.py,sha256=a25hIqv2xoFKb4JZlyUukS0ppZ9-2sJKH3XAvbk3rlk,10788
  flowerpower/job_queue/base.py,sha256=YwLunDQSyqkSU_vJ69C5SSybJeJP1bAiZ3teUtOchxA,13640
@@ -44,7 +44,7 @@ flowerpower/pipeline/manager.py,sha256=KVpOclUEUAETUNJamJJGuKt3oxCaLitQgxWxkE1q0
  flowerpower/pipeline/registry.py,sha256=6ngmHyKyQsxvIO4qRYxljedY0BE1wE3lpfksEGOzjNs,18963
  flowerpower/pipeline/runner.py,sha256=dsSVYixFXqlxFk8EJfT4wV_7IwgkXq0ErwH_yf_NGS8,25654
  flowerpower/pipeline/visualizer.py,sha256=amjMrl5NetErE198HzZBPWVZBi_t5jj9ydxWpuNLoTI,5013
- flowerpower/plugins/io/base.py,sha256=tyFbvx8Ij8gTKP8p8GfwpP5dpIWNncGJfcuK_hPCPN0,79383
+ flowerpower/plugins/io/base.py,sha256=-bZBTdFGUWm60JuFpBG_1TZO7D0hmjgSA3a8Prg1MnY,96644
  flowerpower/plugins/io/metadata.py,sha256=PCrepLilXRWKDsB5BKFF_-OFs712s1zBeitW-84lDLQ,7005
  flowerpower/plugins/io/helpers/datetime.py,sha256=1WBUg2ywcsodJQwoF6JiIGc9yhVobvE2IErWp4i95m4,10649
  flowerpower/plugins/io/helpers/polars.py,sha256=346DBHG-HvoGZWF-DWxgz7H3KlZu8bFylKIqMOnVJSk,27031
@@ -94,9 +94,9 @@ flowerpower/utils/monkey.py,sha256=VPl3yimoWhwD9kI05BFsjNvtyQiDyLfY4Q85Bb6Ma0w,2
  flowerpower/utils/open_telemetry.py,sha256=fQWJWbIQFtKIxMBjAWeF12NGnqT0isO3A3j-DSOv_vE,949
  flowerpower/utils/scheduler.py,sha256=2zJ_xmLXpvXUQNF1XS2Gqm3Ogo907ctZ50GtvQB_rhE,9354
  flowerpower/utils/templates.py,sha256=ouyEeSDqa9PjW8c32fGpcINlpC0WToawRFZkMPtwsLE,1591
- flowerpower-0.11.6.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
- flowerpower-0.11.6.dist-info/METADATA,sha256=5Lg0RYLDvqAzJw05z2Qp-OmldA5Cy_FA7XTXlOa2Oos,21610
- flowerpower-0.11.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- flowerpower-0.11.6.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
- flowerpower-0.11.6.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
- flowerpower-0.11.6.dist-info/RECORD,,
+ flowerpower-0.11.6.1.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
+ flowerpower-0.11.6.1.dist-info/METADATA,sha256=KOkDA61ZYzXs3vvwKIQciSsSl-OoniBSpdilRlYXU8g,21612
+ flowerpower-0.11.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ flowerpower-0.11.6.1.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
+ flowerpower-0.11.6.1.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
+ flowerpower-0.11.6.1.dist-info/RECORD,,