FlowerPower 0.11.6__tar.gz → 0.11.6.1__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {flowerpower-0.11.6/src/FlowerPower.egg-info → flowerpower-0.11.6.1}/PKG-INFO +1 -1
  2. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/pyproject.toml +1 -1
  3. {flowerpower-0.11.6 → flowerpower-0.11.6.1/src/FlowerPower.egg-info}/PKG-INFO +1 -1
  4. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/fs/ext.py +58 -26
  5. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/base.py +497 -80
  6. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/LICENSE +0 -0
  7. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/README.md +0 -0
  8. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/setup.cfg +0 -0
  9. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
  10. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
  11. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/FlowerPower.egg-info/entry_points.txt +0 -0
  12. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/FlowerPower.egg-info/requires.txt +0 -0
  13. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/FlowerPower.egg-info/top_level.txt +0 -0
  14. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/__init__.py +0 -0
  15. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/__init__.py +0 -0
  16. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/base.py +0 -0
  17. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
  18. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
  19. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/pipeline/run.py +0 -0
  20. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/pipeline/schedule.py +0 -0
  21. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/project/__init__.py +0 -0
  22. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/project/adapter.py +0 -0
  23. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cfg/project/job_queue.py +0 -0
  24. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/__init__.py +0 -0
  25. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/cfg.py +0 -0
  26. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/job_queue.py +0 -0
  27. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/mqtt.py +0 -0
  28. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/pipeline.py +0 -0
  29. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/cli/utils.py +0 -0
  30. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/flowerpower.py +0 -0
  31. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/fs/__init__.py +0 -0
  32. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/fs/base.py +0 -0
  33. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/fs/storage_options.py +0 -0
  34. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/__init__.py +0 -0
  35. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/__init__.py +0 -0
  36. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -0
  37. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -0
  38. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/manager.py +0 -0
  39. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/setup.py +0 -0
  40. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/trigger.py +0 -0
  41. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/apscheduler/utils.py +0 -0
  42. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/base.py +0 -0
  43. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/__init__.py +0 -0
  44. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/_trigger.py +0 -0
  45. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -0
  46. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -0
  47. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/manager.py +0 -0
  48. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/setup.py +0 -0
  49. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/job_queue/rq/utils.py +0 -0
  50. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/mqtt.py +0 -0
  51. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/__init__.py +0 -0
  52. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/base.py +0 -0
  53. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/io.py +0 -0
  54. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/job_queue.py +0 -0
  55. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/manager.py +0 -0
  56. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/registry.py +0 -0
  57. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/runner.py +0 -0
  58. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/pipeline/visualizer.py +0 -0
  59. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/helpers/datetime.py +0 -0
  60. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/helpers/polars.py +0 -0
  61. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/helpers/pyarrow.py +0 -0
  62. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/helpers/sql.py +0 -0
  63. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/__init__.py +0 -0
  64. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/csv.py +0 -0
  65. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/deltatable.py +0 -0
  66. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/duckdb.py +0 -0
  67. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/json.py +0 -0
  68. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/mqtt.py +0 -0
  69. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/mssql.py +0 -0
  70. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/mysql.py +0 -0
  71. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/oracle.py +0 -0
  72. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/parquet.py +0 -0
  73. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/postgres.py +0 -0
  74. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/pydala.py +0 -0
  75. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/loader/sqlite.py +0 -0
  76. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/metadata.py +0 -0
  77. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/__init__.py +0 -0
  78. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/csv.py +0 -0
  79. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/deltatable.py +0 -0
  80. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/duckdb.py +0 -0
  81. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/json.py +0 -0
  82. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/mqtt.py +0 -0
  83. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/mssql.py +0 -0
  84. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/mysql.py +0 -0
  85. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/oracle.py +0 -0
  86. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/parquet.py +0 -0
  87. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/postgres.py +0 -0
  88. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/pydala.py +0 -0
  89. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/io/saver/sqlite.py +0 -0
  90. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/mqtt/__init__.py +0 -0
  91. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/mqtt/cfg.py +0 -0
  92. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/plugins/mqtt/manager.py +0 -0
  93. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/__init__.py +0 -0
  94. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/backend.py +0 -0
  95. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/executor.py +0 -0
  96. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/general.py +0 -0
  97. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/hamilton.py +0 -0
  98. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/job_queue.py +0 -0
  99. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/logging.py +0 -0
  100. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/settings/retry.py +0 -0
  101. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/callback.py +0 -0
  102. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/logging.py +0 -0
  103. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/misc.py +0 -0
  104. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/monkey.py +0 -0
  105. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/open_telemetry.py +0 -0
  106. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/scheduler.py +0 -0
  107. {flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.6
3
+ Version: 0.11.6.1
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
4
4
  authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.11"
7
- version = "0.11.6"
7
+ version = "0.11.6.1"
8
8
  keywords = [
9
9
  "hamilton",
10
10
  "workflow",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.6
3
+ Version: 0.11.6.1
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -193,6 +193,7 @@ def _read_json(
193
193
  as_dataframe: (bool, optional) If True, return a DataFrame. Defaults to True.
194
194
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
195
195
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
196
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
196
197
  **kwargs: Additional keyword arguments.
197
198
 
198
199
  Returns:
@@ -247,8 +248,8 @@ def _read_json(
247
248
  data = [opt_dtype_pl(df, strict=False) for df in data]
248
249
  if concat:
249
250
  result = pl.concat(data, how="diagonal_relaxed")
250
- if opt_dtypes:
251
- result = opt_dtype_pl(result, strict=False)
251
+ # if opt_dtypes:
252
+ # result = opt_dtype_pl(result, strict=False)
252
253
  return result
253
254
  return data
254
255
 
@@ -280,6 +281,7 @@ def _read_json_batches(
280
281
  concat: Combine files within each batch
281
282
  use_threads: Enable parallel file reading within batches
282
283
  verbose: Print progress information
284
+ opt_dtypes: Optimize DataFrame dtypes
283
285
  **kwargs: Additional arguments for DataFrame conversion
284
286
 
285
287
  Yields:
@@ -354,10 +356,16 @@ def _read_json_batches(
354
356
  ][0]
355
357
  for _data in batch_data
356
358
  ]
357
-
359
+ if opt_dtypes:
360
+ batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
358
361
  if concat and len(batch_dfs) > 1:
359
- yield pl.concat(batch_dfs, how="diagonal_relaxed")
362
+ batch_df = pl.concat(batch_dfs, how="diagonal_relaxed")
363
+ # if opt_dtypes:
364
+ # batch_df = opt_dtype_pl(batch_df, strict=False)
365
+ yield batch_df
360
366
  else:
367
+ # if opt_dtypes:
368
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
361
369
  yield batch_dfs
362
370
  else:
363
371
  yield batch_data
@@ -403,6 +411,7 @@ def read_json(
403
411
  concat: Combine multiple files/batches into single result
404
412
  use_threads: Enable parallel file reading
405
413
  verbose: Print progress information
414
+ opt_dtypes: Optimize DataFrame dtypes for performance
406
415
  **kwargs: Additional arguments passed to DataFrame conversion
407
416
 
408
417
  Returns:
@@ -486,6 +495,7 @@ def _read_csv_file(
486
495
  path: Path to CSV file
487
496
  self: Filesystem instance to use for reading
488
497
  include_file_path: Add source filepath as a column
498
+ opt_dtypes: Optimize DataFrame dtypes
489
499
  **kwargs: Additional arguments passed to pl.read_csv()
490
500
 
491
501
  Returns:
@@ -544,6 +554,7 @@ def _read_csv(
544
554
  use_threads: (bool, optional) If True, read files in parallel. Defaults to True.
545
555
  concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
546
556
  verbose: (bool, optional) If True, print verbose output. Defaults to False.
557
+ opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
547
558
  **kwargs: Additional keyword arguments.
548
559
 
549
560
  Returns:
@@ -587,8 +598,8 @@ def _read_csv(
587
598
  )
588
599
  if concat:
589
600
  result = pl.concat(dfs, how="diagonal_relaxed")
590
- if opt_dtypes:
591
- result = opt_dtype_pl(result, strict=False)
601
+ # if opt_dtypes:
602
+ # result = opt_dtype_pl(result, strict=False)
592
603
  return result
593
604
  return dfs
594
605
 
@@ -616,6 +627,7 @@ def _read_csv_batches(
616
627
  concat: Combine files within each batch
617
628
  use_threads: Enable parallel file reading within batches
618
629
  verbose: Print progress information
630
+ opt_dtypes: Optimize DataFrame dtypes
619
631
  **kwargs: Additional arguments passed to pl.read_csv()
620
632
 
621
633
  Yields:
@@ -667,23 +679,28 @@ def _read_csv_batches(
667
679
  n_jobs=-1,
668
680
  backend="threading",
669
681
  verbose=verbose,
682
+ opt_dtypes=opt_dtypes,
670
683
  **kwargs,
671
684
  )
672
685
  else:
673
686
  batch_dfs = [
674
687
  _read_csv_file(
675
- p, self=self, include_file_path=include_file_path, **kwargs
688
+ p,
689
+ self=self,
690
+ include_file_path=include_file_path,
691
+ opt_dtypes=opt_dtypes,
692
+ **kwargs,
676
693
  )
677
694
  for p in batch_paths
678
695
  ]
679
696
 
680
- if opt_dtypes:
681
- batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
697
+ # if opt_dtypes:
698
+ # batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
682
699
 
683
700
  if concat and len(batch_dfs) > 1:
684
701
  result = pl.concat(batch_dfs, how="diagonal_relaxed")
685
- if opt_dtypes:
686
- result = opt_dtype_pl(result, strict=False)
702
+ # if opt_dtypes:
703
+ # result = opt_dtype_pl(result, strict=False)
687
704
  yield result
688
705
  else:
689
706
  yield batch_dfs
@@ -766,6 +783,7 @@ def read_csv(
766
783
  concat=concat,
767
784
  use_threads=use_threads,
768
785
  verbose=verbose,
786
+ opt_dtypes=opt_dtypes,
769
787
  **kwargs,
770
788
  )
771
789
  return _read_csv(
@@ -775,6 +793,7 @@ def read_csv(
775
793
  concat=concat,
776
794
  use_threads=use_threads,
777
795
  verbose=verbose,
796
+ opt_dtypes=opt_dtypes,
778
797
  **kwargs,
779
798
  )
780
799
 
@@ -858,9 +877,7 @@ def _read_parquet(
858
877
  if not include_file_path and concat:
859
878
  if isinstance(path, str):
860
879
  path = path.replace("**", "").replace("*.parquet", "")
861
- table = pq.read_table(path, filesystem=self, **kwargs)
862
- if opt_dtypes:
863
- table = opt_dtype_pa(table, strict=False)
880
+ table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
864
881
  return table
865
882
  else:
866
883
  if isinstance(path, str):
@@ -907,12 +924,12 @@ def _read_parquet(
907
924
  unified_schema = unify_schemas_pa(schemas)
908
925
  tables = [cast_schema(t, unified_schema) for t in tables]
909
926
  result = pa.concat_tables(tables, promote_options="permissive")
910
- if opt_dtypes:
911
- result = opt_dtype_pa(result, strict=False)
927
+ # if opt_dtypes:
928
+ # result = opt_dtype_pa(result, strict=False)
912
929
  return result
913
930
  elif isinstance(tables, pa.Table):
914
- if opt_dtypes:
915
- tables = opt_dtype_pa(tables, strict=False)
931
+ # if opt_dtypes:
932
+ # tables = opt_dtype_pa(tables, strict=False)
916
933
  return tables
917
934
  else:
918
935
  return pa.concat_tables(tables, promote_options="permissive")
@@ -981,9 +998,9 @@ def _read_parquet_batches(
981
998
  if not include_file_path and concat and batch_size is None:
982
999
  if isinstance(path, str):
983
1000
  path = path.replace("**", "").replace("*.parquet", "")
984
- table = pq.read_table(path, filesystem=self, **kwargs)
985
- if opt_dtypes:
986
- table = opt_dtype_pa(table, strict=False)
1001
+ table = _read_parquet_file(
1002
+ path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
1003
+ )
987
1004
  yield table
988
1005
  return
989
1006
 
@@ -994,7 +1011,11 @@ def _read_parquet_batches(
994
1011
 
995
1012
  if not isinstance(path, list):
996
1013
  yield _read_parquet_file(
997
- path=path, self=self, include_file_path=include_file_path, **kwargs
1014
+ path=path,
1015
+ self=self,
1016
+ include_file_path=include_file_path,
1017
+ opt_dtypes=opt_dtypes,
1018
+ **kwargs,
998
1019
  )
999
1020
  return
1000
1021
 
@@ -1032,12 +1053,12 @@ def _read_parquet_batches(
1032
1053
  unified_schema = unify_schemas_pa(schemas)
1033
1054
  batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
1034
1055
  result = pa.concat_tables(batch_tables, promote_options="permissive")
1035
- if opt_dtypes:
1036
- result = opt_dtype_pa(result, strict=False)
1056
+ # if opt_dtypes:
1057
+ # result = opt_dtype_pa(result, strict=False)
1037
1058
  yield result
1038
1059
  else:
1039
- if opt_dtypes and isinstance(batch_tables, list):
1040
- batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
1060
+ # if opt_dtypes and isinstance(batch_tables, list):
1061
+ # batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
1041
1062
  yield batch_tables
1042
1063
 
1043
1064
 
@@ -1077,6 +1098,7 @@ def read_parquet(
1077
1098
  concat: Combine multiple files/batches into single Table
1078
1099
  use_threads: Enable parallel file reading
1079
1100
  verbose: Print progress information
1101
+ opt_dtypes: Optimize Table dtypes for performance
1080
1102
  **kwargs: Additional arguments passed to pq.read_table()
1081
1103
 
1082
1104
  Returns:
@@ -1119,6 +1141,7 @@ def read_parquet(
1119
1141
  concat=concat,
1120
1142
  use_threads=use_threads,
1121
1143
  verbose=verbose,
1144
+ opt_dtypes=opt_dtypes,
1122
1145
  **kwargs,
1123
1146
  )
1124
1147
  return _read_parquet(
@@ -1128,6 +1151,7 @@ def read_parquet(
1128
1151
  use_threads=use_threads,
1129
1152
  concat=concat,
1130
1153
  verbose=verbose,
1154
+ opt_dtypes=opt_dtypes,
1131
1155
  **kwargs,
1132
1156
  )
1133
1157
 
@@ -1142,6 +1166,7 @@ def read_files(
1142
1166
  jsonlines: bool = False,
1143
1167
  use_threads: bool = True,
1144
1168
  verbose: bool = False,
1169
+ opt_dtypes: bool = False,
1145
1170
  **kwargs: Any,
1146
1171
  ) -> (
1147
1172
  pl.DataFrame
@@ -1175,6 +1200,7 @@ def read_files(
1175
1200
  jsonlines: For JSON format, whether to read as JSON Lines
1176
1201
  use_threads: Enable parallel file reading
1177
1202
  verbose: Print progress information
1203
+ opt_dtypes: Optimize DataFrame/Arrow Table dtypes for performance
1178
1204
  **kwargs: Additional format-specific arguments
1179
1205
 
1180
1206
  Returns:
@@ -1224,6 +1250,7 @@ def read_files(
1224
1250
  concat=concat,
1225
1251
  use_threads=use_threads,
1226
1252
  verbose=verbose,
1253
+ opt_dtypes=opt_dtypes,
1227
1254
  **kwargs,
1228
1255
  )
1229
1256
  return read_json(
@@ -1234,6 +1261,7 @@ def read_files(
1234
1261
  concat=concat,
1235
1262
  use_threads=use_threads,
1236
1263
  verbose=verbose,
1264
+ opt_dtypes=opt_dtypes,
1237
1265
  **kwargs,
1238
1266
  )
1239
1267
  elif format == "csv":
@@ -1246,6 +1274,7 @@ def read_files(
1246
1274
  concat=concat,
1247
1275
  use_threads=use_threads,
1248
1276
  verbose=verbose,
1277
+ opt_dtypes=opt_dtypes,
1249
1278
  **kwargs,
1250
1279
  )
1251
1280
  return read_csv(
@@ -1255,6 +1284,7 @@ def read_files(
1255
1284
  use_threads=use_threads,
1256
1285
  concat=concat,
1257
1286
  verbose=verbose,
1287
+ opt_dtypes=opt_dtypes,
1258
1288
  **kwargs,
1259
1289
  )
1260
1290
  elif format == "parquet":
@@ -1267,6 +1297,7 @@ def read_files(
1267
1297
  concat=concat,
1268
1298
  use_threads=use_threads,
1269
1299
  verbose=verbose,
1300
+ opt_dtypes=opt_dtypes,
1270
1301
  **kwargs,
1271
1302
  )
1272
1303
  return read_parquet(
@@ -1276,6 +1307,7 @@ def read_files(
1276
1307
  use_threads=use_threads,
1277
1308
  concat=concat,
1278
1309
  verbose=verbose,
1310
+ opt_dtypes=opt_dtypes,
1279
1311
  **kwargs,
1280
1312
  )
1281
1313