polars-df 0.11.0-x86_64-linux → 0.12.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/LICENSE-THIRD-PARTY.txt +1065 -878
  5. data/lib/polars/3.1/polars.so +0 -0
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +3 -3
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +5 -4
  35. data/lib/polars/lazy_frame.rb +120 -106
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +105 -189
  41. data/lib/polars/string_expr.rb +42 -67
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/io/csv.rb CHANGED
@@ -104,7 +104,7 @@ module Polars
104
104
  ignore_errors: false,
105
105
  parse_dates: false,
106
106
  n_threads: nil,
107
- infer_schema_length: 100,
107
+ infer_schema_length: N_INFER_DEFAULT,
108
108
  batch_size: 8192,
109
109
  n_rows: nil,
110
110
  encoding: "utf8",
@@ -192,7 +192,7 @@ module Polars
192
192
  ignore_errors: false,
193
193
  parse_dates: false,
194
194
  n_threads: nil,
195
- infer_schema_length: 100,
195
+ infer_schema_length: N_INFER_DEFAULT,
196
196
  batch_size: 8192,
197
197
  n_rows: nil,
198
198
  encoding: "utf8",
@@ -222,7 +222,7 @@ module Polars
222
222
  if !dtypes.nil?
223
223
  if dtypes.is_a?(Hash)
224
224
  dtype_list = []
225
- dtypes.each do|k, v|
225
+ dtypes.each do |k, v|
226
226
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
227
227
  end
228
228
  elsif dtypes.is_a?(::Array)
@@ -304,7 +304,7 @@ module Polars
304
304
  missing_utf8_is_empty_string,
305
305
  parse_dates,
306
306
  skip_rows_after_header,
307
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
307
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
308
308
  sample_size,
309
309
  eol_char,
310
310
  raise_if_empty,
@@ -422,7 +422,7 @@ module Polars
422
422
  ignore_errors: false,
423
423
  parse_dates: false,
424
424
  n_threads: nil,
425
- infer_schema_length: 100,
425
+ infer_schema_length: N_INFER_DEFAULT,
426
426
  batch_size: 50_000,
427
427
  n_rows: nil,
428
428
  encoding: "utf8",
@@ -567,7 +567,7 @@ module Polars
567
567
  ignore_errors: false,
568
568
  cache: true,
569
569
  with_column_names: nil,
570
- infer_schema_length: 100,
570
+ infer_schema_length: N_INFER_DEFAULT,
571
571
  n_rows: nil,
572
572
  encoding: "utf8",
573
573
  low_memory: false,
@@ -629,7 +629,7 @@ module Polars
629
629
  ignore_errors: false,
630
630
  cache: true,
631
631
  with_column_names: nil,
632
- infer_schema_length: 100,
632
+ infer_schema_length: N_INFER_DEFAULT,
633
633
  n_rows: nil,
634
634
  encoding: "utf8",
635
635
  low_memory: false,
@@ -669,7 +669,7 @@ module Polars
669
669
  rechunk,
670
670
  skip_rows_after_header,
671
671
  encoding,
672
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
672
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
673
673
  parse_dates,
674
674
  eol_char,
675
675
  truncate_ragged_lines
data/lib/polars/io/ipc.rb CHANGED
@@ -76,7 +76,7 @@ module Polars
76
76
  columns,
77
77
  projection,
78
78
  n_rows,
79
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
79
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
80
80
  memory_map
81
81
  )
82
82
  Utils.wrap_df(rbdf)
@@ -149,7 +149,7 @@ module Polars
149
149
  columns,
150
150
  projection,
151
151
  n_rows,
152
- Utils._prepare_row_count_args(row_index_name, row_index_offset),
152
+ Utils.parse_row_index_args(row_index_name, row_index_offset),
153
153
  rechunk
154
154
  )
155
155
  Utils.wrap_df(pydf)
@@ -238,7 +238,7 @@ module Polars
238
238
  n_rows,
239
239
  cache,
240
240
  rechunk,
241
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
241
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
242
242
  memory_map
243
243
  )
244
244
  Utils.wrap_ldf(rblf)
data/lib/polars/io/json.rb CHANGED
@@ -6,12 +6,23 @@ module Polars
6
6
  # Path to a file or a file-like object.
7
7
  #
8
8
  # @return [DataFrame]
9
- def read_json(source)
9
+ def read_json(
10
+ source,
11
+ schema: nil,
12
+ schema_overrides: nil,
13
+ infer_schema_length: N_INFER_DEFAULT
14
+ )
10
15
  if Utils.pathlike?(source)
11
16
  source = Utils.normalize_filepath(source)
12
17
  end
13
18
 
14
- rbdf = RbDataFrame.read_json(source)
19
+ rbdf =
20
+ RbDataFrame.read_json(
21
+ source,
22
+ infer_schema_length,
23
+ schema,
24
+ schema_overrides
25
+ )
15
26
  Utils.wrap_df(rbdf)
16
27
  end
17
28
  end
data/lib/polars/io/ndjson.rb CHANGED
@@ -6,12 +6,23 @@ module Polars
6
6
  # Path to a file or a file-like object.
7
7
  #
8
8
  # @return [DataFrame]
9
- def read_ndjson(source)
9
+ def read_ndjson(
10
+ source,
11
+ schema: nil,
12
+ schema_overrides: nil,
13
+ ignore_errors: false
14
+ )
10
15
  if Utils.pathlike?(source)
11
16
  source = Utils.normalize_filepath(source)
12
17
  end
13
18
 
14
- rbdf = RbDataFrame.read_ndjson(source)
19
+ rbdf =
20
+ RbDataFrame.read_ndjson(
21
+ source,
22
+ ignore_errors,
23
+ schema,
24
+ schema_overrides
25
+ )
15
26
  Utils.wrap_df(rbdf)
16
27
  end
17
28
 
@@ -41,7 +52,7 @@ module Polars
41
52
  # @return [LazyFrame]
42
53
  def scan_ndjson(
43
54
  source,
44
- infer_schema_length: 100,
55
+ infer_schema_length: N_INFER_DEFAULT,
45
56
  batch_size: 1024,
46
57
  n_rows: nil,
47
58
  low_memory: false,
@@ -61,7 +72,7 @@ module Polars
61
72
  n_rows,
62
73
  low_memory,
63
74
  rechunk,
64
- Utils._prepare_row_count_args(row_count_name, row_count_offset)
75
+ Utils.parse_row_index_args(row_count_name, row_count_offset)
65
76
  )
66
77
  Utils.wrap_ldf(rblf)
67
78
  end
data/lib/polars/io/parquet.rb CHANGED
@@ -110,7 +110,7 @@ module Polars
110
110
  projection,
111
111
  n_rows,
112
112
  parallel,
113
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
113
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
114
114
  low_memory,
115
115
  use_statistics,
116
116
  rechunk
@@ -178,7 +178,7 @@ module Polars
178
178
 
179
179
  _scan_parquet_impl(
180
180
  source,
181
- n_rows:n_rows,
181
+ n_rows: n_rows,
182
182
  cache: cache,
183
183
  parallel: parallel,
184
184
  rechunk: rechunk,
@@ -202,7 +202,7 @@ module Polars
202
202
  storage_options: nil,
203
203
  low_memory: false,
204
204
  use_statistics: true,
205
- hive_partitioning: true,
205
+ hive_partitioning: nil,
206
206
  glob: true
207
207
  )
208
208
  rblf =
@@ -213,11 +213,12 @@ module Polars
213
213
  cache,
214
214
  parallel,
215
215
  rechunk,
216
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
216
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
217
217
  low_memory,
218
218
  use_statistics,
219
219
  hive_partitioning,
220
220
  nil,
221
+ true,
221
222
  glob
222
223
  )
223
224
  Utils.wrap_ldf(rblf)