polars-df 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/ext/polars/Cargo.toml +10 -7
  5. data/ext/polars/src/batched_csv.rs +1 -1
  6. data/ext/polars/src/conversion/any_value.rs +261 -0
  7. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  8. data/ext/polars/src/conversion/mod.rs +51 -10
  9. data/ext/polars/src/dataframe/construction.rs +6 -8
  10. data/ext/polars/src/dataframe/general.rs +19 -29
  11. data/ext/polars/src/dataframe/io.rs +43 -33
  12. data/ext/polars/src/error.rs +26 -4
  13. data/ext/polars/src/expr/categorical.rs +0 -10
  14. data/ext/polars/src/expr/datetime.rs +4 -12
  15. data/ext/polars/src/expr/general.rs +123 -110
  16. data/ext/polars/src/expr/mod.rs +2 -2
  17. data/ext/polars/src/expr/rolling.rs +17 -9
  18. data/ext/polars/src/expr/string.rs +2 -6
  19. data/ext/polars/src/functions/eager.rs +10 -10
  20. data/ext/polars/src/functions/lazy.rs +21 -21
  21. data/ext/polars/src/functions/range.rs +6 -12
  22. data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
  23. data/ext/polars/src/lazyframe/mod.rs +81 -98
  24. data/ext/polars/src/lib.rs +55 -45
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/rb_modules.rs +25 -1
  27. data/ext/polars/src/series/aggregation.rs +4 -2
  28. data/ext/polars/src/series/arithmetic.rs +21 -11
  29. data/ext/polars/src/series/construction.rs +56 -38
  30. data/ext/polars/src/series/export.rs +1 -1
  31. data/ext/polars/src/series/mod.rs +31 -10
  32. data/ext/polars/src/sql.rs +3 -1
  33. data/lib/polars/array_expr.rb +4 -4
  34. data/lib/polars/batched_csv_reader.rb +2 -2
  35. data/lib/polars/cat_expr.rb +0 -36
  36. data/lib/polars/cat_name_space.rb +0 -37
  37. data/lib/polars/data_frame.rb +93 -101
  38. data/lib/polars/data_types.rb +1 -1
  39. data/lib/polars/date_time_expr.rb +525 -573
  40. data/lib/polars/date_time_name_space.rb +263 -464
  41. data/lib/polars/dynamic_group_by.rb +3 -3
  42. data/lib/polars/exceptions.rb +3 -0
  43. data/lib/polars/expr.rb +367 -330
  44. data/lib/polars/expr_dispatch.rb +1 -1
  45. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  46. data/lib/polars/functions/as_datatype.rb +63 -40
  47. data/lib/polars/functions/lazy.rb +63 -14
  48. data/lib/polars/functions/lit.rb +1 -1
  49. data/lib/polars/functions/range/date_range.rb +18 -77
  50. data/lib/polars/functions/range/datetime_range.rb +4 -4
  51. data/lib/polars/functions/range/int_range.rb +2 -2
  52. data/lib/polars/functions/range/time_range.rb +4 -4
  53. data/lib/polars/functions/repeat.rb +1 -1
  54. data/lib/polars/functions/whenthen.rb +1 -1
  55. data/lib/polars/io/csv.rb +8 -8
  56. data/lib/polars/io/ipc.rb +3 -3
  57. data/lib/polars/io/json.rb +13 -2
  58. data/lib/polars/io/ndjson.rb +15 -4
  59. data/lib/polars/io/parquet.rb +5 -4
  60. data/lib/polars/lazy_frame.rb +120 -106
  61. data/lib/polars/lazy_group_by.rb +1 -1
  62. data/lib/polars/list_expr.rb +11 -11
  63. data/lib/polars/list_name_space.rb +5 -1
  64. data/lib/polars/rolling_group_by.rb +5 -7
  65. data/lib/polars/series.rb +105 -189
  66. data/lib/polars/string_expr.rb +42 -67
  67. data/lib/polars/string_name_space.rb +5 -4
  68. data/lib/polars/testing.rb +2 -2
  69. data/lib/polars/utils/constants.rb +9 -0
  70. data/lib/polars/utils/convert.rb +97 -0
  71. data/lib/polars/utils/parse.rb +89 -0
  72. data/lib/polars/utils/various.rb +76 -0
  73. data/lib/polars/utils/wrap.rb +19 -0
  74. data/lib/polars/utils.rb +4 -330
  75. data/lib/polars/version.rb +1 -1
  76. data/lib/polars/whenthen.rb +6 -6
  77. data/lib/polars.rb +11 -0
  78. metadata +9 -4
  79. data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/lib/polars/io/csv.rb CHANGED
@@ -104,7 +104,7 @@ module Polars
104
104
  ignore_errors: false,
105
105
  parse_dates: false,
106
106
  n_threads: nil,
107
- infer_schema_length: 100,
107
+ infer_schema_length: N_INFER_DEFAULT,
108
108
  batch_size: 8192,
109
109
  n_rows: nil,
110
110
  encoding: "utf8",
@@ -192,7 +192,7 @@ module Polars
192
192
  ignore_errors: false,
193
193
  parse_dates: false,
194
194
  n_threads: nil,
195
- infer_schema_length: 100,
195
+ infer_schema_length: N_INFER_DEFAULT,
196
196
  batch_size: 8192,
197
197
  n_rows: nil,
198
198
  encoding: "utf8",
@@ -222,7 +222,7 @@ module Polars
222
222
  if !dtypes.nil?
223
223
  if dtypes.is_a?(Hash)
224
224
  dtype_list = []
225
- dtypes.each do|k, v|
225
+ dtypes.each do |k, v|
226
226
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
227
227
  end
228
228
  elsif dtypes.is_a?(::Array)
@@ -304,7 +304,7 @@ module Polars
304
304
  missing_utf8_is_empty_string,
305
305
  parse_dates,
306
306
  skip_rows_after_header,
307
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
307
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
308
308
  sample_size,
309
309
  eol_char,
310
310
  raise_if_empty,
@@ -422,7 +422,7 @@ module Polars
422
422
  ignore_errors: false,
423
423
  parse_dates: false,
424
424
  n_threads: nil,
425
- infer_schema_length: 100,
425
+ infer_schema_length: N_INFER_DEFAULT,
426
426
  batch_size: 50_000,
427
427
  n_rows: nil,
428
428
  encoding: "utf8",
@@ -567,7 +567,7 @@ module Polars
567
567
  ignore_errors: false,
568
568
  cache: true,
569
569
  with_column_names: nil,
570
- infer_schema_length: 100,
570
+ infer_schema_length: N_INFER_DEFAULT,
571
571
  n_rows: nil,
572
572
  encoding: "utf8",
573
573
  low_memory: false,
@@ -629,7 +629,7 @@ module Polars
629
629
  ignore_errors: false,
630
630
  cache: true,
631
631
  with_column_names: nil,
632
- infer_schema_length: 100,
632
+ infer_schema_length: N_INFER_DEFAULT,
633
633
  n_rows: nil,
634
634
  encoding: "utf8",
635
635
  low_memory: false,
@@ -669,7 +669,7 @@ module Polars
669
669
  rechunk,
670
670
  skip_rows_after_header,
671
671
  encoding,
672
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
672
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
673
673
  parse_dates,
674
674
  eol_char,
675
675
  truncate_ragged_lines
data/lib/polars/io/ipc.rb CHANGED
@@ -76,7 +76,7 @@ module Polars
76
76
  columns,
77
77
  projection,
78
78
  n_rows,
79
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
79
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
80
80
  memory_map
81
81
  )
82
82
  Utils.wrap_df(rbdf)
@@ -149,7 +149,7 @@ module Polars
149
149
  columns,
150
150
  projection,
151
151
  n_rows,
152
- Utils._prepare_row_count_args(row_index_name, row_index_offset),
152
+ Utils.parse_row_index_args(row_index_name, row_index_offset),
153
153
  rechunk
154
154
  )
155
155
  Utils.wrap_df(pydf)
@@ -238,7 +238,7 @@ module Polars
238
238
  n_rows,
239
239
  cache,
240
240
  rechunk,
241
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
241
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
242
242
  memory_map
243
243
  )
244
244
  Utils.wrap_ldf(rblf)
data/lib/polars/io/json.rb CHANGED
@@ -6,12 +6,23 @@ module Polars
6
6
  # Path to a file or a file-like object.
7
7
  #
8
8
  # @return [DataFrame]
9
- def read_json(source)
9
+ def read_json(
10
+ source,
11
+ schema: nil,
12
+ schema_overrides: nil,
13
+ infer_schema_length: N_INFER_DEFAULT
14
+ )
10
15
  if Utils.pathlike?(source)
11
16
  source = Utils.normalize_filepath(source)
12
17
  end
13
18
 
14
- rbdf = RbDataFrame.read_json(source)
19
+ rbdf =
20
+ RbDataFrame.read_json(
21
+ source,
22
+ infer_schema_length,
23
+ schema,
24
+ schema_overrides
25
+ )
15
26
  Utils.wrap_df(rbdf)
16
27
  end
17
28
  end
data/lib/polars/io/ndjson.rb CHANGED
@@ -6,12 +6,23 @@ module Polars
6
6
  # Path to a file or a file-like object.
7
7
  #
8
8
  # @return [DataFrame]
9
- def read_ndjson(source)
9
+ def read_ndjson(
10
+ source,
11
+ schema: nil,
12
+ schema_overrides: nil,
13
+ ignore_errors: false
14
+ )
10
15
  if Utils.pathlike?(source)
11
16
  source = Utils.normalize_filepath(source)
12
17
  end
13
18
 
14
- rbdf = RbDataFrame.read_ndjson(source)
19
+ rbdf =
20
+ RbDataFrame.read_ndjson(
21
+ source,
22
+ ignore_errors,
23
+ schema,
24
+ schema_overrides
25
+ )
15
26
  Utils.wrap_df(rbdf)
16
27
  end
17
28
 
@@ -41,7 +52,7 @@ module Polars
41
52
  # @return [LazyFrame]
42
53
  def scan_ndjson(
43
54
  source,
44
- infer_schema_length: 100,
55
+ infer_schema_length: N_INFER_DEFAULT,
45
56
  batch_size: 1024,
46
57
  n_rows: nil,
47
58
  low_memory: false,
@@ -61,7 +72,7 @@ module Polars
61
72
  n_rows,
62
73
  low_memory,
63
74
  rechunk,
64
- Utils._prepare_row_count_args(row_count_name, row_count_offset)
75
+ Utils.parse_row_index_args(row_count_name, row_count_offset)
65
76
  )
66
77
  Utils.wrap_ldf(rblf)
67
78
  end
data/lib/polars/io/parquet.rb CHANGED
@@ -110,7 +110,7 @@ module Polars
110
110
  projection,
111
111
  n_rows,
112
112
  parallel,
113
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
113
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
114
114
  low_memory,
115
115
  use_statistics,
116
116
  rechunk
@@ -178,7 +178,7 @@ module Polars
178
178
 
179
179
  _scan_parquet_impl(
180
180
  source,
181
- n_rows:n_rows,
181
+ n_rows: n_rows,
182
182
  cache: cache,
183
183
  parallel: parallel,
184
184
  rechunk: rechunk,
@@ -202,7 +202,7 @@ module Polars
202
202
  storage_options: nil,
203
203
  low_memory: false,
204
204
  use_statistics: true,
205
- hive_partitioning: true,
205
+ hive_partitioning: nil,
206
206
  glob: true
207
207
  )
208
208
  rblf =
@@ -213,11 +213,12 @@ module Polars
213
213
  cache,
214
214
  parallel,
215
215
  rechunk,
216
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
216
+ Utils.parse_row_index_args(row_count_name, row_count_offset),
217
217
  low_memory,
218
218
  use_statistics,
219
219
  hive_partitioning,
220
220
  nil,
221
+ true,
221
222
  glob
222
223
  )
223
224
  Utils.wrap_ldf(rblf)