polars-df 0.9.0-arm64-darwin → 0.11.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Cargo.lock +144 -57
  4. data/LICENSE-THIRD-PARTY.txt +629 -29
  5. data/README.md +7 -6
  6. data/lib/polars/3.1/polars.bundle +0 -0
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +6 -2
  10. data/lib/polars/batched_csv_reader.rb +11 -3
  11. data/lib/polars/convert.rb +6 -1
  12. data/lib/polars/data_frame.rb +225 -370
  13. data/lib/polars/date_time_expr.rb +11 -4
  14. data/lib/polars/date_time_name_space.rb +14 -4
  15. data/lib/polars/dynamic_group_by.rb +2 -2
  16. data/lib/polars/exceptions.rb +4 -0
  17. data/lib/polars/expr.rb +1171 -54
  18. data/lib/polars/functions/lazy.rb +3 -3
  19. data/lib/polars/functions/range/date_range.rb +92 -0
  20. data/lib/polars/functions/range/datetime_range.rb +149 -0
  21. data/lib/polars/functions/range/time_range.rb +141 -0
  22. data/lib/polars/functions/whenthen.rb +74 -5
  23. data/lib/polars/group_by.rb +88 -23
  24. data/lib/polars/io/avro.rb +24 -0
  25. data/lib/polars/{io.rb → io/csv.rb} +307 -489
  26. data/lib/polars/io/database.rb +73 -0
  27. data/lib/polars/io/ipc.rb +247 -0
  28. data/lib/polars/io/json.rb +18 -0
  29. data/lib/polars/io/ndjson.rb +69 -0
  30. data/lib/polars/io/parquet.rb +226 -0
  31. data/lib/polars/lazy_frame.rb +55 -195
  32. data/lib/polars/lazy_group_by.rb +100 -3
  33. data/lib/polars/list_expr.rb +6 -2
  34. data/lib/polars/rolling_group_by.rb +2 -2
  35. data/lib/polars/series.rb +14 -12
  36. data/lib/polars/string_expr.rb +38 -36
  37. data/lib/polars/utils.rb +89 -1
  38. data/lib/polars/version.rb +1 -1
  39. data/lib/polars/whenthen.rb +83 -0
  40. data/lib/polars.rb +10 -3
  41. metadata +13 -6
  42. data/lib/polars/when.rb +0 -16
  43. data/lib/polars/when_then.rb +0 -19
@@ -46,268 +46,6 @@ module Polars
46
46
  df
47
47
  end
48
48
 
49
- # @private
50
- def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
51
- rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
52
- _from_rbdf(rbdf)
53
- end
54
-
55
- # @private
56
- def self._from_hash(data, schema: nil, schema_overrides: nil)
57
- _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
58
- end
59
-
60
- # def self._from_records
61
- # end
62
-
63
- # def self._from_numo
64
- # end
65
-
66
- # no self._from_arrow
67
-
68
- # no self._from_pandas
69
-
70
- # @private
71
- def self._read_csv(
72
- file,
73
- has_header: true,
74
- columns: nil,
75
- sep: str = ",",
76
- comment_char: nil,
77
- quote_char: '"',
78
- skip_rows: 0,
79
- dtypes: nil,
80
- null_values: nil,
81
- ignore_errors: false,
82
- parse_dates: false,
83
- n_threads: nil,
84
- infer_schema_length: 100,
85
- batch_size: 8192,
86
- n_rows: nil,
87
- encoding: "utf8",
88
- low_memory: false,
89
- rechunk: true,
90
- skip_rows_after_header: 0,
91
- row_count_name: nil,
92
- row_count_offset: 0,
93
- sample_size: 1024,
94
- eol_char: "\n"
95
- )
96
- if Utils.pathlike?(file)
97
- path = Utils.normalise_filepath(file)
98
- else
99
- path = nil
100
- # if defined?(StringIO) && file.is_a?(StringIO)
101
- # file = file.string
102
- # end
103
- end
104
-
105
- dtype_list = nil
106
- dtype_slice = nil
107
- if !dtypes.nil?
108
- if dtypes.is_a?(Hash)
109
- dtype_list = []
110
- dtypes.each do|k, v|
111
- dtype_list << [k, Utils.rb_type_to_dtype(v)]
112
- end
113
- elsif dtypes.is_a?(::Array)
114
- dtype_slice = dtypes
115
- else
116
- raise ArgumentError, "dtype arg should be list or dict"
117
- end
118
- end
119
-
120
- processed_null_values = Utils._process_null_values(null_values)
121
-
122
- if columns.is_a?(::String)
123
- columns = [columns]
124
- end
125
- if file.is_a?(::String) && file.include?("*")
126
- dtypes_dict = nil
127
- if !dtype_list.nil?
128
- dtypes_dict = dtype_list.to_h
129
- end
130
- if !dtype_slice.nil?
131
- raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
132
- end
133
- scan = Polars.scan_csv(
134
- file,
135
- has_header: has_header,
136
- sep: sep,
137
- comment_char: comment_char,
138
- quote_char: quote_char,
139
- skip_rows: skip_rows,
140
- dtypes: dtypes_dict,
141
- null_values: null_values,
142
- ignore_errors: ignore_errors,
143
- infer_schema_length: infer_schema_length,
144
- n_rows: n_rows,
145
- low_memory: low_memory,
146
- rechunk: rechunk,
147
- skip_rows_after_header: skip_rows_after_header,
148
- row_count_name: row_count_name,
149
- row_count_offset: row_count_offset,
150
- eol_char: eol_char
151
- )
152
- if columns.nil?
153
- return _from_rbdf(scan.collect._df)
154
- elsif is_str_sequence(columns, allow_str: false)
155
- return _from_rbdf(scan.select(columns).collect._df)
156
- else
157
- raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
158
- end
159
- end
160
-
161
- projection, columns = Utils.handle_projection_columns(columns)
162
-
163
- _from_rbdf(
164
- RbDataFrame.read_csv(
165
- file,
166
- infer_schema_length,
167
- batch_size,
168
- has_header,
169
- ignore_errors,
170
- n_rows,
171
- skip_rows,
172
- projection,
173
- sep,
174
- rechunk,
175
- columns,
176
- encoding,
177
- n_threads,
178
- path,
179
- dtype_list,
180
- dtype_slice,
181
- low_memory,
182
- comment_char,
183
- quote_char,
184
- processed_null_values,
185
- parse_dates,
186
- skip_rows_after_header,
187
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
188
- sample_size,
189
- eol_char
190
- )
191
- )
192
- end
193
-
194
- # @private
195
- def self._read_parquet(
196
- source,
197
- columns: nil,
198
- n_rows: nil,
199
- parallel: "auto",
200
- row_count_name: nil,
201
- row_count_offset: 0,
202
- low_memory: false,
203
- use_statistics: true,
204
- rechunk: true
205
- )
206
- if Utils.pathlike?(source)
207
- source = Utils.normalise_filepath(source)
208
- end
209
- if columns.is_a?(::String)
210
- columns = [columns]
211
- end
212
-
213
- if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
214
- scan =
215
- Polars.scan_parquet(
216
- source,
217
- n_rows: n_rows,
218
- rechunk: true,
219
- parallel: parallel,
220
- row_count_name: row_count_name,
221
- row_count_offset: row_count_offset,
222
- low_memory: low_memory
223
- )
224
-
225
- if columns.nil?
226
- return self._from_rbdf(scan.collect._df)
227
- elsif Utils.is_str_sequence(columns, allow_str: false)
228
- return self._from_rbdf(scan.select(columns).collect._df)
229
- else
230
- raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
231
- end
232
- end
233
-
234
- projection, columns = Utils.handle_projection_columns(columns)
235
- _from_rbdf(
236
- RbDataFrame.read_parquet(
237
- source,
238
- columns,
239
- projection,
240
- n_rows,
241
- parallel,
242
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
243
- low_memory,
244
- use_statistics,
245
- rechunk
246
- )
247
- )
248
- end
249
-
250
- # @private
251
- def self._read_avro(file, columns: nil, n_rows: nil)
252
- if Utils.pathlike?(file)
253
- file = Utils.normalise_filepath(file)
254
- end
255
- projection, columns = Utils.handle_projection_columns(columns)
256
- _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
257
- end
258
-
259
- # @private
260
- def self._read_ipc(
261
- file,
262
- columns: nil,
263
- n_rows: nil,
264
- row_count_name: nil,
265
- row_count_offset: 0,
266
- rechunk: true,
267
- memory_map: true
268
- )
269
- if Utils.pathlike?(file)
270
- file = Utils.normalise_filepath(file)
271
- end
272
- if columns.is_a?(::String)
273
- columns = [columns]
274
- end
275
-
276
- if file.is_a?(::String) && file.include?("*")
277
- raise Todo
278
- end
279
-
280
- projection, columns = Utils.handle_projection_columns(columns)
281
- _from_rbdf(
282
- RbDataFrame.read_ipc(
283
- file,
284
- columns,
285
- projection,
286
- n_rows,
287
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
288
- memory_map
289
- )
290
- )
291
- end
292
-
293
- # @private
294
- def self._read_json(file)
295
- if Utils.pathlike?(file)
296
- file = Utils.normalise_filepath(file)
297
- end
298
-
299
- _from_rbdf(RbDataFrame.read_json(file))
300
- end
301
-
302
- # @private
303
- def self._read_ndjson(file)
304
- if Utils.pathlike?(file)
305
- file = Utils.normalise_filepath(file)
306
- end
307
-
308
- _from_rbdf(RbDataFrame.read_ndjson(file))
309
- end
310
-
311
49
  # Get the shape of the DataFrame.
312
50
  #
313
51
  # @return [Array]
@@ -416,6 +154,13 @@ module Polars
416
154
  _df.dtypes
417
155
  end
418
156
 
157
+ # Get flags that are set on the columns of this DataFrame.
158
+ #
159
+ # @return [Hash]
160
+ def flags
161
+ columns.to_h { |name| [name, self[name].flags] }
162
+ end
163
+
419
164
  # Get the schema.
420
165
  #
421
166
  # @return [Hash]
@@ -814,8 +559,6 @@ module Polars
814
559
 
815
560
  # Serialize to JSON representation.
816
561
  #
817
- # @return [nil]
818
- #
819
562
  # @param file [String]
820
563
  # File path to which the result should be written.
821
564
  # @param pretty [Boolean]
@@ -823,17 +566,45 @@ module Polars
823
566
  # @param row_oriented [Boolean]
824
567
  # Write to row oriented json. This is slower, but more common.
825
568
  #
826
- # @see #write_ndjson
569
+ # @return [String, nil]
570
+ #
571
+ # @example
572
+ # df = Polars::DataFrame.new(
573
+ # {
574
+ # "foo" => [1, 2, 3],
575
+ # "bar" => [6, 7, 8]
576
+ # }
577
+ # )
578
+ # df.write_json
579
+ # # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
580
+ #
581
+ # @example
582
+ # df.write_json(row_oriented: true)
583
+ # # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
827
584
  def write_json(
828
- file,
585
+ file = nil,
829
586
  pretty: false,
830
587
  row_oriented: false
831
588
  )
832
589
  if Utils.pathlike?(file)
833
- file = Utils.normalise_filepath(file)
590
+ file = Utils.normalize_filepath(file)
591
+ end
592
+ to_string_io = !file.nil? && file.is_a?(StringIO)
593
+ if file.nil? || to_string_io
594
+ buf = StringIO.new
595
+ buf.set_encoding(Encoding::BINARY)
596
+ _df.write_json(buf, pretty, row_oriented)
597
+ json_bytes = buf.string
598
+
599
+ json_str = json_bytes.force_encoding(Encoding::UTF_8)
600
+ if to_string_io
601
+ file.write(json_str)
602
+ else
603
+ return json_str
604
+ end
605
+ else
606
+ _df.write_json(file, pretty, row_oriented)
834
607
  end
835
-
836
- _df.write_json(file, pretty, row_oriented)
837
608
  nil
838
609
  end
839
610
 
@@ -843,12 +614,36 @@ module Polars
843
614
  # File path to which the result should be written.
844
615
  #
845
616
  # @return [String, nil]
846
- def write_ndjson(file)
617
+ #
618
+ # @example
619
+ # df = Polars::DataFrame.new(
620
+ # {
621
+ # "foo" => [1, 2, 3],
622
+ # "bar" => [6, 7, 8]
623
+ # }
624
+ # )
625
+ # df.write_ndjson()
626
+ # # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
627
+ def write_ndjson(file = nil)
847
628
  if Utils.pathlike?(file)
848
- file = Utils.normalise_filepath(file)
629
+ file = Utils.normalize_filepath(file)
630
+ end
631
+ to_string_io = !file.nil? && file.is_a?(StringIO)
632
+ if file.nil? || to_string_io
633
+ buf = StringIO.new
634
+ buf.set_encoding(Encoding::BINARY)
635
+ _df.write_ndjson(buf)
636
+ json_bytes = buf.string
637
+
638
+ json_str = json_bytes.force_encoding(Encoding::UTF_8)
639
+ if to_string_io
640
+ file.write(json_str)
641
+ else
642
+ return json_str
643
+ end
644
+ else
645
+ _df.write_ndjson(file)
849
646
  end
850
-
851
- _df.write_ndjson(file)
852
647
  nil
853
648
  end
854
649
 
@@ -938,7 +733,7 @@ module Polars
938
733
  end
939
734
 
940
735
  if Utils.pathlike?(file)
941
- file = Utils.normalise_filepath(file)
736
+ file = Utils.normalize_filepath(file)
942
737
  end
943
738
 
944
739
  _df.write_csv(
@@ -976,7 +771,7 @@ module Polars
976
771
  compression = "uncompressed"
977
772
  end
978
773
  if Utils.pathlike?(file)
979
- file = Utils.normalise_filepath(file)
774
+ file = Utils.normalize_filepath(file)
980
775
  end
981
776
 
982
777
  _df.write_avro(file, compression)
@@ -997,7 +792,7 @@ module Polars
997
792
  file.set_encoding(Encoding::BINARY)
998
793
  end
999
794
  if Utils.pathlike?(file)
1000
- file = Utils.normalise_filepath(file)
795
+ file = Utils.normalize_filepath(file)
1001
796
  end
1002
797
 
1003
798
  if compression.nil?
@@ -1008,9 +803,50 @@ module Polars
1008
803
  return_bytes ? file.string : nil
1009
804
  end
1010
805
 
806
+ # Write to Arrow IPC record batch stream.
807
+ #
808
+ # See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.
809
+ #
810
+ # @param file [Object]
811
+ # Path or writable file-like object to which the IPC record batch data will
812
+ # be written. If set to `nil`, the output is returned as a binary String.
813
+ # @param compression ['uncompressed', 'lz4', 'zstd']
814
+ # Compression method. Defaults to "uncompressed".
815
+ #
816
+ # @return [Object]
817
+ #
818
+ # @example
819
+ # df = Polars::DataFrame.new(
820
+ # {
821
+ # "foo" => [1, 2, 3, 4, 5],
822
+ # "bar" => [6, 7, 8, 9, 10],
823
+ # "ham" => ["a", "b", "c", "d", "e"]
824
+ # }
825
+ # )
826
+ # df.write_ipc_stream("new_file.arrow")
827
+ def write_ipc_stream(
828
+ file,
829
+ compression: "uncompressed"
830
+ )
831
+ return_bytes = file.nil?
832
+ if return_bytes
833
+ file = StringIO.new
834
+ file.set_encoding(Encoding::BINARY)
835
+ elsif Utils.pathlike?(file)
836
+ file = Utils.normalize_filepath(file)
837
+ end
838
+
839
+ if compression.nil?
840
+ compression = "uncompressed"
841
+ end
842
+
843
+ _df.write_ipc_stream(file, compression)
844
+ return_bytes ? file.string : nil
845
+ end
846
+
1011
847
  # Write to Apache Parquet file.
1012
848
  #
1013
- # @param file [String]
849
+ # @param file [String, Pathname, StringIO]
1014
850
  # File path to which the file should be written.
1015
851
  # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
1016
852
  # Choose "zstd" for good compression performance.
@@ -1027,10 +863,9 @@ module Polars
1027
863
  # @param statistics [Boolean]
1028
864
  # Write statistics to the parquet headers. This requires extra compute.
1029
865
  # @param row_group_size [Integer, nil]
1030
- # Size of the row groups in number of rows.
1031
- # If `nil` (default), the chunks of the DataFrame are
1032
- # used. Writing in smaller chunks may reduce memory pressure and improve
1033
- # writing speeds.
866
+ # Size of the row groups in number of rows. Defaults to 512^2 rows.
867
+ # @param data_page_size [Integer, nil]
868
+ # Size of the data page in bytes. Defaults to 1024^2 bytes.
1034
869
  #
1035
870
  # @return [nil]
1036
871
  def write_parquet(
@@ -1038,17 +873,18 @@ module Polars
1038
873
  compression: "zstd",
1039
874
  compression_level: nil,
1040
875
  statistics: false,
1041
- row_group_size: nil
876
+ row_group_size: nil,
877
+ data_page_size: nil
1042
878
  )
1043
879
  if compression.nil?
1044
880
  compression = "uncompressed"
1045
881
  end
1046
882
  if Utils.pathlike?(file)
1047
- file = Utils.normalise_filepath(file)
883
+ file = Utils.normalize_filepath(file)
1048
884
  end
1049
885
 
1050
886
  _df.write_parquet(
1051
- file, compression, compression_level, statistics, row_group_size
887
+ file, compression, compression_level, statistics, row_group_size, data_page_size
1052
888
  )
1053
889
  end
1054
890
 
@@ -1084,7 +920,7 @@ module Polars
1084
920
  # df.estimated_size
1085
921
  # # => 25888898
1086
922
  # df.estimated_size("mb")
1087
- # # => 26.702880859375
923
+ # # => 17.0601749420166
1088
924
  def estimated_size(unit = "b")
1089
925
  sz = _df.estimated_size
1090
926
  Utils.scale_bytes(sz, to: unit)
@@ -1720,10 +1556,7 @@ module Polars
1720
1556
  # # │ 3 ┆ 8 ┆ c │
1721
1557
  # # └─────┴─────┴─────┘
1722
1558
  def drop_nulls(subset: nil)
1723
- if subset.is_a?(::String)
1724
- subset = [subset]
1725
- end
1726
- _from_rbdf(_df.drop_nulls(subset))
1559
+ lazy.drop_nulls(subset: subset).collect(_eager: true)
1727
1560
  end
1728
1561
 
1729
1562
  # Offers a structured way to apply a sequence of user-defined functions (UDFs).
@@ -1785,16 +1618,16 @@ module Polars
1785
1618
  # df.with_row_index
1786
1619
  # # =>
1787
1620
  # # shape: (3, 3)
1788
- # # ┌────────┬─────┬─────┐
1789
- # # │ row_nr ┆ a ┆ b │
1790
- # # │ --- ┆ --- ┆ --- │
1791
- # # │ u32 ┆ i64 ┆ i64 │
1792
- # # ╞════════╪═════╪═════╡
1793
- # # │ 0 ┆ 1 ┆ 2 │
1794
- # # │ 1 ┆ 3 ┆ 4 │
1795
- # # │ 2 ┆ 5 ┆ 6 │
1796
- # # └────────┴─────┴─────┘
1797
- def with_row_index(name: "row_nr", offset: 0)
1621
+ # # ┌───────┬─────┬─────┐
1622
+ # # │ index ┆ a ┆ b │
1623
+ # # │ --- ┆ --- ┆ --- │
1624
+ # # │ u32 ┆ i64 ┆ i64 │
1625
+ # # ╞═══════╪═════╪═════╡
1626
+ # # │ 0 ┆ 1 ┆ 2 │
1627
+ # # │ 1 ┆ 3 ┆ 4 │
1628
+ # # │ 2 ┆ 5 ┆ 6 │
1629
+ # # └───────┴─────┴─────┘
1630
+ def with_row_index(name: "index", offset: 0)
1798
1631
  _from_rbdf(_df.with_row_index(name, offset))
1799
1632
  end
1800
1633
  alias_method :with_row_count, :with_row_index
@@ -2083,16 +1916,16 @@ module Polars
2083
1916
  # )
2084
1917
  # # =>
2085
1918
  # # shape: (4, 3)
2086
- # # ┌─────────────────────┬────────────┬───────────────────────────────────┐
2087
- # # │ time ┆ time_count ┆ time_agg_list
2088
- # # │ --- ┆ --- ┆ ---
2089
- # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
2090
- # # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
2091
- # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16…
2092
- # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16…
2093
- # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16…
2094
- # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
2095
- # # └─────────────────────┴────────────┴───────────────────────────────────┘
1919
+ # # ┌─────────────────────┬────────────┬─────────────────────────────────┐
1920
+ # # │ time ┆ time_count ┆ time_agg_list
1921
+ # # │ --- ┆ --- ┆ ---
1922
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
1923
+ # # ╞═════════════════════╪════════════╪═════════════════════════════════╡
1924
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-…
1925
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-…
1926
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-…
1927
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
1928
+ # # └─────────────────────┴────────────┴─────────────────────────────────┘
2096
1929
  #
2097
1930
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
2098
1931
  # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -2161,12 +1994,13 @@ module Polars
2161
1994
  # closed: "right"
2162
1995
  # ).agg(Polars.col("A").alias("A_agg_list"))
2163
1996
  # # =>
2164
- # # shape: (3, 4)
1997
+ # # shape: (4, 4)
2165
1998
  # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
2166
1999
  # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
2167
2000
  # # │ --- ┆ --- ┆ --- ┆ --- │
2168
2001
  # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
2169
2002
  # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
2003
+ # # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
2170
2004
  # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
2171
2005
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
2172
2006
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
@@ -2566,7 +2400,7 @@ module Polars
2566
2400
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
2567
2401
  #
2568
2402
  # @example Return a DataFrame by mapping each row to a tuple:
2569
- # df.apply { |t| [t[0] * 2, t[1] * 3] }
2403
+ # df.map_rows { |t| [t[0] * 2, t[1] * 3] }
2570
2404
  # # =>
2571
2405
  # # shape: (3, 2)
2572
2406
  # # ┌──────────┬──────────┐
@@ -2580,7 +2414,7 @@ module Polars
2580
2414
  # # └──────────┴──────────┘
2581
2415
  #
2582
2416
  # @example Return a Series by mapping each row to a scalar:
2583
- # df.apply { |t| t[0] * 2 + t[1] }
2417
+ # df.map_rows { |t| t[0] * 2 + t[1] }
2584
2418
  # # =>
2585
2419
  # # shape: (3, 1)
2586
2420
  # # ┌───────┐
@@ -2592,14 +2426,15 @@ module Polars
2592
2426
  # # │ 9 │
2593
2427
  # # │ 14 │
2594
2428
  # # └───────┘
2595
- def apply(return_dtype: nil, inference_size: 256, &f)
2596
- out, is_df = _df.apply(f, return_dtype, inference_size)
2429
+ def map_rows(return_dtype: nil, inference_size: 256, &f)
2430
+ out, is_df = _df.map_rows(f, return_dtype, inference_size)
2597
2431
  if is_df
2598
2432
  _from_rbdf(out)
2599
2433
  else
2600
2434
  _from_rbdf(Utils.wrap_s(out).to_frame._df)
2601
2435
  end
2602
2436
  end
2437
+ alias_method :apply, :map_rows
2603
2438
 
2604
2439
  # Return a new DataFrame with the column added or replaced.
2605
2440
  #
@@ -2621,26 +2456,26 @@ module Polars
2621
2456
  # # ┌─────┬─────┬───────────┐
2622
2457
  # # │ a ┆ b ┆ b_squared │
2623
2458
  # # │ --- ┆ --- ┆ --- │
2624
- # # │ i64 ┆ i64 ┆ f64
2459
+ # # │ i64 ┆ i64 ┆ i64
2625
2460
  # # ╞═════╪═════╪═══════════╡
2626
- # # │ 1 ┆ 2 ┆ 4.0
2627
- # # │ 3 ┆ 4 ┆ 16.0
2628
- # # │ 5 ┆ 6 ┆ 36.0
2461
+ # # │ 1 ┆ 2 ┆ 4
2462
+ # # │ 3 ┆ 4 ┆ 16
2463
+ # # │ 5 ┆ 6 ┆ 36
2629
2464
  # # └─────┴─────┴───────────┘
2630
2465
  #
2631
2466
  # @example Replaced
2632
2467
  # df.with_column(Polars.col("a") ** 2)
2633
2468
  # # =>
2634
2469
  # # shape: (3, 2)
2635
- # # ┌──────┬─────┐
2636
- # # │ a ┆ b │
2637
- # # │ --- ┆ --- │
2638
- # # │ f64 ┆ i64 │
2639
- # # ╞══════╪═════╡
2640
- # # │ 1.0 ┆ 2 │
2641
- # # │ 9.0 ┆ 4 │
2642
- # # │ 25.0 ┆ 6 │
2643
- # # └──────┴─────┘
2470
+ # # ┌─────┬─────┐
2471
+ # # │ a ┆ b │
2472
+ # # │ --- ┆ --- │
2473
+ # # │ i64 ┆ i64 │
2474
+ # # ╞═════╪═════╡
2475
+ # # │ 1 ┆ 2 │
2476
+ # # │ 9 ┆ 4 │
2477
+ # # │ 25 ┆ 6 │
2478
+ # # └─────┴─────┘
2644
2479
  def with_column(column)
2645
2480
  lazy
2646
2481
  .with_column(column)
@@ -2807,16 +2642,36 @@ module Polars
2807
2642
  # # │ 2 ┆ 7.0 │
2808
2643
  # # │ 3 ┆ 8.0 │
2809
2644
  # # └─────┴─────┘
2810
- def drop(columns)
2811
- if columns.is_a?(::Array)
2812
- df = clone
2813
- columns.each do |n|
2814
- df._df.drop_in_place(n)
2815
- end
2816
- df
2817
- else
2818
- _from_rbdf(_df.drop(columns))
2819
- end
2645
+ #
2646
+ # @example Drop multiple columns by passing a list of column names.
2647
+ # df.drop(["bar", "ham"])
2648
+ # # =>
2649
+ # # shape: (3, 1)
2650
+ # # ┌─────┐
2651
+ # # │ foo │
2652
+ # # │ --- │
2653
+ # # │ i64 │
2654
+ # # ╞═════╡
2655
+ # # │ 1 │
2656
+ # # │ 2 │
2657
+ # # │ 3 │
2658
+ # # └─────┘
2659
+ #
2660
+ # @example Use positional arguments to drop multiple columns.
2661
+ # df.drop("foo", "ham")
2662
+ # # =>
2663
+ # # shape: (3, 1)
2664
+ # # ┌─────┐
2665
+ # # │ bar │
2666
+ # # │ --- │
2667
+ # # │ f64 │
2668
+ # # ╞═════╡
2669
+ # # │ 6.0 │
2670
+ # # │ 7.0 │
2671
+ # # │ 8.0 │
2672
+ # # └─────┘
2673
+ def drop(*columns)
2674
+ lazy.drop(*columns).collect(_eager: true)
2820
2675
  end
2821
2676
 
2822
2677
  # Drop in place.
@@ -3700,7 +3555,7 @@ module Polars
3700
3555
  # # ┌─────────┐
3701
3556
  # # │ literal │
3702
3557
  # # │ --- │
3703
- # # │ i64
3558
+ # # │ i32
3704
3559
  # # ╞═════════╡
3705
3560
  # # │ 0 │
3706
3561
  # # │ 0 │
@@ -3735,16 +3590,16 @@ module Polars
3735
3590
  # df.with_columns((Polars.col("a") ** 2).alias("a^2"))
3736
3591
  # # =>
3737
3592
  # # shape: (4, 4)
3738
- # # ┌─────┬──────┬───────┬──────┐
3739
- # # │ a ┆ b ┆ c ┆ a^2
3740
- # # │ --- ┆ --- ┆ --- ┆ ---
3741
- # # │ i64 ┆ f64 ┆ bool ┆ f64
3742
- # # ╞═════╪══════╪═══════╪══════╡
3743
- # # │ 1 ┆ 0.5 ┆ true ┆ 1.0
3744
- # # │ 2 ┆ 4.0 ┆ true ┆ 4.0
3745
- # # │ 3 ┆ 10.0 ┆ false ┆ 9.0
3746
- # # │ 4 ┆ 13.0 ┆ true ┆ 16.0
3747
- # # └─────┴──────┴───────┴──────┘
3593
+ # # ┌─────┬──────┬───────┬─────┐
3594
+ # # │ a ┆ b ┆ c ┆ a^2
3595
+ # # │ --- ┆ --- ┆ --- ┆ ---
3596
+ # # │ i64 ┆ f64 ┆ bool ┆ i64
3597
+ # # ╞═════╪══════╪═══════╪═════╡
3598
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1
3599
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4
3600
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9
3601
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16
3602
+ # # └─────┴──────┴───────┴─────┘
3748
3603
  #
3749
3604
  # @example Added columns will replace existing columns with the same name.
3750
3605
  # df.with_columns(Polars.col("a").cast(Polars::Float64))
@@ -3771,16 +3626,16 @@ module Polars
3771
3626
  # )
3772
3627
  # # =>
3773
3628
  # # shape: (4, 6)
3774
- # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
3775
- # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
3776
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3777
- # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
3778
- # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
3779
- # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
3780
- # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
3781
- # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
3782
- # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
3783
- # # └─────┴──────┴───────┴──────┴──────┴───────┘
3629
+ # # ┌─────┬──────┬───────┬─────┬──────┬───────┐
3630
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
3631
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3632
+ # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
3633
+ # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
3634
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
3635
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
3636
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
3637
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
3638
+ # # └─────┴──────┴───────┴─────┴──────┴───────┘
3784
3639
  #
3785
3640
  # @example Multiple columns also can be added using positional arguments instead of a list.
3786
3641
  # df.with_columns(
@@ -3790,16 +3645,16 @@ module Polars
3790
3645
  # )
3791
3646
  # # =>
3792
3647
  # # shape: (4, 6)
3793
- # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
3794
- # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
3795
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3796
- # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
3797
- # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
3798
- # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
3799
- # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
3800
- # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
3801
- # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
3802
- # # └─────┴──────┴───────┴──────┴──────┴───────┘
3648
+ # # ┌─────┬──────┬───────┬─────┬──────┬───────┐
3649
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
3650
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
3651
+ # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
3652
+ # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
3653
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
3654
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
3655
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
3656
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
3657
+ # # └─────┴──────┴───────┴─────┴──────┴───────┘
3803
3658
  #
3804
3659
  # @example Use keyword arguments to easily name your expression inputs.
3805
3660
  # df.with_columns(
@@ -5181,7 +5036,7 @@ module Polars
5181
5036
  elsif data[0].is_a?(Hash)
5182
5037
  column_names, dtypes = _unpack_schema(columns)
5183
5038
  schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
5184
- rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
5039
+ rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, false, infer_schema_length)
5185
5040
  if column_names
5186
5041
  rbdf = _post_apply_columns(rbdf, column_names)
5187
5042
  end
@@ -5215,7 +5070,7 @@ module Polars
5215
5070
  if unpack_nested
5216
5071
  raise Todo
5217
5072
  else
5218
- rbdf = RbDataFrame.read_rows(
5073
+ rbdf = RbDataFrame.from_rows(
5219
5074
  data,
5220
5075
  infer_schema_length,
5221
5076
  local_schema_override.any? ? local_schema_override : nil