polars-df 0.3.1-x86_64-linux → 0.5.0-x86_64-linux

@@ -18,7 +18,10 @@ module Polars
  # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
  # the orientation is inferred by matching the columns and data dimensions. If
  # this does not yield conclusive results, column orientation is used.
- def initialize(data = nil, columns: nil, orient: nil)
+ def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
+ schema ||= columns
+ raise Todo if schema_overrides
+
  # TODO deprecate in favor of read_sql
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
  end

  if data.nil?
- self._df = self.class.hash_to_rbdf({}, columns: columns)
+ self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
  elsif data.is_a?(Hash)
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
- self._df = self.class.hash_to_rbdf(data, columns: columns)
+ self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
  elsif data.is_a?(Array)
- self._df = self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
+ self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
  elsif data.is_a?(Series)
- self._df = self.class.series_to_rbdf(data, columns: columns)
+ self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
  else
  raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
  end
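
The constructor now takes an explicit schema, with columns: kept as a fallback (schema ||= columns) and schema_overrides: still unimplemented (raise Todo). A minimal usage sketch of the new keyword; the column names and dtypes are illustrative:

    # Empty frame with a typed schema
    df = Polars::DataFrame.new(schema: {"id" => Polars::Int64, "name" => Polars::Utf8})
    df.schema # => {"id" => Polars::Int64, "name" => Polars::Utf8}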
@@ -56,8 +59,8 @@ module Polars
  end

  # @private
- def self._from_hash(data, columns: nil)
- _from_rbdf(hash_to_rbdf(data, columns: columns))
+ def self._from_hash(data, schema: nil, schema_overrides: nil)
+ _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
  end

  # def self._from_records
@@ -97,7 +100,7 @@ module Polars
  eol_char: "\n"
  )
  if Utils.pathlike?(file)
- path = Utils.format_path(file)
+ path = Utils.normalise_filepath(file)
  else
  path = nil
  # if defined?(StringIO) && file.is_a?(StringIO)
@@ -196,32 +199,56 @@ module Polars

  # @private
  def self._read_parquet(
- file,
+ source,
  columns: nil,
  n_rows: nil,
  parallel: "auto",
  row_count_name: nil,
  row_count_offset: 0,
- low_memory: false
+ low_memory: false,
+ use_statistics: true,
+ rechunk: true
  )
- if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ if Utils.pathlike?(source)
+ source = Utils.normalise_filepath(source)
+ end
+ if columns.is_a?(String)
+ columns = [columns]
  end

- if file.is_a?(String) && file.include?("*")
- raise Todo
+ if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
+ scan =
+ Polars.scan_parquet(
+ source,
+ n_rows: n_rows,
+ rechunk: true,
+ parallel: parallel,
+ row_count_name: row_count_name,
+ row_count_offset: row_count_offset,
+ low_memory: low_memory
+ )
+
+ if columns.nil?
+ return self._from_rbdf(scan.collect._df)
+ elsif Utils.is_str_sequence(columns, allow_str: false)
+ return self._from_rbdf(scan.select(columns).collect._df)
+ else
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
+ end
  end

  projection, columns = Utils.handle_projection_columns(columns)
  _from_rbdf(
  RbDataFrame.read_parquet(
- file,
+ source,
  columns,
  projection,
  n_rows,
  parallel,
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
- low_memory
+ low_memory,
+ use_statistics,
+ rechunk
  )
  )
  end
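
Glob patterns are no longer a Todo: local globs are routed through the lazy scanner, with an optional column projection applied before collect. A hedged sketch, assuming the public Polars.read_parquet wrapper forwards its arguments here (the path is illustrative):

    # columns must be names, not integer indices, when a glob is used
    df = Polars.read_parquet("data/part-*.parquet", columns: ["id", "value"])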
@@ -229,7 +256,7 @@ module Polars
  # @private
  def self._read_avro(file, columns: nil, n_rows: nil)
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end
  projection, columns = Utils.handle_projection_columns(columns)
  _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
@@ -246,7 +273,7 @@ module Polars
  memory_map: true
  )
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end
  if columns.is_a?(String)
  columns = [columns]
@@ -272,7 +299,7 @@ module Polars
  # @private
  def self._read_json(file)
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _from_rbdf(RbDataFrame.read_json(file))
@@ -281,7 +308,7 @@ module Polars
  # @private
  def self._read_ndjson(file)
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _from_rbdf(RbDataFrame.read_ndjson(file))
@@ -312,6 +339,7 @@ module Polars
  end
  alias_method :count, :height
  alias_method :length, :height
+ alias_method :size, :height

  # Get the width of the DataFrame.
  #
@@ -522,6 +550,13 @@ module Polars
  end
  alias_method :inspect, :to_s

+ # Returns an array representing the DataFrame
+ #
+ # @return [Array]
+ def to_a
+ rows(named: true)
+ end
+
  # Check if DataFrame includes column.
  #
  # @return [Boolean]
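
A short sketch of the new method; the output shape follows from rows(named: true):

    df = Polars::DataFrame.new({"a" => [1, 2]})
    df.to_a # => [{"a" => 1}, {"a" => 2}]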
@@ -631,7 +666,7 @@ module Polars
  end

  # Ruby-specific
- if item.is_a?(Expr)
+ if item.is_a?(Expr) || item.is_a?(Series)
  return filter(item)
  end

@@ -641,15 +676,42 @@ module Polars
  # Set item.
  #
  # @return [Object]
- #
- # def []=(key, value)
- # if key.is_a?(String)
- # raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
- # end
+ def []=(*key, value)
+ if key.length == 1
+ key = key.first
+ elsif key.length != 2
+ raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
+ end

- # raise Todo
- # end
+ if Utils.strlike?(key)
+ if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
+ value = Series.new(value)
+ elsif !value.is_a?(Series)
+ value = Polars.lit(value)
+ end
+ self._df = with_column(value.alias(key.to_s))._df
+ elsif key.is_a?(Array)
+ row_selection, col_selection = key
+
+ if Utils.strlike?(col_selection)
+ s = self[col_selection]
+ elsif col_selection.is_a?(Integer)
+ raise Todo
+ else
+ raise ArgumentError, "column selection not understood: #{col_selection}"
+ end
+
+ s[row_selection] = value

+ if col_selection.is_a?(Integer)
+ replace_at_idx(col_selection, s)
+ elsif Utils.strlike?(col_selection)
+ replace(col_selection, s)
+ end
+ else
+ raise Todo
+ end
+ end

  # Return the dataframe as a scalar.
  #
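
The setter is now implemented rather than raising. A usage sketch (values illustrative):

    df = Polars::DataFrame.new({"a" => [1, 2, 3]})
    df["b"] = [4, 5, 6] # Array becomes a Series, added via with_column
    df[1, "a"] = 10     # [row, column-name] updates a single value in place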
@@ -774,7 +836,7 @@ module Polars
  row_oriented: false
  )
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_json(file, pretty, row_oriented)
@@ -789,7 +851,7 @@ module Polars
  # @return [nil]
  def write_ndjson(file)
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_ndjson(file)
@@ -879,7 +941,7 @@ module Polars
  end

  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_csv(
@@ -917,7 +979,7 @@ module Polars
  compression = "uncompressed"
  end
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_avro(file, compression)
@@ -936,7 +998,7 @@ module Polars
  compression = "uncompressed"
  end
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_ipc(file, compression)
@@ -978,7 +1040,7 @@ module Polars
  compression = "uncompressed"
  end
  if Utils.pathlike?(file)
- file = Utils.format_path(file)
+ file = Utils.normalise_filepath(file)
  end

  _df.write_parquet(
@@ -1438,6 +1500,20 @@ module Polars
  end
  end

+ # Sort the DataFrame by column in-place.
+ #
+ # @param by [String]
+ # By which column to sort.
+ # @param reverse [Boolean]
+ # Reverse/descending sort.
+ # @param nulls_last [Boolean]
+ # Place null values last. Can only be used if sorted by a single column.
+ #
+ # @return [DataFrame]
+ def sort!(by, reverse: false, nulls_last: false)
+ self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
+ end
+
  # Check if DataFrame is equal to other.
  #
  # @param other [DataFrame]
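
A quick sketch of the in-place variant (data illustrative):

    df = Polars::DataFrame.new({"a" => [3, 1, 2]})
    df.sort!("a", reverse: true) # mutates df by swapping in the sorted _df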
@@ -1495,7 +1571,7 @@ module Polars
  # # │ 30 ┆ 6 │
  # # └─────┴─────┘
  def replace(column, new_col)
- _df.replace(column, new_col._s)
+ _df.replace(column.to_s, new_col._s)
  self
  end

@@ -1836,7 +1912,7 @@ module Polars
  # "2020-01-08 23:16:43"
  # ]
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
- # Polars.col("dt").str.strptime(:datetime)
+ # Polars.col("dt").str.strptime(Polars::Datetime)
  # )
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
  # [
@@ -2767,6 +2843,16 @@ module Polars
  Utils.wrap_s(_df.drop_in_place(name))
  end

+ # Drop a column in place, if it exists.
+ #
+ # @param name [Object]
+ # Column to drop.
+ #
+ # @return [Series]
+ def delete(name)
+ drop_in_place(name) if include?(name)
+ end
+
  # Create an empty copy of the current DataFrame.
  #
  # Returns a DataFrame with identical schema but no data.
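
This gives DataFrame a Hash#delete-like method; the return values below are inferred from the body above:

    df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
    df.delete("b")       # => the removed Series
    df.delete("missing") # => nil, no error for absent columns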
@@ -3042,24 +3128,28 @@ module Polars
  if aggregate_fn.is_a?(String)
  case aggregate_fn
  when "first"
- aggregate_fn = Polars.element.first
+ aggregate_expr = Polars.element.first._rbexpr
  when "sum"
- aggregate_fn = Polars.element.sum
+ aggregate_expr = Polars.element.sum._rbexpr
  when "max"
- aggregate_fn = Polars.element.max
+ aggregate_expr = Polars.element.max._rbexpr
  when "min"
- aggregate_fn = Polars.element.min
+ aggregate_expr = Polars.element.min._rbexpr
  when "mean"
- aggregate_fn = Polars.element.mean
+ aggregate_expr = Polars.element.mean._rbexpr
  when "median"
- aggregate_fn = Polars.element.median
+ aggregate_expr = Polars.element.median._rbexpr
  when "last"
- aggregate_fn = Polars.element.last
+ aggregate_expr = Polars.element.last._rbexpr
  when "count"
- aggregate_fn = Polars.count
+ aggregate_expr = Polars.count._rbexpr
  else
  raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
  end
+ elsif aggregate_fn.nil?
+ aggregate_expr = nil
+ else
+ aggregate_expr = aggregate_fn._rbexpr
  end

  _from_rbdf(
@@ -3067,9 +3157,9 @@ module Polars
  values,
  index,
  columns,
- aggregate_fn._rbexpr,
  maintain_order,
  sort_columns,
+ aggregate_expr,
  separator
  )
  )
@@ -3174,7 +3264,7 @@ module Polars
  # # │ B ┆ 1 │
  # # │ C ┆ 2 │
  # # │ D ┆ 3 │
- # # │ ... ┆ ... │
+ # # │ E ┆ 4 │
  # # │ F ┆ 5 │
  # # │ G ┆ 6 │
  # # │ H ┆ 7 │
@@ -4053,15 +4143,12 @@ module Polars
  # # │ 5 ┆ 3.0 ┆ true │
  # # └─────┴─────┴───────┘
  def unique(maintain_order: true, subset: nil, keep: "first")
- if !subset.nil?
- if subset.is_a?(String)
- subset = [subset]
- elsif !subset.is_a?(Array)
- subset = subset.to_a
- end
- end
-
- _from_rbdf(_df.unique(maintain_order, subset, keep))
+ self._from_rbdf(
+ lazy
+ .unique(maintain_order: maintain_order, subset: subset, keep: keep)
+ .collect(no_optimization: true)
+ ._df
+ )
  end

  # Return the number of unique rows, or the number of unique row-subsets.
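
The eager method now delegates to the lazy engine, so subset handling lives in one place. A usage sketch (data illustrative):

    df = Polars::DataFrame.new({"a" => [1, 1, 2], "b" => [1, 2, 3]})
    df.unique(subset: ["a"], keep: "last") # dedupe on column "a" only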
@@ -4403,7 +4490,7 @@ module Polars
  end
  end

- # Returns an iterator over the DataFrame of rows of python-native values.
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
  #
  # @param named [Boolean]
  # Return hashes instead of arrays. The hashes are a mapping of
@@ -4464,6 +4551,24 @@ module Polars
  end
  end

+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
+ #
+ # @param named [Boolean]
+ # Return hashes instead of arrays. The hashes are a mapping of
+ # column name to row value. This is more expensive than returning an
+ # array, but allows for accessing values by column name.
+ # @param buffer_size [Integer]
+ # Determines the number of rows that are buffered internally while iterating
+ # over the data; you should only modify this in very specific cases where the
+ # default value is determined not to be a good fit to your access pattern, as
+ # the speedup from using the buffer is significant (~2-4x). Setting this
+ # value to zero disables row buffering.
+ #
+ # @return [Object]
+ def each_row(named: true, buffer_size: 500, &block)
+ iter_rows(named: named, buffer_size: buffer_size, &block)
+ end
+
  # Shrink DataFrame memory usage.
  #
  # Shrinks to fit the exact capacity needed to hold the data.
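
each_row is a block-friendly wrapper over iter_rows; a short sketch:

    df = Polars::DataFrame.new({"a" => [1, 2], "b" => ["x", "y"]})
    df.each_row { |row| puts row["a"] } # rows arrive as name => value hashes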
@@ -4717,20 +4822,63 @@ module Polars
  end

  # @private
- def self.hash_to_rbdf(data, columns: nil)
- if !columns.nil?
- columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
+ def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
+ updated_data = {}
+ unless data.empty?
+ dtypes = schema_overrides || {}
+ array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
+ if array_len > 0
+ data.each do |name, val|
+ dtype = dtypes[name]
+ if val.is_a?(Hash) && dtype != Struct
+ updated_data[name] = DataFrame.new(val).to_struct(name)
+ elsif !Utils.arrlen(val).nil?
+ updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
+ dtype = Polars::Float64 if val.nil? && dtype.nil?
+ updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
+ else
+ raise Todo
+ end
+ end
+ elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
+ data.each do |name, val|
+ updated_data[name] = Series.new(name, val, dtype: dtypes[name])
+ end
+ elsif data.values.all? { |val| Utils.arrlen(val).nil? }
+ data.each do |name, val|
+ updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
+ end
+ end
+ end
+ updated_data
+ end

- if data.empty? && dtypes
- data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
- else
- data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
+ # @private
+ def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
+ if schema.is_a?(Hash) && !data.empty?
+ if !data.all? { |col, _| schema[col] }
+ raise ArgumentError, "The given column-schema names do not match the data dictionary"
  end
- data_series = _handle_columns_arg(data_series, columns: columns)
- return RbDataFrame.new(data_series)
+
+ data = schema.to_h { |col| [col, data[col]] }
+ end
+
+ column_names, schema_overrides = _unpack_schema(
+ schema, lookup_names: data.keys, schema_overrides: schema_overrides
+ )
+ if column_names.empty?
+ column_names = data.keys
+ end
+
+ if data.empty? && !schema_overrides.empty?
+ data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
+ else
+ data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
  end

- RbDataFrame.read_hash(data)
+ data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
+ RbDataFrame.new(data_series)
  end

  # @private
@@ -4739,14 +4887,12 @@ module Polars
  end

  # @private
- def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
- raise Todo if schema_overrides
-
- if columns.is_a?(Hash)
- columns = columns.to_a
+ def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
+ if schema.is_a?(Hash)
+ schema = schema.to_a
  end
  column_names =
- (columns || []).map.with_index do |col, i|
+ (schema || []).map.with_index do |col, i|
  if col.is_a?(String)
  col || "column_#{i}"
  else
@@ -4759,21 +4905,38 @@ module Polars
  # TODO zip_longest
  lookup = column_names.zip(lookup_names || []).to_h

- [
- column_names,
- (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
+ column_dtypes =
+ (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
  [lookup[col[0]] || col[0], col[1]]
  end
- ]
+
+ if schema_overrides
+ raise Todo
+ end
+
+ column_dtypes.each do |col, dtype|
+ if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
+ column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
+ end
+ end
+
+ [column_names, column_dtypes]
  end

- def self._handle_columns_arg(data, columns: nil)
- if columns.nil?
+ def self._handle_columns_arg(data, columns: nil, from_hash: false)
+ if columns.nil? || columns.empty?
  data
  else
  if data.empty?
  columns.map { |c| Series.new(c, nil)._s }
  elsif data.length == columns.length
+ if from_hash
+ series_map = data.to_h { |s| [s.name, s] }
+ if columns.all? { |col| series_map.key?(col) }
+ return columns.map { |col| series_map[col] }
+ end
+ end
+
  columns.each_with_index do |c, i|
  # not in-place?
  data[i].rename(c)
@@ -4788,7 +4951,7 @@ module Polars
  def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
  rbdf_columns = rbdf.columns
  rbdf_dtypes = rbdf.dtypes
- columns, dtypes = _unpack_columns(
+ columns, dtypes = _unpack_schema(
  (columns || rbdf_columns), schema_overrides: schema_overrides
  )
  column_subset = []
@@ -4826,20 +4989,23 @@ module Polars
  end

  # @private
- def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
+ def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
+ raise Todo if schema_overrides
+ columns = schema
+
  if data.length == 0
- return hash_to_rbdf({}, columns: columns)
+ return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
  end

  if data[0].is_a?(Series)
  # series_names = data.map(&:name)
- # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
+ # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
  data_series = []
  data.each do |s|
  data_series << s._s
  end
  elsif data[0].is_a?(Hash)
- column_names, dtypes = _unpack_columns(columns)
+ column_names, dtypes = _unpack_schema(columns)
  schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
  if column_names
@@ -4865,11 +5031,21 @@ module Polars
  end

  # @private
- def self.series_to_rbdf(data, columns: nil)
- if columns
- raise Todo
+ def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
+ data_series = [data._s]
+ series_name = data_series.map(&:name)
+ column_names, schema_overrides = _unpack_schema(
+ schema || series_name, schema_overrides: schema_overrides, n_expected: 1
+ )
+ if schema_overrides.any?
+ new_dtype = schema_overrides.values[0]
+ if new_dtype != data.dtype
+ data_series[0] = data_series[0].cast(new_dtype, true)
+ end
  end
- RbDataFrame.new([data._s])
+
+ data_series = _handle_columns_arg(data_series, columns: column_names)
+ RbDataFrame.new(data_series)
  end

  def wrap_ldf(ldf)
@@ -84,20 +84,22 @@ module Polars

  # Calendar date and time type.
  class Datetime < TemporalType
- attr_reader :tu
+ attr_reader :time_unit, :time_zone
+ alias_method :tu, :time_unit

  def initialize(time_unit = "us", time_zone = nil)
- @tu = time_unit || "us"
+ @time_unit = time_unit || "us"
  @time_zone = time_zone
  end
  end

  # Time duration/delta type.
  class Duration < TemporalType
- attr_reader :tu
+ attr_reader :time_unit
+ alias_method :tu, :time_unit

  def initialize(time_unit = "us")
- @tu = time_unit
+ @time_unit = time_unit
  end
  end
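
The readers are renamed, with tu kept as a backwards-compatible alias; a short sketch:

    dtype = Polars::Datetime.new("ms", "UTC")
    dtype.time_unit # => "ms"
    dtype.time_zone # => "UTC"
    dtype.tu        # => "ms"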