polars-df 0.3.1-aarch64-linux → 0.5.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,10 @@ module Polars
18
18
  # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
19
19
  # the orientation is inferred by matching the columns and data dimensions. If
20
20
  # this does not yield conclusive results, column orientation is used.
21
- def initialize(data = nil, columns: nil, orient: nil)
21
+ def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
22
+ schema ||= columns
23
+ raise Todo if schema_overrides
24
+
22
25
  # TODO deprecate in favor of read_sql
23
26
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
24
27
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
29
32
  end
30
33
 
31
34
  if data.nil?
32
- self._df = self.class.hash_to_rbdf({}, columns: columns)
35
+ self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
33
36
  elsif data.is_a?(Hash)
34
37
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
35
- self._df = self.class.hash_to_rbdf(data, columns: columns)
38
+ self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
36
39
  elsif data.is_a?(Array)
37
- self._df = self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
40
+ self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
38
41
  elsif data.is_a?(Series)
39
- self._df = self.class.series_to_rbdf(data, columns: columns)
42
+ self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
40
43
  else
41
44
  raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
42
45
  end
@@ -56,8 +59,8 @@ module Polars
56
59
  end
57
60
 
58
61
  # @private
59
- def self._from_hash(data, columns: nil)
60
- _from_rbdf(hash_to_rbdf(data, columns: columns))
62
+ def self._from_hash(data, schema: nil, schema_overrides: nil)
63
+ _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
61
64
  end
62
65
 
63
66
  # def self._from_records
@@ -97,7 +100,7 @@ module Polars
97
100
  eol_char: "\n"
98
101
  )
99
102
  if Utils.pathlike?(file)
100
- path = Utils.format_path(file)
103
+ path = Utils.normalise_filepath(file)
101
104
  else
102
105
  path = nil
103
106
  # if defined?(StringIO) && file.is_a?(StringIO)
@@ -196,32 +199,56 @@ module Polars
196
199
 
197
200
  # @private
198
201
  def self._read_parquet(
199
- file,
202
+ source,
200
203
  columns: nil,
201
204
  n_rows: nil,
202
205
  parallel: "auto",
203
206
  row_count_name: nil,
204
207
  row_count_offset: 0,
205
- low_memory: false
208
+ low_memory: false,
209
+ use_statistics: true,
210
+ rechunk: true
206
211
  )
207
- if Utils.pathlike?(file)
208
- file = Utils.format_path(file)
212
+ if Utils.pathlike?(source)
213
+ source = Utils.normalise_filepath(source)
214
+ end
215
+ if columns.is_a?(String)
216
+ columns = [columns]
209
217
  end
210
218
 
211
- if file.is_a?(String) && file.include?("*")
212
- raise Todo
219
+ if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
220
+ scan =
221
+ Polars.scan_parquet(
222
+ source,
223
+ n_rows: n_rows,
224
+ rechunk: true,
225
+ parallel: parallel,
226
+ row_count_name: row_count_name,
227
+ row_count_offset: row_count_offset,
228
+ low_memory: low_memory
229
+ )
230
+
231
+ if columns.nil?
232
+ return self._from_rbdf(scan.collect._df)
233
+ elsif Utils.is_str_sequence(columns, allow_str: false)
234
+ return self._from_rbdf(scan.select(columns).collect._df)
235
+ else
236
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
237
+ end
213
238
  end
214
239
 
215
240
  projection, columns = Utils.handle_projection_columns(columns)
216
241
  _from_rbdf(
217
242
  RbDataFrame.read_parquet(
218
- file,
243
+ source,
219
244
  columns,
220
245
  projection,
221
246
  n_rows,
222
247
  parallel,
223
248
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
224
- low_memory
249
+ low_memory,
250
+ use_statistics,
251
+ rechunk
225
252
  )
226
253
  )
227
254
  end
@@ -229,7 +256,7 @@ module Polars
229
256
  # @private
230
257
  def self._read_avro(file, columns: nil, n_rows: nil)
231
258
  if Utils.pathlike?(file)
232
- file = Utils.format_path(file)
259
+ file = Utils.normalise_filepath(file)
233
260
  end
234
261
  projection, columns = Utils.handle_projection_columns(columns)
235
262
  _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
@@ -246,7 +273,7 @@ module Polars
246
273
  memory_map: true
247
274
  )
248
275
  if Utils.pathlike?(file)
249
- file = Utils.format_path(file)
276
+ file = Utils.normalise_filepath(file)
250
277
  end
251
278
  if columns.is_a?(String)
252
279
  columns = [columns]
@@ -272,7 +299,7 @@ module Polars
272
299
  # @private
273
300
  def self._read_json(file)
274
301
  if Utils.pathlike?(file)
275
- file = Utils.format_path(file)
302
+ file = Utils.normalise_filepath(file)
276
303
  end
277
304
 
278
305
  _from_rbdf(RbDataFrame.read_json(file))
@@ -281,7 +308,7 @@ module Polars
281
308
  # @private
282
309
  def self._read_ndjson(file)
283
310
  if Utils.pathlike?(file)
284
- file = Utils.format_path(file)
311
+ file = Utils.normalise_filepath(file)
285
312
  end
286
313
 
287
314
  _from_rbdf(RbDataFrame.read_ndjson(file))
@@ -312,6 +339,7 @@ module Polars
312
339
  end
313
340
  alias_method :count, :height
314
341
  alias_method :length, :height
342
+ alias_method :size, :height
315
343
 
316
344
  # Get the width of the DataFrame.
317
345
  #
@@ -522,6 +550,13 @@ module Polars
522
550
  end
523
551
  alias_method :inspect, :to_s
524
552
 
553
+ # Returns an array representing the DataFrame
554
+ #
555
+ # @return [Array]
556
+ def to_a
557
+ rows(named: true)
558
+ end
559
+
525
560
  # Check if DataFrame includes column.
526
561
  #
527
562
  # @return [Boolean]
@@ -631,7 +666,7 @@ module Polars
631
666
  end
632
667
 
633
668
  # Ruby-specific
634
- if item.is_a?(Expr)
669
+ if item.is_a?(Expr) || item.is_a?(Series)
635
670
  return filter(item)
636
671
  end
637
672
 
@@ -641,15 +676,42 @@ module Polars
641
676
  # Set item.
642
677
  #
643
678
  # @return [Object]
644
- #
645
- # def []=(key, value)
646
- # if key.is_a?(String)
647
- # raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
648
- # end
679
+ def []=(*key, value)
680
+ if key.length == 1
681
+ key = key.first
682
+ elsif key.length != 2
683
+ raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
684
+ end
649
685
 
650
- # raise Todo
651
- # end
686
+ if Utils.strlike?(key)
687
+ if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
688
+ value = Series.new(value)
689
+ elsif !value.is_a?(Series)
690
+ value = Polars.lit(value)
691
+ end
692
+ self._df = with_column(value.alias(key.to_s))._df
693
+ elsif key.is_a?(Array)
694
+ row_selection, col_selection = key
695
+
696
+ if Utils.strlike?(col_selection)
697
+ s = self[col_selection]
698
+ elsif col_selection.is_a?(Integer)
699
+ raise Todo
700
+ else
701
+ raise ArgumentError, "column selection not understood: #{col_selection}"
702
+ end
703
+
704
+ s[row_selection] = value
652
705
 
706
+ if col_selection.is_a?(Integer)
707
+ replace_at_idx(col_selection, s)
708
+ elsif Utils.strlike?(col_selection)
709
+ replace(col_selection, s)
710
+ end
711
+ else
712
+ raise Todo
713
+ end
714
+ end
653
715
 
654
716
  # Return the dataframe as a scalar.
655
717
  #
@@ -774,7 +836,7 @@ module Polars
774
836
  row_oriented: false
775
837
  )
776
838
  if Utils.pathlike?(file)
777
- file = Utils.format_path(file)
839
+ file = Utils.normalise_filepath(file)
778
840
  end
779
841
 
780
842
  _df.write_json(file, pretty, row_oriented)
@@ -789,7 +851,7 @@ module Polars
789
851
  # @return [nil]
790
852
  def write_ndjson(file)
791
853
  if Utils.pathlike?(file)
792
- file = Utils.format_path(file)
854
+ file = Utils.normalise_filepath(file)
793
855
  end
794
856
 
795
857
  _df.write_ndjson(file)
@@ -879,7 +941,7 @@ module Polars
879
941
  end
880
942
 
881
943
  if Utils.pathlike?(file)
882
- file = Utils.format_path(file)
944
+ file = Utils.normalise_filepath(file)
883
945
  end
884
946
 
885
947
  _df.write_csv(
@@ -917,7 +979,7 @@ module Polars
917
979
  compression = "uncompressed"
918
980
  end
919
981
  if Utils.pathlike?(file)
920
- file = Utils.format_path(file)
982
+ file = Utils.normalise_filepath(file)
921
983
  end
922
984
 
923
985
  _df.write_avro(file, compression)
@@ -936,7 +998,7 @@ module Polars
936
998
  compression = "uncompressed"
937
999
  end
938
1000
  if Utils.pathlike?(file)
939
- file = Utils.format_path(file)
1001
+ file = Utils.normalise_filepath(file)
940
1002
  end
941
1003
 
942
1004
  _df.write_ipc(file, compression)
@@ -978,7 +1040,7 @@ module Polars
978
1040
  compression = "uncompressed"
979
1041
  end
980
1042
  if Utils.pathlike?(file)
981
- file = Utils.format_path(file)
1043
+ file = Utils.normalise_filepath(file)
982
1044
  end
983
1045
 
984
1046
  _df.write_parquet(
@@ -1438,6 +1500,20 @@ module Polars
1438
1500
  end
1439
1501
  end
1440
1502
 
1503
+ # Sort the DataFrame by column in-place.
1504
+ #
1505
+ # @param by [String]
1506
+ # By which column to sort.
1507
+ # @param reverse [Boolean]
1508
+ # Reverse/descending sort.
1509
+ # @param nulls_last [Boolean]
1510
+ # Place null values last. Can only be used if sorted by a single column.
1511
+ #
1512
+ # @return [DataFrame]
1513
+ def sort!(by, reverse: false, nulls_last: false)
1514
+ self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
1515
+ end
1516
+
1441
1517
  # Check if DataFrame is equal to other.
1442
1518
  #
1443
1519
  # @param other [DataFrame]
@@ -1495,7 +1571,7 @@ module Polars
1495
1571
  # # │ 30 ┆ 6 │
1496
1572
  # # └─────┴─────┘
1497
1573
  def replace(column, new_col)
1498
- _df.replace(column, new_col._s)
1574
+ _df.replace(column.to_s, new_col._s)
1499
1575
  self
1500
1576
  end
1501
1577
 
@@ -1836,7 +1912,7 @@ module Polars
1836
1912
  # "2020-01-08 23:16:43"
1837
1913
  # ]
1838
1914
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1839
- # Polars.col("dt").str.strptime(:datetime)
1915
+ # Polars.col("dt").str.strptime(Polars::Datetime)
1840
1916
  # )
1841
1917
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1842
1918
  # [
@@ -2767,6 +2843,16 @@ module Polars
2767
2843
  Utils.wrap_s(_df.drop_in_place(name))
2768
2844
  end
2769
2845
 
2846
+ # Drop in place if exists.
2847
+ #
2848
+ # @param name [Object]
2849
+ # Column to drop.
2850
+ #
2851
+ # @return [Series, nil] the dropped column, or nil if no column with that name exists
2852
+ def delete(name)
2853
+ drop_in_place(name) if include?(name)
2854
+ end
2855
+
2770
2856
  # Create an empty copy of the current DataFrame.
2771
2857
  #
2772
2858
  # Returns a DataFrame with identical schema but no data.
@@ -3042,24 +3128,28 @@ module Polars
3042
3128
  if aggregate_fn.is_a?(String)
3043
3129
  case aggregate_fn
3044
3130
  when "first"
3045
- aggregate_fn = Polars.element.first
3131
+ aggregate_expr = Polars.element.first._rbexpr
3046
3132
  when "sum"
3047
- aggregate_fn = Polars.element.sum
3133
+ aggregate_expr = Polars.element.sum._rbexpr
3048
3134
  when "max"
3049
- aggregate_fn = Polars.element.max
3135
+ aggregate_expr = Polars.element.max._rbexpr
3050
3136
  when "min"
3051
- aggregate_fn = Polars.element.min
3137
+ aggregate_expr = Polars.element.min._rbexpr
3052
3138
  when "mean"
3053
- aggregate_fn = Polars.element.mean
3139
+ aggregate_expr = Polars.element.mean._rbexpr
3054
3140
  when "median"
3055
- aggregate_fn = Polars.element.median
3141
+ aggregate_expr = Polars.element.median._rbexpr
3056
3142
  when "last"
3057
- aggregate_fn = Polars.element.last
3143
+ aggregate_expr = Polars.element.last._rbexpr
3058
3144
  when "count"
3059
- aggregate_fn = Polars.count
3145
+ aggregate_expr = Polars.count._rbexpr
3060
3146
  else
3061
3147
  raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
3062
3148
  end
3149
+ elsif aggregate_fn.nil?
3150
+ aggregate_expr = nil
3151
+ else
3152
+ aggregate_expr = aggregate_fn._rbexpr # caller supplied a custom expression; the parameter is aggregate_fn (aggregate_function is undefined here)
3063
3153
  end
3064
3154
 
3065
3155
  _from_rbdf(
@@ -3067,9 +3157,9 @@ module Polars
3067
3157
  values,
3068
3158
  index,
3069
3159
  columns,
3070
- aggregate_fn._rbexpr,
3071
3160
  maintain_order,
3072
3161
  sort_columns,
3162
+ aggregate_expr,
3073
3163
  separator
3074
3164
  )
3075
3165
  )
@@ -3174,7 +3264,7 @@ module Polars
3174
3264
  # # │ B ┆ 1 │
3175
3265
  # # │ C ┆ 2 │
3176
3266
  # # │ D ┆ 3 │
3177
- # # │ ... ┆ ... │
3267
+ # # │ E ┆ 4 │
3178
3268
  # # │ F ┆ 5 │
3179
3269
  # # │ G ┆ 6 │
3180
3270
  # # │ H ┆ 7 │
@@ -4053,15 +4143,12 @@ module Polars
4053
4143
  # # │ 5 ┆ 3.0 ┆ true │
4054
4144
  # # └─────┴─────┴───────┘
4055
4145
  def unique(maintain_order: true, subset: nil, keep: "first")
4056
- if !subset.nil?
4057
- if subset.is_a?(String)
4058
- subset = [subset]
4059
- elsif !subset.is_a?(Array)
4060
- subset = subset.to_a
4061
- end
4062
- end
4063
-
4064
- _from_rbdf(_df.unique(maintain_order, subset, keep))
4146
+ self._from_rbdf(
4147
+ lazy
4148
+ .unique(maintain_order: maintain_order, subset: subset, keep: keep)
4149
+ .collect(no_optimization: true)
4150
+ ._df
4151
+ )
4065
4152
  end
4066
4153
 
4067
4154
  # Return the number of unique rows, or the number of unique row-subsets.
@@ -4403,7 +4490,7 @@ module Polars
4403
4490
  end
4404
4491
  end
4405
4492
 
4406
- # Returns an iterator over the DataFrame of rows of python-native values.
4493
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4407
4494
  #
4408
4495
  # @param named [Boolean]
4409
4496
  # Return hashes instead of arrays. The hashes are a mapping of
@@ -4464,6 +4551,24 @@ module Polars
4464
4551
  end
4465
4552
  end
4466
4553
 
4554
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4555
+ #
4556
+ # @param named [Boolean]
4557
+ # Return hashes instead of arrays. The hashes are a mapping of
4558
+ # column name to row value. This is more expensive than returning an
4559
+ # array, but allows for accessing values by column name.
4560
+ # @param buffer_size [Integer]
4561
+ # Determines the number of rows that are buffered internally while iterating
4562
+ # over the data; you should only modify this in very specific cases where the
4563
+ # default value is determined not to be a good fit to your access pattern, as
4564
+ # the speedup from using the buffer is significant (~2-4x). Setting this
4565
+ # value to zero disables row buffering.
4566
+ #
4567
+ # @return [Object]
4568
+ def each_row(named: true, buffer_size: 500, &block)
4569
+ iter_rows(named: named, buffer_size: buffer_size, &block)
4570
+ end
4571
+
4467
4572
  # Shrink DataFrame memory usage.
4468
4573
  #
4469
4574
  # Shrinks to fit the exact capacity needed to hold the data.
@@ -4717,20 +4822,63 @@ module Polars
4717
4822
  end
4718
4823
 
4719
4824
  # @private
4720
- def self.hash_to_rbdf(data, columns: nil)
4721
- if !columns.nil?
4722
- columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
4825
+ def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
4826
+ updated_data = {}
4827
+ unless data.empty?
4828
+ dtypes = schema_overrides || {}
4829
+ array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
4830
+ if array_len > 0
4831
+ data.each do |name, val|
4832
+ dtype = dtypes[name]
4833
+ if val.is_a?(Hash) && dtype != Struct
4834
+ updated_data[name] = DataFrame.new(val).to_struct(name)
4835
+ elsif !Utils.arrlen(val).nil?
4836
+ updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
4837
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
4838
+ dtype = Polars::Float64 if val.nil? && dtype.nil?
4839
+ updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
4840
+ else
4841
+ raise Todo
4842
+ end
4843
+ end
4844
+ elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
4845
+ data.each do |name, val|
4846
+ updated_data[name] = Series.new(name, val, dtype: dtypes[name])
4847
+ end
4848
+ elsif data.values.all? { |val| Utils.arrlen(val).nil? }
4849
+ data.each do |name, val|
4850
+ updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
4851
+ end
4852
+ end
4853
+ end
4854
+ updated_data
4855
+ end
4723
4856
 
4724
- if data.empty? && dtypes
4725
- data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
4726
- else
4727
- data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
4857
+ # @private
4858
+ def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
4859
+ if schema.is_a?(Hash) && !data.empty?
4860
+ if !data.all? { |col, _| schema[col] }
4861
+ raise ArgumentError, "The given column-schema names do not match the data dictionary"
4728
4862
  end
4729
- data_series = _handle_columns_arg(data_series, columns: columns)
4730
- return RbDataFrame.new(data_series)
4863
+
4864
+ data = schema.to_h { |col| [col, data[col]] }
4865
+ end
4866
+
4867
+ column_names, schema_overrides = _unpack_schema(
4868
+ schema, lookup_names: data.keys, schema_overrides: schema_overrides
4869
+ )
4870
+ if column_names.empty?
4871
+ column_names = data.keys
4872
+ end
4873
+
4874
+ if data.empty? && !schema_overrides.empty?
4875
+ data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
4876
+ else
4877
+ data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
4731
4878
  end
4732
4879
 
4733
- RbDataFrame.read_hash(data)
4880
+ data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
4881
+ RbDataFrame.new(data_series)
4734
4882
  end
4735
4883
 
4736
4884
  # @private
@@ -4739,14 +4887,12 @@ module Polars
4739
4887
  end
4740
4888
 
4741
4889
  # @private
4742
- def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4743
- raise Todo if schema_overrides
4744
-
4745
- if columns.is_a?(Hash)
4746
- columns = columns.to_a
4890
+ def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
4891
+ if schema.is_a?(Hash)
4892
+ schema = schema.to_a
4747
4893
  end
4748
4894
  column_names =
4749
- (columns || []).map.with_index do |col, i|
4895
+ (schema || []).map.with_index do |col, i|
4750
4896
  if col.is_a?(String)
4751
4897
  col || "column_#{i}"
4752
4898
  else
@@ -4759,21 +4905,38 @@ module Polars
4759
4905
  # TODO zip_longest
4760
4906
  lookup = column_names.zip(lookup_names || []).to_h
4761
4907
 
4762
- [
4763
- column_names,
4764
- (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4908
+ column_dtypes =
4909
+ (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4765
4910
  [lookup[col[0]] || col[0], col[1]]
4766
4911
  end
4767
- ]
4912
+
4913
+ if schema_overrides
4914
+ raise Todo
4915
+ end
4916
+
4917
+ column_dtypes.each do |col, dtype|
4918
+ if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
4919
+ column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
4920
+ end
4921
+ end
4922
+
4923
+ [column_names, column_dtypes]
4768
4924
  end
4769
4925
 
4770
- def self._handle_columns_arg(data, columns: nil)
4771
- if columns.nil?
4926
+ def self._handle_columns_arg(data, columns: nil, from_hash: false)
4927
+ if columns.nil? || columns.empty?
4772
4928
  data
4773
4929
  else
4774
4930
  if data.empty?
4775
4931
  columns.map { |c| Series.new(c, nil)._s }
4776
4932
  elsif data.length == columns.length
4933
+ if from_hash
4934
+ series_map = data.to_h { |s| [s.name, s] }
4935
+ if columns.all? { |col| series_map.key?(col) }
4936
+ return columns.map { |col| series_map[col] }
4937
+ end
4938
+ end
4939
+
4777
4940
  columns.each_with_index do |c, i|
4778
4941
  # not in-place?
4779
4942
  data[i].rename(c)
@@ -4788,7 +4951,7 @@ module Polars
4788
4951
  def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4789
4952
  rbdf_columns = rbdf.columns
4790
4953
  rbdf_dtypes = rbdf.dtypes
4791
- columns, dtypes = _unpack_columns(
4954
+ columns, dtypes = _unpack_schema(
4792
4955
  (columns || rbdf_columns), schema_overrides: schema_overrides
4793
4956
  )
4794
4957
  column_subset = []
@@ -4826,20 +4989,23 @@ module Polars
4826
4989
  end
4827
4990
 
4828
4991
  # @private
4829
- def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4992
+ def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
4993
+ raise Todo if schema_overrides
4994
+ columns = schema
4995
+
4830
4996
  if data.length == 0
4831
- return hash_to_rbdf({}, columns: columns)
4997
+ return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
4832
4998
  end
4833
4999
 
4834
5000
  if data[0].is_a?(Series)
4835
5001
  # series_names = data.map(&:name)
4836
- # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
5002
+ # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
4837
5003
  data_series = []
4838
5004
  data.each do |s|
4839
5005
  data_series << s._s
4840
5006
  end
4841
5007
  elsif data[0].is_a?(Hash)
4842
- column_names, dtypes = _unpack_columns(columns)
5008
+ column_names, dtypes = _unpack_schema(columns)
4843
5009
  schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4844
5010
  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4845
5011
  if column_names
@@ -4865,11 +5031,21 @@ module Polars
4865
5031
  end
4866
5032
 
4867
5033
  # @private
4868
- def self.series_to_rbdf(data, columns: nil)
4869
- if columns
4870
- raise Todo
5034
+ def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
5035
+ data_series = [data._s]
5036
+ series_name = data_series.map(&:name)
5037
+ column_names, schema_overrides = _unpack_schema(
5038
+ schema || series_name, schema_overrides: schema_overrides, n_expected: 1
5039
+ )
5040
+ if schema_overrides.any?
5041
+ new_dtype = schema_overrides.values[0]
5042
+ if new_dtype != data.dtype
5043
+ data_series[0] = data_series[0].cast(new_dtype, true)
5044
+ end
4871
5045
  end
4872
- RbDataFrame.new([data._s])
5046
+
5047
+ data_series = _handle_columns_arg(data_series, columns: column_names)
5048
+ RbDataFrame.new(data_series)
4873
5049
  end
4874
5050
 
4875
5051
  def wrap_ldf(ldf)
@@ -84,20 +84,22 @@ module Polars
84
84
 
85
85
  # Calendar date and time type.
86
86
  class Datetime < TemporalType
87
- attr_reader :tu
87
+ attr_reader :time_unit, :time_zone
88
+ alias_method :tu, :time_unit
88
89
 
89
90
  def initialize(time_unit = "us", time_zone = nil)
90
- @tu = time_unit || "us"
91
+ @time_unit = time_unit || "us"
91
92
  @time_zone = time_zone
92
93
  end
93
94
  end
94
95
 
95
96
  # Time duration/delta type.
96
97
  class Duration < TemporalType
97
- attr_reader :tu
98
+ attr_reader :time_unit
99
+ alias_method :tu, :time_unit
98
100
 
99
101
  def initialize(time_unit = "us")
100
- @tu = time_unit
102
+ @time_unit = time_unit
101
103
  end
102
104
  end
103
105