polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,15 +20,9 @@ module Polars
20
20
  # this does not yield conclusive results, column orientation is used.
21
21
  def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
22
22
  schema ||= columns
23
- raise Todo if schema_overrides
24
23
 
25
- # TODO deprecate in favor of read_sql
26
24
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
27
- result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
28
- data = {}
29
- result.columns.each_with_index do |k, i|
30
- data[k] = result.rows.map { |r| r[i] }
31
- end
25
+ raise ArgumentError, "Use read_database instead"
32
26
  end
33
27
 
34
28
  if data.nil?
@@ -125,10 +119,10 @@ module Polars
125
119
 
126
120
  processed_null_values = Utils._process_null_values(null_values)
127
121
 
128
- if columns.is_a?(String)
122
+ if columns.is_a?(::String)
129
123
  columns = [columns]
130
124
  end
131
- if file.is_a?(String) && file.include?("*")
125
+ if file.is_a?(::String) && file.include?("*")
132
126
  dtypes_dict = nil
133
127
  if !dtype_list.nil?
134
128
  dtypes_dict = dtype_list.to_h
@@ -212,11 +206,11 @@ module Polars
212
206
  if Utils.pathlike?(source)
213
207
  source = Utils.normalise_filepath(source)
214
208
  end
215
- if columns.is_a?(String)
209
+ if columns.is_a?(::String)
216
210
  columns = [columns]
217
211
  end
218
212
 
219
- if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
213
+ if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
220
214
  scan =
221
215
  Polars.scan_parquet(
222
216
  source,
@@ -275,11 +269,11 @@ module Polars
275
269
  if Utils.pathlike?(file)
276
270
  file = Utils.normalise_filepath(file)
277
271
  end
278
- if columns.is_a?(String)
272
+ if columns.is_a?(::String)
279
273
  columns = [columns]
280
274
  end
281
275
 
282
- if file.is_a?(String) && file.include?("*")
276
+ if file.is_a?(::String) && file.include?("*")
283
277
  raise Todo
284
278
  end
285
279
 
@@ -417,7 +411,7 @@ module Polars
417
411
  # }
418
412
  # )
419
413
  # df.dtypes
420
- # # => [Polars::Int64, Polars::Float64, Polars::Utf8]
414
+ # # => [Polars::Int64, Polars::Float64, Polars::String]
421
415
  def dtypes
422
416
  _df.dtypes
423
417
  end
@@ -435,7 +429,7 @@ module Polars
435
429
  # }
436
430
  # )
437
431
  # df.schema
438
- # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
432
+ # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
439
433
  def schema
440
434
  columns.zip(dtypes).to_h
441
435
  end
@@ -595,13 +589,13 @@ module Polars
595
589
  return df.slice(row_selection, 1)
596
590
  end
597
591
  # df[2, "a"]
598
- if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
592
+ if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
599
593
  return self[col_selection][row_selection]
600
594
  end
601
595
  end
602
596
 
603
597
  # column selection can be "a" and ["a", "b"]
604
- if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
598
+ if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
605
599
  col_selection = [col_selection]
606
600
  end
607
601
 
@@ -627,7 +621,7 @@ module Polars
627
621
 
628
622
  # select single column
629
623
  # df["foo"]
630
- if item.is_a?(String) || item.is_a?(Symbol)
624
+ if item.is_a?(::String) || item.is_a?(Symbol)
631
625
  return Utils.wrap_s(_df.column(item.to_s))
632
626
  end
633
627
 
@@ -653,7 +647,7 @@ module Polars
653
647
 
654
648
  if item.is_a?(Series)
655
649
  dtype = item.dtype
656
- if dtype == Utf8
650
+ if dtype == String
657
651
  return _from_rbdf(_df.select(item))
658
652
  elsif dtype == UInt32
659
653
  return _from_rbdf(_df.take_with_series(item._s))
@@ -704,7 +698,7 @@ module Polars
704
698
  s[row_selection] = value
705
699
 
706
700
  if col_selection.is_a?(Integer)
707
- replace_at_idx(col_selection, s)
701
+ replace_column(col_selection, s)
708
702
  elsif Utils.strlike?(col_selection)
709
703
  replace(col_selection, s)
710
704
  end
@@ -905,6 +899,7 @@ module Polars
905
899
  def write_csv(
906
900
  file = nil,
907
901
  has_header: true,
902
+ include_header: nil,
908
903
  sep: ",",
909
904
  quote: '"',
910
905
  batch_size: 1024,
@@ -914,6 +909,8 @@ module Polars
914
909
  float_precision: nil,
915
910
  null_value: nil
916
911
  )
912
+ include_header = has_header if include_header.nil?
913
+
917
914
  if sep.length > 1
918
915
  raise ArgumentError, "only single byte separator is allowed"
919
916
  elsif quote.length > 1
@@ -927,7 +924,7 @@ module Polars
927
924
  buffer.set_encoding(Encoding::BINARY)
928
925
  _df.write_csv(
929
926
  buffer,
930
- has_header,
927
+ include_header,
931
928
  sep.ord,
932
929
  quote.ord,
933
930
  batch_size,
@@ -946,7 +943,7 @@ module Polars
946
943
 
947
944
  _df.write_csv(
948
945
  file,
949
- has_header,
946
+ include_header,
950
947
  sep.ord,
951
948
  quote.ord,
952
949
  batch_size,
@@ -1151,22 +1148,8 @@ module Polars
1151
1148
  # # │ b ┆ 1 ┆ 2 ┆ 3 │
1152
1149
  # # └─────┴─────┴─────┴─────┘
1153
1150
  def transpose(include_header: false, header_name: "column", column_names: nil)
1154
- df = _from_rbdf(_df.transpose(include_header, header_name))
1155
- if !column_names.nil?
1156
- names = []
1157
- n = df.width
1158
- if include_header
1159
- names << header_name
1160
- n -= 1
1161
- end
1162
-
1163
- column_names = column_names.each
1164
- n.times do
1165
- names << column_names.next
1166
- end
1167
- df.columns = names
1168
- end
1169
- df
1151
+ keep_names_as = include_header ? header_name : nil
1152
+ _from_rbdf(_df.transpose(keep_names_as, column_names))
1170
1153
  end
1171
1154
 
1172
1155
  # Reverse the DataFrame.
@@ -1239,7 +1222,7 @@ module Polars
1239
1222
  # @example
1240
1223
  # df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
1241
1224
  # s = Polars::Series.new("baz", [97, 98, 99])
1242
- # df.insert_at_idx(1, s)
1225
+ # df.insert_column(1, s)
1243
1226
  # # =>
1244
1227
  # # shape: (3, 3)
1245
1228
  # # ┌─────┬─────┬─────┐
@@ -1261,7 +1244,7 @@ module Polars
1261
1244
  # }
1262
1245
  # )
1263
1246
  # s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
1264
- # df.insert_at_idx(3, s)
1247
+ # df.insert_column(3, s)
1265
1248
  # # =>
1266
1249
  # # shape: (4, 4)
1267
1250
  # # ┌─────┬──────┬───────┬──────┐
@@ -1274,13 +1257,14 @@ module Polars
1274
1257
  # # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
1275
1258
  # # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
1276
1259
  # # └─────┴──────┴───────┴──────┘
1277
- def insert_at_idx(index, series)
1260
+ def insert_column(index, series)
1278
1261
  if index < 0
1279
1262
  index = columns.length + index
1280
1263
  end
1281
- _df.insert_at_idx(index, series._s)
1264
+ _df.insert_column(index, series._s)
1282
1265
  self
1283
1266
  end
1267
+ alias_method :insert_at_idx, :insert_column
1284
1268
 
1285
1269
  # Filter the rows in the DataFrame based on a predicate expression.
1286
1270
  #
@@ -1384,7 +1368,7 @@ module Polars
1384
1368
  ]
1385
1369
  )._df
1386
1370
  )
1387
- summary.insert_at_idx(
1371
+ summary.insert_column(
1388
1372
  0,
1389
1373
  Polars::Series.new(
1390
1374
  "describe",
@@ -1405,11 +1389,12 @@ module Polars
1405
1389
  # df = Polars::DataFrame.new(
1406
1390
  # {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
1407
1391
  # )
1408
- # df.find_idx_by_name("ham")
1392
+ # df.get_column_index("ham")
1409
1393
  # # => 2
1410
- def find_idx_by_name(name)
1411
- _df.find_idx_by_name(name)
1394
+ def get_column_index(name)
1395
+ _df.get_column_index(name)
1412
1396
  end
1397
+ alias_method :find_idx_by_name, :get_column_index
1413
1398
 
1414
1399
  # Replace a column at an index location.
1415
1400
  #
@@ -1429,7 +1414,7 @@ module Polars
1429
1414
  # }
1430
1415
  # )
1431
1416
  # s = Polars::Series.new("apple", [10, 20, 30])
1432
- # df.replace_at_idx(0, s)
1417
+ # df.replace_column(0, s)
1433
1418
  # # =>
1434
1419
  # # shape: (3, 3)
1435
1420
  # # ┌───────┬─────┬─────┐
@@ -1441,13 +1426,14 @@ module Polars
1441
1426
  # # │ 20 ┆ 7 ┆ b │
1442
1427
  # # │ 30 ┆ 8 ┆ c │
1443
1428
  # # └───────┴─────┴─────┘
1444
- def replace_at_idx(index, series)
1429
+ def replace_column(index, series)
1445
1430
  if index < 0
1446
1431
  index = columns.length + index
1447
1432
  end
1448
- _df.replace_at_idx(index, series._s)
1433
+ _df.replace_column(index, series._s)
1449
1434
  self
1450
1435
  end
1436
+ alias_method :replace_at_idx, :replace_column
1451
1437
 
1452
1438
  # Sort the DataFrame by column.
1453
1439
  #
@@ -1541,13 +1527,14 @@ module Polars
1541
1527
  # "ham" => ["c", "b", "a"]
1542
1528
  # }
1543
1529
  # )
1544
- # df1.frame_equal(df1)
1530
+ # df1.equals(df1)
1545
1531
  # # => true
1546
- # df1.frame_equal(df2)
1532
+ # df1.equals(df2)
1547
1533
  # # => false
1548
- def frame_equal(other, null_equal: true)
1549
- _df.frame_equal(other._df, null_equal)
1534
+ def equals(other, null_equal: true)
1535
+ _df.equals(other._df, null_equal)
1550
1536
  end
1537
+ alias_method :frame_equal, :equals
1551
1538
 
1552
1539
  # Replace a column by a new Series.
1553
1540
  #
@@ -1733,7 +1720,7 @@ module Polars
1733
1720
  # # │ 3 ┆ 8 ┆ c │
1734
1721
  # # └─────┴─────┴─────┘
1735
1722
  def drop_nulls(subset: nil)
1736
- if subset.is_a?(String)
1723
+ if subset.is_a?(::String)
1737
1724
  subset = [subset]
1738
1725
  end
1739
1726
  _from_rbdf(_df.drop_nulls(subset))
@@ -1811,13 +1798,13 @@ module Polars
1811
1798
  _from_rbdf(_df.with_row_count(name, offset))
1812
1799
  end
1813
1800
 
1814
- # Start a groupby operation.
1801
+ # Start a group by operation.
1815
1802
  #
1816
1803
  # @param by [Object]
1817
1804
  # Column(s) to group by.
1818
1805
  # @param maintain_order [Boolean]
1819
1806
  # Make sure that the order of the groups remain consistent. This is more
1820
- # expensive than a default groupby. Note that this only works in expression
1807
+ # expensive than a default group by. Note that this only works in expression
1821
1808
  # aggregations.
1822
1809
  #
1823
1810
  # @return [GroupBy]
@@ -1830,7 +1817,7 @@ module Polars
1830
1817
  # "c" => [6, 5, 4, 3, 2, 1]
1831
1818
  # }
1832
1819
  # )
1833
- # df.groupby("a").agg(Polars.col("b").sum).sort("a")
1820
+ # df.group_by("a").agg(Polars.col("b").sum).sort("a")
1834
1821
  # # =>
1835
1822
  # # shape: (3, 2)
1836
1823
  # # ┌─────┬─────┐
@@ -1842,25 +1829,26 @@ module Polars
1842
1829
  # # │ b ┆ 11 │
1843
1830
  # # │ c ┆ 6 │
1844
1831
  # # └─────┴─────┘
1845
- def groupby(by, maintain_order: false)
1832
+ def group_by(by, maintain_order: false)
1846
1833
  if !Utils.bool?(maintain_order)
1847
- raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
1834
+ raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
1848
1835
  end
1849
1836
  GroupBy.new(
1850
- _df,
1837
+ self,
1851
1838
  by,
1852
- self.class,
1853
1839
  maintain_order: maintain_order
1854
1840
  )
1855
1841
  end
1842
+ alias_method :groupby, :group_by
1843
+ alias_method :group, :group_by
1856
1844
 
1857
1845
  # Create rolling groups based on a time column.
1858
1846
  #
1859
1847
  # Also works for index values of type `:i32` or `:i64`.
1860
1848
  #
1861
- # Different from a `dynamic_groupby` the windows are now determined by the
1849
+ # Different from a `dynamic_group_by` the windows are now determined by the
1862
1850
  # individual values and are not of constant intervals. For constant intervals use
1863
- # *groupby_dynamic*
1851
+ # *group_by_dynamic*
1864
1852
  #
1865
1853
  # The `period` and `offset` arguments are created either from a timedelta, or
1866
1854
  # by using the following string language:
@@ -1880,7 +1868,7 @@ module Polars
1880
1868
  # Or combine them:
1881
1869
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1882
1870
  #
1883
- # In case of a groupby_rolling on an integer column, the windows are defined by:
1871
+ # In case of a group_by_rolling on an integer column, the windows are defined by:
1884
1872
  #
1885
1873
  # - **"1i" # length 1**
1886
1874
  # - **"10i" # length 10**
@@ -1891,7 +1879,7 @@ module Polars
1891
1879
  # This column must be sorted in ascending order. If not the output will not
1892
1880
  # make sense.
1893
1881
  #
1894
- # In case of a rolling groupby on indices, dtype needs to be one of
1882
+ # In case of a rolling group by on indices, dtype needs to be one of
1895
1883
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1896
1884
  # performance matters use an `:i64` column.
1897
1885
  # @param period [Object]
@@ -1923,7 +1911,7 @@ module Polars
1923
1911
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1924
1912
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
1925
1913
  # )
1926
- # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1914
+ # df.group_by_rolling(index_column: "dt", period: "2d").agg(
1927
1915
  # [
1928
1916
  # Polars.sum("a").alias("sum_a"),
1929
1917
  # Polars.min("a").alias("min_a"),
@@ -1944,7 +1932,7 @@ module Polars
1944
1932
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
1945
1933
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
1946
1934
  # # └─────────────────────┴───────┴───────┴───────┘
1947
- def groupby_rolling(
1935
+ def group_by_rolling(
1948
1936
  index_column:,
1949
1937
  period:,
1950
1938
  offset: nil,
@@ -1954,11 +1942,12 @@ module Polars
1954
1942
  )
1955
1943
  RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
1956
1944
  end
1945
+ alias_method :groupby_rolling, :group_by_rolling
1957
1946
 
1958
1947
  # Group based on a time value (or index value of type `:i32`, `:i64`).
1959
1948
  #
1960
1949
  # Time windows are calculated and rows are assigned to windows. Different from a
1961
- # normal groupby is that a row can be member of multiple groups. The time/index
1950
+ # normal group by is that a row can be member of multiple groups. The time/index
1962
1951
  # window could be seen as a rolling window, with a window size determined by
1963
1952
  # dates/times/values instead of slots in the DataFrame.
1964
1953
  #
@@ -1986,7 +1975,7 @@ module Polars
1986
1975
  # Or combine them:
1987
1976
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1988
1977
  #
1989
- # In case of a groupby_dynamic on an integer column, the windows are defined by:
1978
+ # In case of a group_by_dynamic on an integer column, the windows are defined by:
1990
1979
  #
1991
1980
  # - "1i" # length 1
1992
1981
  # - "10i" # length 10
@@ -1997,7 +1986,7 @@ module Polars
1997
1986
  # This column must be sorted in ascending order. If not the output will not
1998
1987
  # make sense.
1999
1988
  #
2000
- # In case of a dynamic groupby on indices, dtype needs to be one of
1989
+ # In case of a dynamic group by on indices, dtype needs to be one of
2001
1990
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
2002
1991
  # performance matters use an `:i64` column.
2003
1992
  # @param every
@@ -2048,7 +2037,7 @@ module Polars
2048
2037
  # # └─────────────────────┴─────┘
2049
2038
  #
2050
2039
  # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
2051
- # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
2040
+ # df.group_by_dynamic("time", every: "1h", closed: "right").agg(
2052
2041
  # [
2053
2042
  # Polars.col("time").min.alias("time_min"),
2054
2043
  # Polars.col("time").max.alias("time_max")
@@ -2068,7 +2057,7 @@ module Polars
2068
2057
  # # └─────────────────────┴─────────────────────┴─────────────────────┘
2069
2058
  #
2070
2059
  # @example The window boundaries can also be added to the aggregation result.
2071
- # df.groupby_dynamic(
2060
+ # df.group_by_dynamic(
2072
2061
  # "time", every: "1h", include_boundaries: true, closed: "right"
2073
2062
  # ).agg([Polars.col("time").count.alias("time_count")])
2074
2063
  # # =>
@@ -2085,7 +2074,7 @@ module Polars
2085
2074
  # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
2086
2075
  #
2087
2076
  # @example When closed="left", should not include right end of interval.
2088
- # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
2077
+ # df.group_by_dynamic("time", every: "1h", closed: "left").agg(
2089
2078
  # [
2090
2079
  # Polars.col("time").count.alias("time_count"),
2091
2080
  # Polars.col("time").alias("time_agg_list")
@@ -2105,7 +2094,7 @@ module Polars
2105
2094
  # # └─────────────────────┴────────────┴───────────────────────────────────┘
2106
2095
  #
2107
2096
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
2108
- # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
2097
+ # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
2109
2098
  # [Polars.col("time").count.alias("time_count")]
2110
2099
  # )
2111
2100
  # # =>
@@ -2122,7 +2111,7 @@ module Polars
2122
2111
  # # │ 2021-12-16 03:00:00 ┆ 1 │
2123
2112
  # # └─────────────────────┴────────────┘
2124
2113
  #
2125
- # @example Dynamic groupbys can also be combined with grouping on normal keys.
2114
+ # @example Dynamic group bys can also be combined with grouping on normal keys.
2126
2115
  # df = Polars::DataFrame.new(
2127
2116
  # {
2128
2117
  # "time" => Polars.date_range(
@@ -2133,7 +2122,7 @@ module Polars
2133
2122
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
2134
2123
  # }
2135
2124
  # )
2136
- # df.groupby_dynamic(
2125
+ # df.group_by_dynamic(
2137
2126
  # "time",
2138
2127
  # every: "1h",
2139
2128
  # closed: "both",
@@ -2156,14 +2145,14 @@ module Polars
2156
2145
  # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
2157
2146
  # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
2158
2147
  #
2159
- # @example Dynamic groupby on an index column.
2148
+ # @example Dynamic group by on an index column.
2160
2149
  # df = Polars::DataFrame.new(
2161
2150
  # {
2162
2151
  # "idx" => Polars.arange(0, 6, eager: true),
2163
2152
  # "A" => ["A", "A", "B", "B", "B", "C"]
2164
2153
  # }
2165
2154
  # )
2166
- # df.groupby_dynamic(
2155
+ # df.group_by_dynamic(
2167
2156
  # "idx",
2168
2157
  # every: "2i",
2169
2158
  # period: "3i",
@@ -2181,7 +2170,7 @@ module Polars
2181
2170
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
2182
2171
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
2183
2172
  # # └─────────────────┴─────────────────┴─────┴─────────────────┘
2184
- def groupby_dynamic(
2173
+ def group_by_dynamic(
2185
2174
  index_column,
2186
2175
  every:,
2187
2176
  period: nil,
@@ -2205,6 +2194,7 @@ module Polars
2205
2194
  start_by
2206
2195
  )
2207
2196
  end
2197
+ alias_method :groupby_dynamic, :group_by_dynamic
2208
2198
 
2209
2199
  # Upsample a DataFrame at a regular frequency.
2210
2200
  #
@@ -2281,7 +2271,7 @@ module Polars
2281
2271
  if by.nil?
2282
2272
  by = []
2283
2273
  end
2284
- if by.is_a?(String)
2274
+ if by.is_a?(::String)
2285
2275
  by = [by]
2286
2276
  end
2287
2277
  if offset.nil?
@@ -2475,17 +2465,17 @@ module Polars
2475
2465
  # @example
2476
2466
  # df.join(other_df, on: "ham", how: "outer")
2477
2467
  # # =>
2478
- # # shape: (4, 4)
2479
- # # ┌──────┬──────┬─────┬───────┐
2480
- # # │ foo ┆ bar ┆ ham ┆ apple │
2481
- # # │ --- ┆ --- ┆ --- ┆ --- │
2482
- # # │ i64 ┆ f64 ┆ str ┆ str │
2483
- # # ╞══════╪══════╪═════╪═══════╡
2484
- # # │ 1 ┆ 6.0 ┆ a ┆ x │
2485
- # # │ 2 ┆ 7.0 ┆ b ┆ y │
2486
- # # │ null ┆ null ┆ d ┆ z │
2487
- # # │ 3 ┆ 8.0 ┆ c ┆ null │
2488
- # # └──────┴──────┴─────┴───────┘
2468
+ # # shape: (4, 5)
2469
+ # # ┌──────┬──────┬──────┬───────┬───────────┐
2470
+ # # │ foo  ┆ bar  ┆ ham  ┆ apple ┆ ham_right │
2471
+ # # │ ---  ┆ ---  ┆ ---  ┆ ---   ┆ ---       │
2472
+ # # │ i64  ┆ f64  ┆ str  ┆ str   ┆ str       │
2473
+ # # ╞══════╪══════╪══════╪═══════╪═══════════╡
2474
+ # # │ 1    ┆ 6.0  ┆ a    ┆ x     ┆ a         │
2475
+ # # │ 2    ┆ 7.0  ┆ b    ┆ y     ┆ b         │
2476
+ # # │ null ┆ null ┆ null ┆ z     ┆ d         │
2477
+ # # │ 3    ┆ 8.0  ┆ c    ┆ null  ┆ null      │
2478
+ # # └──────┴──────┴──────┴───────┴───────────┘
2489
2479
  #
2490
2480
  # @example
2491
2481
  # df.join(other_df, on: "ham", how: "left")
@@ -3125,17 +3115,17 @@ module Polars
3125
3115
  sort_columns: false,
3126
3116
  separator: "_"
3127
3117
  )
3128
- if values.is_a?(String)
3118
+ if values.is_a?(::String)
3129
3119
  values = [values]
3130
3120
  end
3131
- if index.is_a?(String)
3121
+ if index.is_a?(::String)
3132
3122
  index = [index]
3133
3123
  end
3134
- if columns.is_a?(String)
3124
+ if columns.is_a?(::String)
3135
3125
  columns = [columns]
3136
3126
  end
3137
3127
 
3138
- if aggregate_fn.is_a?(String)
3128
+ if aggregate_fn.is_a?(::String)
3139
3129
  case aggregate_fn
3140
3130
  when "first"
3141
3131
  aggregate_expr = Polars.element.first._rbexpr
@@ -3220,10 +3210,10 @@ module Polars
3220
3210
  # # │ z ┆ c ┆ 6 │
3221
3211
  # # └─────┴──────────┴───────┘
3222
3212
  def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
3223
- if value_vars.is_a?(String)
3213
+ if value_vars.is_a?(::String)
3224
3214
  value_vars = [value_vars]
3225
3215
  end
3226
- if id_vars.is_a?(String)
3216
+ if id_vars.is_a?(::String)
3227
3217
  id_vars = [id_vars]
3228
3218
  end
3229
3219
  if value_vars.nil?
@@ -3437,7 +3427,7 @@ module Polars
3437
3427
  # # │ C ┆ 2 ┆ l │
3438
3428
  # # └─────┴─────┴─────┘}
3439
3429
  def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
3440
- if groups.is_a?(String)
3430
+ if groups.is_a?(::String)
3441
3431
  groups = [groups]
3442
3432
  elsif !groups.is_a?(::Array)
3443
3433
  groups = Array(groups)
@@ -3464,8 +3454,10 @@ module Polars
3464
3454
 
3465
3455
  # Shift values by the given period.
3466
3456
  #
3467
- # @param periods [Integer]
3457
+ # @param n [Integer]
3468
3458
  # Number of places to shift (may be negative).
3459
+ # @param fill_value [Object]
3460
+ # Fill the resulting null values with this value.
3469
3461
  #
3470
3462
  # @return [DataFrame]
3471
3463
  #
@@ -3503,8 +3495,8 @@ module Polars
3503
3495
  # # │ 3 ┆ 8 ┆ c │
3504
3496
  # # │ null ┆ null ┆ null │
3505
3497
  # # └──────┴──────┴──────┘
3506
- def shift(periods)
3507
- _from_rbdf(_df.shift(periods))
3498
+ def shift(n, fill_value: nil)
3499
+ lazy.shift(n, fill_value: fill_value).collect(_eager: true)
3508
3500
  end
3509
3501
 
3510
3502
  # Shift the values by a given period and fill the resulting null values.
@@ -3537,9 +3529,7 @@ module Polars
3537
3529
  # # │ 2 ┆ 7 ┆ b │
3538
3530
  # # └─────┴─────┴─────┘
3539
3531
  def shift_and_fill(periods, fill_value)
3540
- lazy
3541
- .shift_and_fill(periods, fill_value)
3542
- .collect(no_optimization: true, string_cache: false)
3532
+ shift(periods, fill_value: fill_value)
3543
3533
  end
3544
3534
 
3545
3535
  # Get a mask of all duplicated rows in this DataFrame.
@@ -3788,9 +3778,9 @@ module Polars
3788
3778
  # # └─────┴─────┴─────┘
3789
3779
  def max(axis: 0)
3790
3780
  if axis == 0
3791
- _from_rbdf(_df.max)
3781
+ lazy.max.collect(_eager: true)
3792
3782
  elsif axis == 1
3793
- Utils.wrap_s(_df.hmax)
3783
+ Utils.wrap_s(_df.max_horizontal)
3794
3784
  else
3795
3785
  raise ArgumentError, "Axis should be 0 or 1."
3796
3786
  end
@@ -3820,9 +3810,9 @@ module Polars
3820
3810
  # # └─────┴─────┴─────┘
3821
3811
  def min(axis: 0)
3822
3812
  if axis == 0
3823
- _from_rbdf(_df.min)
3813
+ lazy.min.collect(_eager: true)
3824
3814
  elsif axis == 1
3825
- Utils.wrap_s(_df.hmin)
3815
+ Utils.wrap_s(_df.min_horizontal)
3826
3816
  else
3827
3817
  raise ArgumentError, "Axis should be 0 or 1."
3828
3818
  end
@@ -3869,9 +3859,9 @@ module Polars
3869
3859
  def sum(axis: 0, null_strategy: "ignore")
3870
3860
  case axis
3871
3861
  when 0
3872
- _from_rbdf(_df.sum)
3862
+ lazy.sum.collect(_eager: true)
3873
3863
  when 1
3874
- Utils.wrap_s(_df.hsum(null_strategy))
3864
+ Utils.wrap_s(_df.sum_horizontal(null_strategy))
3875
3865
  else
3876
3866
  raise ArgumentError, "Axis should be 0 or 1."
3877
3867
  end
@@ -3907,9 +3897,9 @@ module Polars
3907
3897
  def mean(axis: 0, null_strategy: "ignore")
3908
3898
  case axis
3909
3899
  when 0
3910
- _from_rbdf(_df.mean)
3900
+ lazy.mean.collect(_eager: true)
3911
3901
  when 1
3912
- Utils.wrap_s(_df.hmean(null_strategy))
3902
+ Utils.wrap_s(_df.mean_horizontal(null_strategy))
3913
3903
  else
3914
3904
  raise ArgumentError, "Axis should be 0 or 1."
3915
3905
  end
@@ -3953,7 +3943,7 @@ module Polars
3953
3943
  # # │ 0.816497 ┆ 0.816497 ┆ null │
3954
3944
  # # └──────────┴──────────┴──────┘
3955
3945
  def std(ddof: 1)
3956
- _from_rbdf(_df.std(ddof))
3946
+ lazy.std(ddof: ddof).collect(_eager: true)
3957
3947
  end
3958
3948
 
3959
3949
  # Aggregate the columns of this DataFrame to their variance value.
@@ -3994,7 +3984,7 @@ module Polars
3994
3984
  # # │ 0.666667 ┆ 0.666667 ┆ null │
3995
3985
  # # └──────────┴──────────┴──────┘
3996
3986
  def var(ddof: 1)
3997
- _from_rbdf(_df.var(ddof))
3987
+ lazy.var(ddof: ddof).collect(_eager: true)
3998
3988
  end
3999
3989
 
4000
3990
  # Aggregate the columns of this DataFrame to their median value.
@@ -4020,7 +4010,7 @@ module Polars
4020
4010
  # # │ 2.0 ┆ 7.0 ┆ null │
4021
4011
  # # └─────┴─────┴──────┘
4022
4012
  def median
4023
- _from_rbdf(_df.median)
4013
+ lazy.median.collect(_eager: true)
4024
4014
  end
4025
4015
 
4026
4016
  # Aggregate the columns of this DataFrame to their product values.
@@ -4077,7 +4067,7 @@ module Polars
4077
4067
  # # │ 2.0 ┆ 7.0 ┆ null │
4078
4068
  # # └─────┴─────┴──────┘
4079
4069
  def quantile(quantile, interpolation: "nearest")
4080
- _from_rbdf(_df.quantile(quantile, interpolation))
4070
+ lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
4081
4071
  end
4082
4072
 
4083
4073
  # Get one hot encoded dummy variables.
@@ -4108,7 +4098,7 @@ module Polars
4108
4098
  # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
4109
4099
  # # └───────┴───────┴───────┴───────┴───────┴───────┘
4110
4100
  def to_dummies(columns: nil, separator: "_", drop_first: false)
4111
- if columns.is_a?(String)
4101
+ if columns.is_a?(::String)
4112
4102
  columns = [columns]
4113
4103
  end
4114
4104
  _from_rbdf(_df.to_dummies(columns, separator, drop_first))
@@ -4294,15 +4284,20 @@ module Polars
4294
4284
  end
4295
4285
 
4296
4286
  if n.nil? && !frac.nil?
4287
+ frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
4288
+
4297
4289
  _from_rbdf(
4298
- _df.sample_frac(frac, with_replacement, shuffle, seed)
4290
+ _df.sample_frac(frac._s, with_replacement, shuffle, seed)
4299
4291
  )
4300
4292
  end
4301
4293
 
4302
4294
  if n.nil?
4303
4295
  n = 1
4304
4296
  end
4305
- _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
4297
+
4298
+ n = Series.new("", [n]) unless n.is_a?(Series)
4299
+
4300
+ _from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
4306
4301
  end
4307
4302
 
4308
4303
  # Apply a horizontal reduction on a DataFrame.
@@ -4601,7 +4596,7 @@ module Polars
4601
4596
  #
4602
4597
  # @example
4603
4598
  # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
4604
- # s.take_every(2)
4599
+ # s.gather_every(2)
4605
4600
  # # =>
4606
4601
  # # shape: (2, 2)
4607
4602
  # # ┌─────┬─────┐
@@ -4612,9 +4607,10 @@ module Polars
4612
4607
  # # │ 1 ┆ 5 │
4613
4608
  # # │ 3 ┆ 7 │
4614
4609
  # # └─────┴─────┘
4615
- def take_every(n)
4616
- select(Utils.col("*").take_every(n))
4610
+ def gather_every(n, offset = 0)
4611
+ select(Utils.col("*").gather_every(n, offset))
4617
4612
  end
4613
+ alias_method :take_every, :gather_every
4618
4614
 
4619
4615
  # Hash and combine the rows in this DataFrame.
4620
4616
  #
@@ -4671,16 +4667,16 @@ module Polars
4671
4667
  # df.interpolate
4672
4668
  # # =>
4673
4669
  # # shape: (4, 3)
4674
- # # ┌─────┬──────┬─────┐
4675
- # # │ foo ┆ bar ┆ baz
4676
- # # │ --- ┆ --- ┆ ---
4677
- # # │ i64 i64i64
4678
- # # ╞═════╪══════╪═════╡
4679
- # # │ 1 ┆ 6 ┆ 1
4680
- # # │ 5 ┆ 7 ┆ 3
4681
- # # │ 9 ┆ 9 ┆ 6
4682
- # # │ 10 ┆ null ┆ 9
4683
- # # └─────┴──────┴─────┘
4670
+ # # ┌──────┬──────┬──────────┐
4671
+ # # │ foo ┆ bar ┆ baz
4672
+ # # │ --- ┆ --- ┆ ---
4673
+ # # │ f64 f64f64
4674
+ # # ╞══════╪══════╪══════════╡
4675
+ # # │ 1.0 ┆ 6.0 ┆ 1.0
4676
+ # # │ 5.0 ┆ 7.0 ┆ 3.666667
4677
+ # # │ 9.0 ┆ 9.0 ┆ 6.333333
4678
+ # # │ 10.0 ┆ null ┆ 9.0
4679
+ # # └──────┴──────┴──────────┘
4684
4680
  def interpolate
4685
4681
  select(Utils.col("*").interpolate)
4686
4682
  end
@@ -4762,7 +4758,7 @@ module Polars
4762
4758
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
4763
4759
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
4764
4760
  def unnest(names)
4765
- if names.is_a?(String)
4761
+ if names.is_a?(::String)
4766
4762
  names = [names]
4767
4763
  end
4768
4764
  _from_rbdf(_df.unnest(names))
@@ -4875,10 +4871,10 @@ module Polars
4875
4871
  if val.is_a?(Hash) && dtype != Struct
4876
4872
  updated_data[name] = DataFrame.new(val).to_struct(name)
4877
4873
  elsif !Utils.arrlen(val).nil?
4878
- updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
4879
- elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
4874
+ updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
4875
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
4880
4876
  dtype = Polars::Float64 if val.nil? && dtype.nil?
4881
- updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
4877
+ updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
4882
4878
  else
4883
4879
  raise Todo
4884
4880
  end
@@ -4935,7 +4931,7 @@ module Polars
4935
4931
  end
4936
4932
  column_names =
4937
4933
  (schema || []).map.with_index do |col, i|
4938
- if col.is_a?(String)
4934
+ if col.is_a?(::String)
4939
4935
  col || "column_#{i}"
4940
4936
  else
4941
4937
  col[0]
@@ -4948,12 +4944,12 @@ module Polars
4948
4944
  lookup = column_names.zip(lookup_names || []).to_h
4949
4945
 
4950
4946
  column_dtypes =
4951
- (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4947
+ (schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
4952
4948
  [lookup[col[0]] || col[0], col[1]]
4953
4949
  end
4954
4950
 
4955
- if schema_overrides
4956
- raise Todo
4951
+ if schema_overrides && schema_overrides.any?
4952
+ column_dtypes.merge!(schema_overrides)
4957
4953
  end
4958
4954
 
4959
4955
  column_dtypes.each do |col, dtype|
@@ -5056,13 +5052,54 @@ module Polars
5056
5052
  return rbdf
5057
5053
  elsif data[0].is_a?(::Array)
5058
5054
  if orient.nil? && !columns.nil?
5059
- orient = columns.length == data.length ? "col" : "row"
5055
+ first_element = data[0]
5056
+ row_types = first_element.filter_map { |value| value.class }.uniq
5057
+ if row_types.include?(Integer) && row_types.include?(Float)
5058
+ row_types.delete(Integer)
5059
+ end
5060
+ orient = row_types.length == 1 ? "col" : "row"
5060
5061
  end
5061
5062
 
5062
5063
  if orient == "row"
5063
- raise Todo
5064
+ column_names, schema_overrides = _unpack_schema(
5065
+ schema, schema_overrides: schema_overrides, n_expected: first_element.length
5066
+ )
5067
+ local_schema_override = (
5068
+ schema_overrides.any? ? (raise Todo) : {}
5069
+ )
5070
+ if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
5071
+ raise ArgumentError, "the row data does not match the number of columns"
5072
+ end
5073
+
5074
+ unpack_nested = false
5075
+ local_schema_override.each do |col, tp|
5076
+ raise Todo
5077
+ end
5078
+
5079
+ if unpack_nested
5080
+ raise Todo
5081
+ else
5082
+ rbdf = RbDataFrame.read_rows(
5083
+ data,
5084
+ infer_schema_length,
5085
+ local_schema_override.any? ? local_schema_override : nil
5086
+ )
5087
+ end
5088
+ if column_names.any? || schema_overrides.any?
5089
+ rbdf = _post_apply_columns(
5090
+ rbdf, column_names, schema_overrides: schema_overrides
5091
+ )
5092
+ end
5093
+ return rbdf
5064
5094
  elsif orient == "col" || orient.nil?
5065
- raise Todo
5095
+ column_names, schema_overrides = _unpack_schema(
5096
+ schema, schema_overrides: schema_overrides, n_expected: data.length
5097
+ )
5098
+ data_series =
5099
+ data.map.with_index do |element, i|
5100
+ Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
5101
+ end
5102
+ return RbDataFrame.new(data_series)
5066
5103
  else
5067
5104
  raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
5068
5105
  end
@@ -5108,10 +5145,10 @@ module Polars
5108
5145
 
5109
5146
  def _compare_to_other_df(other, op)
5110
5147
  if columns != other.columns
5111
- raise ArgmentError, "DataFrame columns do not match"
5148
+ raise ArgumentError, "DataFrame columns do not match"
5112
5149
  end
5113
5150
  if shape != other.shape
5114
- raise ArgmentError, "DataFrame dimensions do not match"
5151
+ raise ArgumentError, "DataFrame dimensions do not match"
5115
5152
  end
5116
5153
 
5117
5154
  suffix = "__POLARS_CMP_OTHER"