polars-df 0.9.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Cargo.lock +144 -57
  4. data/README.md +7 -6
  5. data/ext/polars/Cargo.toml +10 -6
  6. data/ext/polars/src/batched_csv.rs +53 -50
  7. data/ext/polars/src/conversion/anyvalue.rs +3 -2
  8. data/ext/polars/src/conversion/mod.rs +31 -67
  9. data/ext/polars/src/dataframe/construction.rs +186 -0
  10. data/ext/polars/src/dataframe/export.rs +48 -0
  11. data/ext/polars/src/dataframe/general.rs +607 -0
  12. data/ext/polars/src/dataframe/io.rs +463 -0
  13. data/ext/polars/src/dataframe/mod.rs +26 -0
  14. data/ext/polars/src/expr/array.rs +6 -2
  15. data/ext/polars/src/expr/datetime.rs +13 -4
  16. data/ext/polars/src/expr/general.rs +50 -9
  17. data/ext/polars/src/expr/list.rs +6 -2
  18. data/ext/polars/src/expr/rolling.rs +185 -69
  19. data/ext/polars/src/expr/string.rs +12 -33
  20. data/ext/polars/src/file.rs +158 -11
  21. data/ext/polars/src/functions/lazy.rs +20 -3
  22. data/ext/polars/src/functions/range.rs +74 -0
  23. data/ext/polars/src/functions/whenthen.rs +47 -17
  24. data/ext/polars/src/interop/mod.rs +1 -0
  25. data/ext/polars/src/interop/numo/mod.rs +2 -0
  26. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  27. data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
  28. data/ext/polars/src/lazyframe/mod.rs +111 -56
  29. data/ext/polars/src/lib.rs +68 -34
  30. data/ext/polars/src/map/dataframe.rs +17 -9
  31. data/ext/polars/src/map/lazy.rs +5 -25
  32. data/ext/polars/src/map/series.rs +7 -1
  33. data/ext/polars/src/series/aggregation.rs +47 -30
  34. data/ext/polars/src/series/export.rs +131 -49
  35. data/ext/polars/src/series/mod.rs +13 -133
  36. data/lib/polars/array_expr.rb +6 -2
  37. data/lib/polars/batched_csv_reader.rb +11 -3
  38. data/lib/polars/convert.rb +6 -1
  39. data/lib/polars/data_frame.rb +225 -370
  40. data/lib/polars/date_time_expr.rb +11 -4
  41. data/lib/polars/date_time_name_space.rb +14 -4
  42. data/lib/polars/dynamic_group_by.rb +2 -2
  43. data/lib/polars/exceptions.rb +4 -0
  44. data/lib/polars/expr.rb +1171 -54
  45. data/lib/polars/functions/lazy.rb +3 -3
  46. data/lib/polars/functions/range/date_range.rb +92 -0
  47. data/lib/polars/functions/range/datetime_range.rb +149 -0
  48. data/lib/polars/functions/range/time_range.rb +141 -0
  49. data/lib/polars/functions/whenthen.rb +74 -5
  50. data/lib/polars/group_by.rb +88 -23
  51. data/lib/polars/io/avro.rb +24 -0
  52. data/lib/polars/{io.rb → io/csv.rb} +307 -489
  53. data/lib/polars/io/database.rb +73 -0
  54. data/lib/polars/io/ipc.rb +247 -0
  55. data/lib/polars/io/json.rb +18 -0
  56. data/lib/polars/io/ndjson.rb +69 -0
  57. data/lib/polars/io/parquet.rb +226 -0
  58. data/lib/polars/lazy_frame.rb +55 -195
  59. data/lib/polars/lazy_group_by.rb +100 -3
  60. data/lib/polars/list_expr.rb +6 -2
  61. data/lib/polars/rolling_group_by.rb +2 -2
  62. data/lib/polars/series.rb +14 -12
  63. data/lib/polars/string_expr.rb +38 -36
  64. data/lib/polars/utils.rb +89 -1
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars/whenthen.rb +83 -0
  67. data/lib/polars.rb +10 -3
  68. metadata +23 -8
  69. data/ext/polars/src/dataframe.rs +0 -1182
  70. data/lib/polars/when.rb +0 -16
  71. data/lib/polars/when_then.rb +0 -19
data/lib/polars/lazy_frame.rb CHANGED
@@ -27,145 +27,6 @@ module Polars
27
27
  ldf
28
28
  end
29
29
 
30
- # @private
31
- def self._scan_csv(
32
- file,
33
- has_header: true,
34
- sep: ",",
35
- comment_char: nil,
36
- quote_char: '"',
37
- skip_rows: 0,
38
- dtypes: nil,
39
- null_values: nil,
40
- ignore_errors: false,
41
- cache: true,
42
- with_column_names: nil,
43
- infer_schema_length: 100,
44
- n_rows: nil,
45
- encoding: "utf8",
46
- low_memory: false,
47
- rechunk: true,
48
- skip_rows_after_header: 0,
49
- row_count_name: nil,
50
- row_count_offset: 0,
51
- parse_dates: false,
52
- eol_char: "\n"
53
- )
54
- dtype_list = nil
55
- if !dtypes.nil?
56
- dtype_list = []
57
- dtypes.each do |k, v|
58
- dtype_list << [k, Utils.rb_type_to_dtype(v)]
59
- end
60
- end
61
- processed_null_values = Utils._process_null_values(null_values)
62
-
63
- _from_rbldf(
64
- RbLazyFrame.new_from_csv(
65
- file,
66
- sep,
67
- has_header,
68
- ignore_errors,
69
- skip_rows,
70
- n_rows,
71
- cache,
72
- dtype_list,
73
- low_memory,
74
- comment_char,
75
- quote_char,
76
- processed_null_values,
77
- infer_schema_length,
78
- with_column_names,
79
- rechunk,
80
- skip_rows_after_header,
81
- encoding,
82
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
83
- parse_dates,
84
- eol_char
85
- )
86
- )
87
- end
88
-
89
- # @private
90
- def self._scan_parquet(
91
- file,
92
- n_rows: nil,
93
- cache: true,
94
- parallel: "auto",
95
- rechunk: true,
96
- row_count_name: nil,
97
- row_count_offset: 0,
98
- storage_options: nil,
99
- low_memory: false,
100
- use_statistics: true,
101
- hive_partitioning: true
102
- )
103
- _from_rbldf(
104
- RbLazyFrame.new_from_parquet(
105
- file,
106
- n_rows,
107
- cache,
108
- parallel,
109
- rechunk,
110
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
111
- low_memory,
112
- use_statistics,
113
- hive_partitioning
114
- )
115
- )
116
- end
117
-
118
- # @private
119
- def self._scan_ipc(
120
- file,
121
- n_rows: nil,
122
- cache: true,
123
- rechunk: true,
124
- row_count_name: nil,
125
- row_count_offset: 0,
126
- storage_options: nil,
127
- memory_map: true
128
- )
129
- if Utils.pathlike?(file)
130
- file = Utils.normalise_filepath(file)
131
- end
132
-
133
- _from_rbldf(
134
- RbLazyFrame.new_from_ipc(
135
- file,
136
- n_rows,
137
- cache,
138
- rechunk,
139
- Utils._prepare_row_count_args(row_count_name, row_count_offset),
140
- memory_map
141
- )
142
- )
143
- end
144
-
145
- # @private
146
- def self._scan_ndjson(
147
- file,
148
- infer_schema_length: nil,
149
- batch_size: nil,
150
- n_rows: nil,
151
- low_memory: false,
152
- rechunk: true,
153
- row_count_name: nil,
154
- row_count_offset: 0
155
- )
156
- _from_rbldf(
157
- RbLazyFrame.new_from_ndjson(
158
- file,
159
- infer_schema_length,
160
- batch_size,
161
- n_rows,
162
- low_memory,
163
- rechunk,
164
- Utils._prepare_row_count_args(row_count_name, row_count_offset)
165
- )
166
- )
167
- end
168
-
169
30
  # def self.from_json
170
31
  # end
171
32
 
@@ -177,7 +38,7 @@ module Polars
177
38
  # @return [LazyFrame]
178
39
  def self.read_json(file)
179
40
  if Utils.pathlike?(file)
180
- file = Utils.normalise_filepath(file)
41
+ file = Utils.normalize_filepath(file)
181
42
  end
182
43
 
183
44
  Utils.wrap_ldf(RbLazyFrame.read_json(file))
@@ -284,7 +145,7 @@ module Polars
284
145
  # @return [nil]
285
146
  def write_json(file)
286
147
  if Utils.pathlike?(file)
287
- file = Utils.normalise_filepath(file)
148
+ file = Utils.normalize_filepath(file)
288
149
  end
289
150
  _ldf.write_json(file)
290
151
  nil
@@ -400,16 +261,16 @@ module Polars
400
261
  # # │ 2 ┆ 7.0 ┆ b │
401
262
  # # │ 1 ┆ 6.0 ┆ a │
402
263
  # # └─────┴─────┴─────┘
403
- def sort(by, reverse: false, nulls_last: false, maintain_order: false)
264
+ def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
404
265
  if by.is_a?(::String)
405
- return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order))
266
+ return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded))
406
267
  end
407
268
  if Utils.bool?(reverse)
408
269
  reverse = [reverse]
409
270
  end
410
271
 
411
272
  by = Utils.selection_to_rbexpr_list(by)
412
- _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order))
273
+ _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded))
413
274
  end
414
275
 
415
276
  # def profile
@@ -1133,7 +994,7 @@ module Polars
1133
994
  # # ┌─────────┐
1134
995
  # # │ literal │
1135
996
  # # │ --- │
1136
- # # │ i64 │
997
+ # # │ i32 │
1137
998
  # # ╞═════════╡
1138
999
  # # │ 0 │
1139
1000
  # # │ 0 │
@@ -1445,16 +1306,16 @@ module Polars
1445
1306
  # )
1446
1307
  # # =>
1447
1308
  # # shape: (4, 3)
1448
- # # ┌─────────────────────┬────────────┬───────────────────────────────────┐
1449
- # # │ time ┆ time_count ┆ time_agg_list │
1450
- # # │ --- ┆ --- ┆ --- │
1451
- # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
1452
- # # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
1453
- # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16… │
1454
- # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16… │
1455
- # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16… │
1456
- # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
1457
- # # └─────────────────────┴────────────┴───────────────────────────────────┘
1309
+ # # ┌─────────────────────┬────────────┬─────────────────────────────────┐
1310
+ # # │ time ┆ time_count ┆ time_agg_list │
1311
+ # # │ --- ┆ --- ┆ --- │
1312
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
1313
+ # # ╞═════════════════════╪════════════╪═════════════════════════════════╡
1314
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
1315
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
1316
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
1317
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
1318
+ # # └─────────────────────┴────────────┴─────────────────────────────────┘
1458
1319
  #
1459
1320
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
1460
1321
  # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
@@ -1523,12 +1384,13 @@ module Polars
1523
1384
  # closed: "right"
1524
1385
  # ).agg(Polars.col("A").alias("A_agg_list"))
1525
1386
  # # =>
1526
- # # shape: (3, 4)
1387
+ # # shape: (4, 4)
1527
1388
  # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
1528
1389
  # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
1529
1390
  # # │ --- ┆ --- ┆ --- ┆ --- │
1530
1391
  # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
1531
1392
  # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
1393
+ # # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
1532
1394
  # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
1533
1395
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
1534
1396
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
@@ -1837,7 +1699,7 @@ module Polars
1837
1699
  if how == "cross"
1838
1700
  return _from_rbldf(
1839
1701
  _ldf.join(
1840
- other._ldf, [], [], allow_parallel, force_parallel, how, suffix
1702
+ other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
1841
1703
  )
1842
1704
  )
1843
1705
  end
@@ -1891,16 +1753,16 @@ module Polars
1891
1753
  # ).collect
1892
1754
  # # =>
1893
1755
  # # shape: (4, 6)
1894
- # # ┌─────┬──────┬───────┬──────┬──────┬───────┐
1895
- # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
1896
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1897
- # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
1898
- # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
1899
- # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
1900
- # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
1901
- # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
1902
- # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
1903
- # # └─────┴──────┴───────┴──────┴──────┴───────┘
1756
+ # # ┌─────┬──────┬───────┬─────┬──────┬───────┐
1757
+ # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
1758
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1759
+ # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
1760
+ # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
1761
+ # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
1762
+ # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
1763
+ # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
1764
+ # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
1765
+ # # └─────┴──────┴───────┴─────┴──────┴───────┘
1904
1766
  def with_columns(*exprs, **named_exprs)
1905
1767
  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
1906
1768
  rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify)
@@ -1965,26 +1827,26 @@ module Polars
1965
1827
  # # ┌─────┬─────┬───────────┐
1966
1828
  # # │ a ┆ b ┆ b_squared │
1967
1829
  # # │ --- ┆ --- ┆ --- │
1968
- # # │ i64 ┆ i64 ┆ f64 │
1830
+ # # │ i64 ┆ i64 ┆ i64 │
1969
1831
  # # ╞═════╪═════╪═══════════╡
1970
- # # │ 1 ┆ 2 ┆ 4.0 │
1971
- # # │ 3 ┆ 4 ┆ 16.0 │
1972
- # # │ 5 ┆ 6 ┆ 36.0 │
1832
+ # # │ 1 ┆ 2 ┆ 4 │
1833
+ # # │ 3 ┆ 4 ┆ 16 │
1834
+ # # │ 5 ┆ 6 ┆ 36 │
1973
1835
  # # └─────┴─────┴───────────┘
1974
1836
  #
1975
1837
  # @example
1976
1838
  # df.with_column(Polars.col("a") ** 2).collect
1977
1839
  # # =>
1978
1840
  # # shape: (3, 2)
1979
- # # ┌──────┬─────┐
1980
- # # │ a ┆ b │
1981
- # # │ --- ┆ --- │
1982
- # # │ f64 ┆ i64 │
1983
- # # ╞══════╪═════╡
1984
- # # │ 1.0 ┆ 2 │
1985
- # # │ 9.0 ┆ 4 │
1986
- # # │ 25.0 ┆ 6 │
1987
- # # └──────┴─────┘
1841
+ # # ┌─────┬─────┐
1842
+ # # │ a ┆ b │
1843
+ # # │ --- ┆ --- │
1844
+ # # │ i64 ┆ i64 │
1845
+ # # ╞═════╪═════╡
1846
+ # # │ 1 ┆ 2 │
1847
+ # # │ 9 ┆ 4 │
1848
+ # # │ 25 ┆ 6 │
1849
+ # # └─────┴─────┘
1988
1850
  def with_column(column)
1989
1851
  with_columns([column])
1990
1852
  end
@@ -1996,11 +1858,9 @@ module Polars
1996
1858
  # - List of column names.
1997
1859
  #
1998
1860
  # @return [LazyFrame]
1999
- def drop(columns)
2000
- if columns.is_a?(::String)
2001
- columns = [columns]
2002
- end
2003
- _from_rbldf(_ldf.drop(columns))
1861
+ def drop(*columns)
1862
+ drop_cols = Utils._expand_selectors(self, *columns)
1863
+ _from_rbldf(_ldf.drop(drop_cols))
2004
1864
  end
2005
1865
 
2006
1866
  # Rename column names.
@@ -2233,16 +2093,16 @@ module Polars
2233
2093
  # df.with_row_index.collect
2234
2094
  # # =>
2235
2095
  # # shape: (3, 3)
2236
- # # ┌────────┬─────┬─────┐
2237
- # # │ row_nr ┆ a ┆ b │
2238
- # # │ --- ┆ --- ┆ --- │
2239
- # # │ u32 ┆ i64 ┆ i64 │
2240
- # # ╞════════╪═════╪═════╡
2241
- # # │ 0 ┆ 1 ┆ 2 │
2242
- # # │ 1 ┆ 3 ┆ 4 │
2243
- # # │ 2 ┆ 5 ┆ 6 │
2244
- # # └────────┴─────┴─────┘
2245
- def with_row_index(name: "row_nr", offset: 0)
2096
+ # # ┌───────┬─────┬─────┐
2097
+ # # │ index ┆ a ┆ b │
2098
+ # # │ --- ┆ --- ┆ --- │
2099
+ # # │ u32 ┆ i64 ┆ i64 │
2100
+ # # ╞═══════╪═════╪═════╡
2101
+ # # │ 0 ┆ 1 ┆ 2 │
2102
+ # # │ 1 ┆ 3 ┆ 4 │
2103
+ # # │ 2 ┆ 5 ┆ 6 │
2104
+ # # └───────┴─────┴─────┘
2105
+ def with_row_index(name: "index", offset: 0)
2246
2106
  _from_rbldf(_ldf.with_row_index(name, offset))
2247
2107
  end
2248
2108
  alias_method :with_row_count, :with_row_index
data/lib/polars/lazy_group_by.rb CHANGED
@@ -6,11 +6,108 @@ module Polars
6
6
  @lgb = lgb
7
7
  end
8
8
 
9
- # Describe the aggregation that need to be done on a group.
9
+ # Compute aggregations for each group of a group by operation.
10
+ #
11
+ # @param aggs [Array]
12
+ # Aggregations to compute for each group of the group by operation,
13
+ # specified as positional arguments.
14
+ # Accepts expression input. Strings are parsed as column names.
15
+ # @param named_aggs [Hash]
16
+ # Additional aggregations, specified as keyword arguments.
17
+ # The resulting columns will be renamed to the keyword used.
10
18
  #
11
19
  # @return [LazyFrame]
12
- def agg(aggs)
13
- rbexprs = Utils.selection_to_rbexpr_list(aggs)
20
+ #
21
+ # @example Compute the aggregation of the columns for each group.
22
+ # ldf = Polars::DataFrame.new(
23
+ # {
24
+ # "a" => ["a", "b", "a", "b", "c"],
25
+ # "b" => [1, 2, 1, 3, 3],
26
+ # "c" => [5, 4, 3, 2, 1]
27
+ # }
28
+ # ).lazy
29
+ # ldf.group_by("a").agg(
30
+ # [Polars.col("b"), Polars.col("c")]
31
+ # ).collect
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌─────┬───────────┬───────────┐
35
+ # # │ a ┆ b ┆ c │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ list[i64] ┆ list[i64] │
38
+ # # ╞═════╪═══════════╪═══════════╡
39
+ # # │ a ┆ [1, 1] ┆ [5, 3] │
40
+ # # │ b ┆ [2, 3] ┆ [4, 2] │
41
+ # # │ c ┆ [3] ┆ [1] │
42
+ # # └─────┴───────────┴───────────┘
43
+ #
44
+ # @example Compute the sum of a column for each group.
45
+ # ldf.group_by("a").agg(
46
+ # Polars.col("b").sum
47
+ # ).collect
48
+ # # =>
49
+ # # shape: (3, 2)
50
+ # # ┌─────┬─────┐
51
+ # # │ a ┆ b │
52
+ # # │ --- ┆ --- │
53
+ # # │ str ┆ i64 │
54
+ # # ╞═════╪═════╡
55
+ # # │ a ┆ 2 │
56
+ # # │ b ┆ 5 │
57
+ # # │ c ┆ 3 │
58
+ # # └─────┴─────┘
59
+ #
60
+ # @example Compute multiple aggregates at once by passing a list of expressions.
61
+ # ldf.group_by("a").agg(
62
+ # [Polars.sum("b"), Polars.mean("c")]
63
+ # ).collect
64
+ # # =>
65
+ # # shape: (3, 3)
66
+ # # ┌─────┬─────┬─────┐
67
+ # # │ a ┆ b ┆ c │
68
+ # # │ --- ┆ --- ┆ --- │
69
+ # # │ str ┆ i64 ┆ f64 │
70
+ # # ╞═════╪═════╪═════╡
71
+ # # │ c ┆ 3 ┆ 1.0 │
72
+ # # │ a ┆ 2 ┆ 4.0 │
73
+ # # │ b ┆ 5 ┆ 3.0 │
74
+ # # └─────┴─────┴─────┘
75
+ #
76
+ # @example Or use positional arguments to compute multiple aggregations in the same way.
77
+ # ldf.group_by("a").agg(
78
+ # Polars.sum("b").name.suffix("_sum"),
79
+ # (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
80
+ # ).collect
81
+ # # =>
82
+ # # shape: (3, 3)
83
+ # # ┌─────┬───────┬────────────────┐
84
+ # # │ a ┆ b_sum ┆ c_mean_squared │
85
+ # # │ --- ┆ --- ┆ --- │
86
+ # # │ str ┆ i64 ┆ f64 │
87
+ # # ╞═════╪═══════╪════════════════╡
88
+ # # │ a ┆ 2 ┆ 17.0 │
89
+ # # │ c ┆ 3 ┆ 1.0 │
90
+ # # │ b ┆ 5 ┆ 10.0 │
91
+ # # └─────┴───────┴────────────────┘
92
+ #
93
+ # @example Use keyword arguments to easily name your expression inputs.
94
+ # ldf.group_by("a").agg(
95
+ # b_sum: Polars.sum("b"),
96
+ # c_mean_squared: (Polars.col("c") ** 2).mean
97
+ # ).collect
98
+ # # =>
99
+ # # shape: (3, 3)
100
+ # # ┌─────┬───────┬────────────────┐
101
+ # # │ a ┆ b_sum ┆ c_mean_squared │
102
+ # # │ --- ┆ --- ┆ --- │
103
+ # # │ str ┆ i64 ┆ f64 │
104
+ # # ╞═════╪═══════╪════════════════╡
105
+ # # │ a ┆ 2 ┆ 17.0 │
106
+ # # │ c ┆ 3 ┆ 1.0 │
107
+ # # │ b ┆ 5 ┆ 10.0 │
108
+ # # └─────┴───────┴────────────────┘
109
+ def agg(*aggs, **named_aggs)
110
+ rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
14
111
  Utils.wrap_ldf(@lgb.agg(rbexprs))
15
112
  end
16
113
 
data/lib/polars/list_expr.rb CHANGED
@@ -365,6 +365,10 @@ module Polars
365
365
  #
366
366
  # @param index [Integer]
367
367
  # Index to return per sublist
368
+ # @param null_on_oob [Boolean]
369
+ # Behavior if an index is out of bounds:
370
+ # true -> set as null
371
+ # false -> raise an error
368
372
  #
369
373
  # @return [Expr]
370
374
  #
@@ -382,9 +386,9 @@ module Polars
382
386
  # # │ null │
383
387
  # # │ 1 │
384
388
  # # └──────┘
385
- def get(index)
389
+ def get(index, null_on_oob: true)
386
390
  index = Utils.parse_as_expression(index)
387
- Utils.wrap_expr(_rbexpr.list_get(index))
391
+ Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
388
392
  end
389
393
 
390
394
  # Get the value by index in the sublists.
data/lib/polars/rolling_group_by.rb CHANGED
@@ -25,12 +25,12 @@ module Polars
25
25
  @check_sorted = check_sorted
26
26
  end
27
27
 
28
- def agg(aggs)
28
+ def agg(*aggs, **named_aggs)
29
29
  @df.lazy
30
30
  .group_by_rolling(
31
31
  index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
32
32
  )
33
- .agg(aggs)
33
+ .agg(*aggs, **named_aggs)
34
34
  .collect(no_optimization: true, string_cache: false)
35
35
  end
36
36
  end
data/lib/polars/series.rb CHANGED
@@ -1155,13 +1155,13 @@ module Polars
1155
1155
  # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2)
1156
1156
  # # =>
1157
1157
  # # shape: (5,)
1158
- # # Series: 'values' [f64]
1158
+ # # Series: 'values' [i64]
1159
1159
  # # [
1160
- # # 0.0
1161
- # # -3.0
1162
- # # -8.0
1163
- # # -15.0
1164
- # # -24.0
1160
+ # # 0
1161
+ # # -3
1162
+ # # -8
1163
+ # # -15
1164
+ # # -24
1165
1165
  # # ]
1166
1166
  def cumulative_eval(expr, min_periods: 1, parallel: false)
1167
1167
  super
@@ -1567,12 +1567,12 @@ module Polars
1567
1567
  # # 2
1568
1568
  # # 1
1569
1569
  # # ]
1570
- def sort(reverse: false, nulls_last: false, in_place: false)
1570
+ def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false)
1571
1571
  if in_place
1572
- self._s = _s.sort(reverse, nulls_last)
1572
+ self._s = _s.sort(reverse, nulls_last, multithreaded)
1573
1573
  self
1574
1574
  else
1575
- Utils.wrap_s(_s.sort(reverse, nulls_last))
1575
+ Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded))
1576
1576
  end
1577
1577
  end
1578
1578
 
@@ -1594,7 +1594,7 @@ module Polars
1594
1594
  # # 4
1595
1595
  # # 3
1596
1596
  # # ]
1597
- def top_k(k: 5)
1597
+ def top_k(k: 5, nulls_last: false, multithreaded: true)
1598
1598
  super
1599
1599
  end
1600
1600
 
@@ -1616,7 +1616,7 @@ module Polars
1616
1616
  # # 2
1617
1617
  # # 3
1618
1618
  # # ]
1619
- def bottom_k(k: 5)
1619
+ def bottom_k(k: 5, nulls_last: false, multithreaded: true)
1620
1620
  super
1621
1621
  end
1622
1622
 
@@ -3646,6 +3646,8 @@ module Polars
3646
3646
  # on the order that the values occur in the Series.
3647
3647
  # @param reverse [Boolean]
3648
3648
  # Reverse the operation.
3649
+ # @param seed [Integer]
3650
+ # If `method: "random"`, use this as seed.
3649
3651
  #
3650
3652
  # @return [Series]
3651
3653
  #
@@ -3676,7 +3678,7 @@ module Polars
3676
3678
  # # 2
3677
3679
  # # 5
3678
3680
  # # ]
3679
- def rank(method: "average", reverse: false)
3681
+ def rank(method: "average", reverse: false, seed: nil)
3680
3682
  super
3681
3683
  end
3682
3684