polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -3,7 +3,23 @@ module Polars
3
3
  # Return an expression representing a literal value.
4
4
  #
5
5
  # @return [Expr]
6
- def lit(value, dtype: nil, allow_object: nil)
6
+ #
7
+ # @example Literal scalar values:
8
+ # Polars.lit(1)
9
+ # Polars.lit(5.5)
10
+ # Polars.lit(nil)
11
+ # Polars.lit("foo_bar")
12
+ # Polars.lit(Date.new(2021, 1, 20))
13
+ # Polars.lit(DateTime.new(2023, 3, 31, 10, 30, 45))
14
+ #
15
+ # @example Literal list/Series data (1D):
16
+ # Polars.lit([1, 2, 3])
17
+ # Polars.lit(Polars::Series.new("x", [1, 2, 3]))
18
+ #
19
+ # @example Literal list/Series data (2D):
20
+ # Polars.lit([[1, 2], [3, 4]])
21
+ # Polars.lit(Polars::Series.new("y", [[1, 2], [3, 4]]))
22
+ def lit(value, dtype: nil, allow_object: false)
7
23
  if value.is_a?(::Time) || value.is_a?(::DateTime)
8
24
  time_unit = dtype&.time_unit || "ns"
9
25
  time_zone = dtype.&time_zone
@@ -12,7 +12,7 @@ module Polars
12
12
  # @param step [Integer]
13
13
  # Step size of the range.
14
14
  # @param eager [Boolean]
15
- # If eager evaluation is `True`, a Series is returned instead of an Expr.
15
+ # If eager evaluation is `true`, a Series is returned instead of an Expr.
16
16
  # @param dtype [Symbol]
17
17
  # Apply an explicit integer dtype to the resulting expression (default is `Int64`).
18
18
  #
@@ -28,7 +28,7 @@ module Polars
28
28
  # # 1
29
29
  # # 2
30
30
  # # ]
31
- def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
31
+ def int_range(start = 0, stop = nil, step: 1, eager: false, dtype: Int64)
32
32
  if stop.nil?
33
33
  stop = start
34
34
  start = 0
@@ -47,5 +47,77 @@ module Polars
47
47
  result
48
48
  end
49
49
  alias_method :arange, :int_range
50
+
51
+ # Generate a range of integers for each row of the input columns.
52
+ #
53
+ # @param start [Integer, Expr, Series]
54
+ # Start of the range (inclusive). Defaults to 0.
55
+ # @param stop [Integer, Expr, Series]
56
+ # End of the range (exclusive). If set to `nil` (default),
57
+ # the value of `start` is used and `start` is set to `0`.
58
+ # @param step [Integer]
59
+ # Step size of the range.
60
+ # @param dtype [Object]
61
+ # Integer data type of the ranges. Defaults to `Int64`.
62
+ # @param eager [Boolean]
63
+ # Evaluate immediately and return a `Series`.
64
+ # If set to `false` (default), return an expression instead.
65
+ #
66
+ # @return [Expr, Series]
67
+ #
68
+ # @example
69
+ # df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2]})
70
+ # df.with_columns(int_range: Polars.int_ranges("start", "end"))
71
+ # # =>
72
+ # # shape: (2, 3)
73
+ # # ┌───────┬─────┬────────────┐
74
+ # # │ start ┆ end ┆ int_range │
75
+ # # │ --- ┆ --- ┆ --- │
76
+ # # │ i64 ┆ i64 ┆ list[i64] │
77
+ # # ╞═══════╪═════╪════════════╡
78
+ # # │ 1 ┆ 3 ┆ [1, 2] │
79
+ # # │ -1 ┆ 2 ┆ [-1, 0, 1] │
80
+ # # └───────┴─────┴────────────┘
81
+ #
82
+ # @example `end` can be omitted for a shorter syntax.
83
+ # df.select("end", int_range: Polars.int_ranges("end"))
84
+ # # =>
85
+ # # shape: (2, 2)
86
+ # # ┌─────┬───────────┐
87
+ # # │ end ┆ int_range │
88
+ # # │ --- ┆ --- │
89
+ # # │ i64 ┆ list[i64] │
90
+ # # ╞═════╪═══════════╡
91
+ # # │ 3 ┆ [0, 1, 2] │
92
+ # # │ 2 ┆ [0, 1] │
93
+ # # └─────┴───────────┘
94
def int_ranges(
  start = 0,
  stop = nil,
  step: 1,
  dtype: Int64,
  eager: false
)
  # Single-argument form: the lone positional value is the exclusive upper
  # bound, and every range starts at 0.
  start, stop = 0, start if stop.nil?

  # Convert every input before handing it to the native extension.
  dtype_expr = Utils.parse_into_datatype_expr(dtype)
  start_rbexpr = Utils.parse_into_expression(start)
  end_rbexpr = Utils.parse_into_expression(stop)
  step_rbexpr = Utils.parse_into_expression(step)

  result = Utils.wrap_expr(
    Plr.int_ranges(
      start_rbexpr, end_rbexpr, step_rbexpr, dtype_expr._rbdatatype_expr
    )
  )

  # Eager mode evaluates the expression right away and returns the Series.
  eager ? F.select(result).to_series : result
end
50
122
  end
51
123
  end
@@ -0,0 +1,77 @@
1
module Polars
  module Functions
    # Generate a sequence of evenly-spaced values for each row between `start` and `stop`.
    #
    # The number of values in each sequence is determined by `num_samples`.
    #
    # @param start [Object]
    #   Lower bound of the range.
    # @param stop [Object]
    #   Upper bound of the range.
    # @param num_samples [Integer, String]
    #   Number of samples in the output sequence. A column name can be given
    #   instead of a constant, as in the first example below.
    # @param closed ['both', 'left', 'right', 'none']
    #   Define which sides of the interval are closed (inclusive).
    # @param as_array [Boolean]
    #   Return result as a fixed-length `Array`. `num_samples` must be a constant.
    # @param eager [Boolean]
    #   Evaluate immediately and return a `Series`.
    #   If set to `false` (default), return an expression instead.
    #
    # @return [Expr, Series]
    #
    # @note
    #   This functionality is experimental. It may be changed at any point without it
    #   being considered a breaking change.
    #
    # @example
    #   df = Polars::DataFrame.new({"start" => [1, -1], "end" => [3, 2], "num_samples" => [4, 5]})
    #   df.with_columns(ls: Polars.linear_spaces("start", "end", "num_samples"))
    #   # =>
    #   # shape: (2, 4)
    #   # ┌───────┬─────┬─────────────┬────────────────────────┐
    #   # │ start ┆ end ┆ num_samples ┆ ls                     │
    #   # │ ---   ┆ --- ┆ ---         ┆ ---                    │
    #   # │ i64   ┆ i64 ┆ i64         ┆ list[f64]              │
    #   # ╞═══════╪═════╪═════════════╪════════════════════════╡
    #   # │ 1     ┆ 3   ┆ 4           ┆ [1.0, 1.666667, … 3.0] │
    #   # │ -1    ┆ 2   ┆ 5           ┆ [-1.0, -0.25, … 2.0]   │
    #   # └───────┴─────┴─────────────┴────────────────────────┘
    #
    # @example
    #   df.with_columns(ls: Polars.linear_spaces("start", "end", 3, as_array: true))
    #   # =>
    #   # shape: (2, 4)
    #   # ┌───────┬─────┬─────────────┬──────────────────┐
    #   # │ start ┆ end ┆ num_samples ┆ ls               │
    #   # │ ---   ┆ --- ┆ ---         ┆ ---              │
    #   # │ i64   ┆ i64 ┆ i64         ┆ array[f64, 3]    │
    #   # ╞═══════╪═════╪═════════════╪══════════════════╡
    #   # │ 1     ┆ 3   ┆ 4           ┆ [1.0, 2.0, 3.0]  │
    #   # │ -1    ┆ 2   ┆ 5           ┆ [-1.0, 0.5, 2.0] │
    #   # └───────┴─────┴─────────────┴──────────────────┘
    def linear_spaces(
      start,
      stop,
      num_samples,
      closed: "both",
      as_array: false,
      eager: false
    )
      # Convert the three positional inputs before calling the native extension.
      start_rbexpr = Utils.parse_into_expression(start)
      end_rbexpr = Utils.parse_into_expression(stop)
      num_samples_rbexpr = Utils.parse_into_expression(num_samples)

      result = Utils.wrap_expr(
        Plr.linear_spaces(
          start_rbexpr, end_rbexpr, num_samples_rbexpr, closed, as_array
        )
      )

      # Eager mode evaluates the expression right away and returns the Series.
      eager ? F.select(result).to_series : result
    end
  end
end
@@ -12,7 +12,7 @@ module Polars
12
12
  # Define which sides of the range are closed (inclusive).
13
13
  # @param eager [Boolean]
14
14
  # Evaluate immediately and return a `Series`.
15
- # If set to `False` (default), return an expression instead.
15
+ # If set to `false` (default), return an expression instead.
16
16
  #
17
17
  # @return [Object]
18
18
  #
@@ -12,8 +12,6 @@ module Polars
12
12
  # Int64 is required to fit the given value. Defaults to Float64 for float values.
13
13
  # @param eager [Boolean]
14
14
  # Run eagerly and collect into a `Series`.
15
- # @param name [String]
16
- # Only used in `eager` mode. As expression, use `alias`.
17
15
  #
18
16
  # @return [Object]
19
17
  #
@@ -38,20 +36,13 @@ module Polars
38
36
  # # 3
39
37
  # # 3
40
38
  # # ]
41
- def repeat(value, n, dtype: nil, eager: false, name: nil)
42
- if !name.nil?
43
- warn "the `name` argument is deprecated. Use the `alias` method instead."
44
- end
45
-
39
+ def repeat(value, n, dtype: nil, eager: false)
46
40
  if n.is_a?(Integer)
47
41
  n = lit(n)
48
42
  end
49
43
 
50
44
  value = Utils.parse_into_expression(value, str_as_lit: true)
51
45
  expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
52
- if !name.nil?
53
- expr = expr.alias(name)
54
- end
55
46
  if eager
56
47
  return select(expr).to_series
57
48
  end
@@ -82,7 +73,7 @@ module Polars
82
73
  # # 1
83
74
  # # 1
84
75
  # # ]
85
- def ones(n, dtype: nil, eager: true)
76
+ def ones(n, dtype: Float64, eager: false)
86
77
  if (zero = _one_or_zero_by_dtype(1, dtype)).nil?
87
78
  msg = "invalid dtype for `ones`; found #{dtype}"
88
79
  raise TypeError, msg
@@ -115,7 +106,7 @@ module Polars
115
106
  # # 0
116
107
  # # 0
117
108
  # # ]
118
- def zeros(n, dtype: nil, eager: true)
109
+ def zeros(n, dtype: Float64, eager: false)
119
110
  if (zero = _one_or_zero_by_dtype(0, dtype)).nil?
120
111
  msg = "invalid dtype for `zeros`; found #{dtype}"
121
112
  raise TypeError, msg
@@ -6,7 +6,7 @@ module Polars
6
6
  #
7
7
  # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
8
8
  # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
9
- # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
9
+ # df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
10
10
  # # =>
11
11
  # # shape: (3, 3)
12
12
  # # ┌─────┬─────┬─────────┐
@@ -40,7 +40,7 @@ module Polars
40
40
  # # │ 4 ┆ 0 ┆ 1 │
41
41
  # # └─────┴─────┴─────┘
42
42
  #
43
- # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`:
43
+ # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to true, are set to `null`:
44
44
  # df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
45
45
  # # =>
46
46
  # # shape: (3, 3)
@@ -2,9 +2,10 @@ module Polars
2
2
  # Starts a new GroupBy operation.
3
3
  class GroupBy
4
4
  # @private
5
- def initialize(df, by, maintain_order: false)
5
+ def initialize(df, by, maintain_order:, **named_by)
6
6
  @df = df
7
7
  @by = by
8
+ @named_by = named_by
8
9
  @maintain_order = maintain_order
9
10
  end
10
11
 
@@ -39,9 +40,9 @@ module Polars
39
40
  groups_df =
40
41
  @df.lazy
41
42
  .with_row_index(name: temp_col)
42
- .group_by(@by, maintain_order: @maintain_order)
43
+ .group_by(@by, **@named_by, maintain_order: @maintain_order)
43
44
  .agg(Polars.col(temp_col))
44
- .collect(no_optimization: true)
45
+ .collect(optimizations: QueryOptFlags.none)
45
46
 
46
47
  group_names = groups_df.select(Polars.all.exclude(temp_col))
47
48
 
@@ -202,9 +203,9 @@ module Polars
202
203
  # # └─────┴───────┴────────────────┘
203
204
  def agg(*aggs, **named_aggs)
204
205
  @df.lazy
205
- .group_by(@by, maintain_order: @maintain_order)
206
+ .group_by(@by, **@named_by, maintain_order: @maintain_order)
206
207
  .agg(*aggs, **named_aggs)
207
- .collect(no_optimization: true)
208
+ .collect(optimizations: QueryOptFlags.none)
208
209
  end
209
210
 
210
211
  # Get the first `n` rows of each group.
@@ -253,9 +254,9 @@ module Polars
253
254
  # # └─────────┴─────┘
254
255
  def head(n = 5)
255
256
  @df.lazy
256
- .group_by(@by, maintain_order: @maintain_order)
257
+ .group_by(@by, **@named_by, maintain_order: @maintain_order)
257
258
  .head(n)
258
- .collect(no_optimization: true)
259
+ .collect(optimizations: QueryOptFlags._eager)
259
260
  end
260
261
 
261
262
  # Get the last `n` rows of each group.
@@ -304,9 +305,71 @@ module Polars
304
305
  # # └─────────┴─────┘
305
306
  def tail(n = 5)
306
307
  @df.lazy
307
- .group_by(@by, maintain_order: @maintain_order)
308
+ .group_by(@by, **@named_by, maintain_order: @maintain_order)
308
309
  .tail(n)
309
- .collect(no_optimization: true)
310
+ .collect(optimizations: QueryOptFlags.none)
311
+ end
312
+
313
+ # Aggregate the groups into Series.
314
+ #
315
+ # @return [DataFrame]
316
+ #
317
+ # @example
318
+ # df = Polars::DataFrame.new({"a" => ["one", "two", "one", "two"], "b" => [1, 2, 3, 4]})
319
+ # df.group_by("a", maintain_order: true).all
320
+ # # =>
321
+ # # shape: (2, 2)
322
+ # # ┌─────┬───────────┐
323
+ # # │ a ┆ b │
324
+ # # │ --- ┆ --- │
325
+ # # │ str ┆ list[i64] │
326
+ # # ╞═════╪═══════════╡
327
+ # # │ one ┆ [1, 3] │
328
+ # # │ two ┆ [2, 4] │
329
+ # # └─────┴───────────┘
330
def all
  # Delegates to `agg` with the select-all expression.
  agg(F.all)
end
333
+
334
+ # Return the number of rows in each group.
335
+ #
336
+ # @param name [String]
337
+ # Assign a name to the resulting column; if unset, defaults to "len".
338
+ #
339
+ # @return [DataFrame]
340
+ #
341
+ # @example
342
+ # df = Polars::DataFrame.new({"a" => ["Apple", "Apple", "Orange"], "b" => [1, nil, 2]})
343
+ # df.group_by("a").len
344
+ # # =>
345
+ # # shape: (2, 2)
346
+ # # ┌────────┬─────┐
347
+ # # │ a ┆ len │
348
+ # # │ --- ┆ --- │
349
+ # # │ str ┆ u32 │
350
+ # # ╞════════╪═════╡
351
+ # # │ Apple ┆ 2 │
352
+ # # │ Orange ┆ 1 │
353
+ # # └────────┴─────┘
354
+ #
355
+ # @example
356
+ # df.group_by("a").len(name: "n")
357
+ # # =>
358
+ # # shape: (2, 2)
359
+ # # ┌────────┬─────┐
360
+ # # │ a ┆ n │
361
+ # # │ --- ┆ --- │
362
+ # # │ str ┆ u32 │
363
+ # # ╞════════╪═════╡
364
+ # # │ Apple ┆ 2 │
365
+ # # │ Orange ┆ 1 │
366
+ # # └────────┴─────┘
367
def len(name: nil)
  len_expr = F.len
  # Rename the count column only when the caller supplied a name.
  len_expr = len_expr.alias(name) unless name.nil?
  agg(len_expr)
end
311
374
 
312
375
  # Aggregate the first values in the group.
@@ -598,16 +661,5 @@ module Polars
598
661
  def median
599
662
  agg(Polars.all.median)
600
663
  end
601
-
602
- # Plot data.
603
- #
604
- # @return [Vega::LiteChart]
605
- def plot(*args, **options)
606
- raise ArgumentError, "Multiple groups not supported" if @by.is_a?(::Array) && @by.size > 1
607
- # same message as Ruby
608
- raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
609
-
610
- @df.plot(*args, **options, group: @by)
611
- end
612
664
  end
613
665
  end
@@ -12,11 +12,6 @@ module Polars
12
12
  end
13
13
 
14
14
  def to_lazyframe
15
- # for iceberg < 0.1.3
16
- if !@source.respond_to?(:scan)
17
- return @source.to_polars(snapshot_id: @snapshot_id, storage_options: @storage_options)
18
- end
19
-
20
15
  scan = @source.scan(snapshot_id: @snapshot_id)
21
16
  files = scan.plan_files
22
17
 
@@ -66,7 +61,7 @@ module Polars
66
61
  scan_options = {
67
62
  storage_options: @storage_options,
68
63
  cast_options: Polars::ScanCastOptions._default_iceberg,
69
- allow_missing_columns: true,
64
+ missing_columns: "insert",
70
65
  extra_columns: "ignore",
71
66
  _column_mapping: column_mapping,
72
67
  _deletion_files: deletion_files
@@ -0,0 +1,37 @@
1
module Polars
  # A placeholder for an in-process query.
  #
  # This can be used to do something else while a query is running.
  # The query can be cancelled. You can peek whether the query is finished,
  # or you can await the result.
  class InProcessQuery
    # @private
    attr_accessor :_inner

    # @param ipq [Object]
    #   Handle to the running query. It must respond to `cancel`, `fetch`
    #   and `fetch_blocking`.
    def initialize(ipq)
      self._inner = ipq
    end

    # Cancel the query at earliest convenience.
    #
    # @return [Object]
    #   Whatever the underlying handle's `cancel` returns.
    def cancel
      _inner.cancel
    end

    # Fetch the result.
    #
    # If it is ready, a materialized DataFrame is returned.
    # If it is not ready it will return `nil`.
    #
    # @return [DataFrame, nil]
    def fetch
      out = _inner.fetch
      # A nil handle result means "not ready yet"; pass that through unwrapped.
      Utils.wrap_df(out) unless out.nil?
    end

    # Await the result synchronously.
    #
    # @return [DataFrame]
    def fetch_blocking
      Utils.wrap_df(_inner.fetch_blocking)
    end
  end
end
@@ -0,0 +1,18 @@
1
module Polars
  module IO
    private

    # Placeholder for building a credential provider.
    #
    # Only the absence of a provider (`nil`/`false`) or the string "auto" is
    # accepted; any other value raises `Todo`. The `source`,
    # `storage_options` and `caller_name` arguments are accepted but not used
    # by this implementation.
    #
    # @return [nil]
    def _init_credential_provider_builder(
      credential_provider,
      source,
      storage_options,
      caller_name
    )
      raise Todo if credential_provider && credential_provider != "auto"

      nil
    end
  end
end
+ end