polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,144 @@
1
+ module Polars
2
+ module Functions
3
+ # Repeat a single value n times.
4
+ #
5
+ # @param value [Object]
6
+ # Value to repeat.
7
+ # @param n [Integer]
8
+ # Repeat `n` times.
9
+ # @param eager [Boolean]
10
+ # Run eagerly and collect into a `Series`.
11
+ # @param name [String]
12
+ # Only used in `eager` mode. As expression, use `alias`.
13
+ #
14
+ # @return [Object]
15
+ #
16
+ # @example Construct a column with a repeated value in a lazy context.
17
+ # Polars.select(Polars.repeat("z", 3)).to_series
18
+ # # =>
19
+ # # shape: (3,)
20
+ # # Series: 'repeat' [str]
21
+ # # [
22
+ # # "z"
23
+ # # "z"
24
+ # # "z"
25
+ # # ]
26
+ #
27
+ # @example Generate a Series directly by setting `eager: true`.
28
+ # Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
29
+ # # =>
30
+ # # shape: (3,)
31
+ # # Series: 'repeat' [i8]
32
+ # # [
33
+ # # 3
34
+ # # 3
35
+ # # 3
36
+ # # ]
37
def repeat(value, n, dtype: nil, eager: false, name: nil)
  # `name` is deprecated: warn up front, but still honour it further down.
  named = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if named

  # A plain Integer count is wrapped in a literal so `_rbexpr` can be
  # called on it below.
  n = lit(n) if n.is_a?(Integer)

  value = Utils.parse_into_expression(value, str_as_lit: true)
  repeated = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
  repeated = repeated.alias(name) if named

  # Eager mode evaluates the expression through `select` and returns the
  # resulting series; otherwise the expression itself is returned.
  eager ? select(repeated).to_series : repeated
end
56
+
57
+ # Construct a column of length `n` filled with ones.
58
+ #
59
+ # This is syntactic sugar for the `repeat` function.
60
+ #
61
+ # @param n [Integer]
62
+ # Length of the resulting column.
63
+ # @param dtype [Object]
64
+ # Data type of the resulting column. Defaults to Float64.
65
+ # @param eager [Boolean]
66
+ # Evaluate immediately and return a `Series`. If set to `false`,
67
+ # return an expression instead.
68
+ #
69
+ # @return [Object]
70
+ #
71
+ # @example
72
+ # Polars.ones(3, dtype: Polars::Int8, eager: true)
73
+ # # =>
74
+ # # shape: (3,)
75
+ # # Series: 'ones' [i8]
76
+ # # [
77
+ # # 1
78
+ # # 1
79
+ # # 1
80
+ # # ]
81
def ones(n, dtype: nil, eager: true)
  # The documented default dtype is Float64. Without this fallback a `nil`
  # dtype reaches `_one_or_zero_by_dtype`, which calls `dtype.integer?`
  # and fails with NoMethodError instead of building the column.
  dtype = Float64 if dtype.nil?

  # The helper returns nil for dtypes it cannot build a "one" for.
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end
89
+
90
+ # Construct a column of length `n` filled with zeros.
91
+ #
92
+ # This is syntactic sugar for the `repeat` function.
93
+ #
94
+ # @param n [Integer]
95
+ # Length of the resulting column.
96
+ # @param dtype [Object]
97
+ # Data type of the resulting column. Defaults to Float64.
98
+ # @param eager [Boolean]
99
+ # Evaluate immediately and return a `Series`. If set to `false`,
100
+ # return an expression instead.
101
+ #
102
+ # @return [Object]
103
+ #
104
+ # @example
105
+ # Polars.zeros(3, dtype: Polars::Int8, eager: true)
106
+ # # =>
107
+ # # shape: (3,)
108
+ # # Series: 'zeros' [i8]
109
+ # # [
110
+ # # 0
111
+ # # 0
112
+ # # 0
113
+ # # ]
114
def zeros(n, dtype: nil, eager: true)
  # The documented default dtype is Float64. Without this fallback a `nil`
  # dtype reaches `_one_or_zero_by_dtype`, which calls `dtype.integer?`
  # and fails with NoMethodError instead of building the column.
  dtype = Float64 if dtype.nil?

  # The helper returns nil for dtypes it cannot build a "zero" for.
  zero = _one_or_zero_by_dtype(0, dtype)
  raise TypeError, "invalid dtype for `zeros`; found #{dtype}" if zero.nil?

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end
122
+
123
+ private
124
+
125
# Build the Ruby value that represents `value` (0 or 1) for `dtype`.
#
# @param value [Integer]
#   The number to represent; callers in this file pass 0 or 1.
# @param dtype [Object]
#   Target data type. Must respond to `integer?` and `float?`.
#
# @return [Object, nil]
#   The converted value, or `nil` when `dtype` matches no branch.
def _one_or_zero_by_dtype(value, dtype)
  if dtype.integer?
    value
  elsif dtype.float?
    value.to_f
  elsif dtype == Boolean
    value != 0
  elsif dtype == Utf8
    value.to_s
  elsif dtype == Decimal
    # `Decimal` is the dtype constant compared against above, not a
    # conversion function; the previous `Decimal(value.to_s)` would raise
    # NoMethodError. BigDecimal is Ruby's arbitrary-precision decimal.
    require "bigdecimal"
    BigDecimal(value.to_s)
  elsif [List, Array].include?(dtype)
    # Types that do not report a width are treated as width 1.
    arr_width = dtype.respond_to?(:width) ? dtype.width : 1
    [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
  end
end
143
+ end
144
+ end
@@ -0,0 +1,96 @@
1
+ module Polars
2
+ module Functions
3
+ # Start a "when, then, otherwise" expression.
4
+ #
5
+ # @return [When]
6
+ #
7
+ # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
8
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
9
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
10
+ # # =>
11
+ # # shape: (3, 3)
12
+ # # ┌─────┬─────┬─────────┐
13
+ # # │ foo ┆ bar ┆ literal │
14
+ # # │ --- ┆ --- ┆ --- │
15
+ # # │ i64 ┆ i64 ┆ i32 │
16
+ # # ╞═════╪═════╪═════════╡
17
+ # # │ 1 ┆ 3 ┆ -1 │
18
+ # # │ 3 ┆ 4 ┆ 1 │
19
+ # # │ 4 ┆ 0 ┆ 1 │
20
+ # # └─────┴─────┴─────────┘
21
+ #
22
+ # @example Or with multiple when-then operations chained:
23
+ # df.with_columns(
24
+ # Polars.when(Polars.col("foo") > 2)
25
+ # .then(1)
26
+ # .when(Polars.col("bar") > 2)
27
+ # .then(4)
28
+ # .otherwise(-1)
29
+ # .alias("val")
30
+ # )
31
+ # # =>
32
+ # # shape: (3, 3)
33
+ # # ┌─────┬─────┬─────┐
34
+ # # │ foo ┆ bar ┆ val │
35
+ # # │ --- ┆ --- ┆ --- │
36
+ # # │ i64 ┆ i64 ┆ i32 │
37
+ # # ╞═════╪═════╪═════╡
38
+ # # │ 1 ┆ 3 ┆ 4 │
39
+ # # │ 3 ┆ 4 ┆ 1 │
40
+ # # │ 4 ┆ 0 ┆ 1 │
41
+ # # └─────┴─────┴─────┘
42
+ #
43
+ # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to `true` are set to `null`:
44
+ # df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
45
+ # # =>
46
+ # # shape: (3, 3)
47
+ # # ┌─────┬─────┬──────┐
48
+ # # │ foo ┆ bar ┆ val │
49
+ # # │ --- ┆ --- ┆ --- │
50
+ # # │ i64 ┆ i64 ┆ i32 │
51
+ # # ╞═════╪═════╪══════╡
52
+ # # │ 1 ┆ 3 ┆ null │
53
+ # # │ 3 ┆ 4 ┆ 1 │
54
+ # # │ 4 ┆ 0 ┆ 1 │
55
+ # # └─────┴─────┴──────┘
56
+ #
57
+ # @example Pass multiple predicates, each of which must be met:
58
+ # df.with_columns(
59
+ # val: Polars.when(
60
+ # Polars.col("bar") > 0,
61
+ # Polars.col("foo") % 2 != 0
62
+ # )
63
+ # .then(99)
64
+ # .otherwise(-1)
65
+ # )
66
+ # # =>
67
+ # # shape: (3, 3)
68
+ # # ┌─────┬─────┬─────┐
69
+ # # │ foo ┆ bar ┆ val │
70
+ # # │ --- ┆ --- ┆ --- │
71
+ # # │ i64 ┆ i64 ┆ i32 │
72
+ # # ╞═════╪═════╪═════╡
73
+ # # │ 1 ┆ 3 ┆ 99 │
74
+ # # │ 3 ┆ 4 ┆ 99 │
75
+ # # │ 4 ┆ 0 ┆ -1 │
76
+ # # └─────┴─────┴─────┘
77
+ #
78
+ # @example Pass conditions as keyword arguments:
79
+ # df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
80
+ # # =>
81
+ # # shape: (3, 3)
82
+ # # ┌─────┬─────┬─────┐
83
+ # # │ foo ┆ bar ┆ val │
84
+ # # │ --- ┆ --- ┆ --- │
85
+ # # │ i64 ┆ i64 ┆ i32 │
86
+ # # ╞═════╪═════╪═════╡
87
+ # # │ 1 ┆ 3 ┆ -1 │
88
+ # # │ 3 ┆ 4 ┆ -1 │
89
+ # # │ 4 ┆ 0 ┆ 99 │
90
+ # # └─────┴─────┴─────┘
91
def when(*predicates, **constraints)
  # Combine the positional predicates and keyword constraints into one
  # condition, then wrap the native `when` handle in a `When` builder.
  When.new(
    Plr.when(
      Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
    )
  )
end
95
+ end
96
+ end
@@ -0,0 +1,57 @@
1
+ module Polars
2
+ module Functions
3
+ # Convert categorical variables into dummy/indicator variables.
4
+ #
5
+ # @param df [DataFrame]
6
+ # DataFrame to convert.
7
+ # @param columns [Array, nil]
8
+ # A subset of columns to convert to dummy variables. `nil` means
9
+ # "all columns".
10
+ #
11
+ # @return [DataFrame]
12
def get_dummies(df, columns: nil)
  # Thin wrapper: the frame performs the conversion itself.
  options = {columns: columns}
  df.to_dummies(**options)
end
15
+
16
+ # Aggregate to list.
17
+ #
18
+ # @return [Expr]
19
def to_list(name)
  # Resolve the column first, then call `list` on it.
  column = col(name)
  column.list
end
22
+
23
+ # Compute the Spearman rank correlation between two columns.
24
+ #
25
+ # Missing data will be excluded from the computation.
26
+ #
27
+ # @param a [Object]
28
+ # Column name or Expression.
29
+ # @param b [Object]
30
+ # Column name or Expression.
31
+ # @param ddof [Integer]
32
+ # Delta degrees of freedom
33
+ # @param propagate_nans [Boolean]
34
+ # If `true`, any `NaN` encountered will lead to `NaN` in the output.
35
+ # Defaults to `false`, where `NaN` values are regarded as larger than any finite number
36
+ # and thus lead to the highest rank.
37
+ #
38
+ # @return [Expr]
39
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Delegate to the general `corr` with the method fixed to "spearman".
  options = {method: "spearman", ddof: ddof, propagate_nans: propagate_nans}
  corr(a, b, **options)
end
42
+
43
+ # Compute the Pearson correlation between two columns.
44
+ #
45
+ # @param a [Object]
46
+ # Column name or Expression.
47
+ # @param b [Object]
48
+ # Column name or Expression.
49
+ # @param ddof [Integer]
50
+ # Delta degrees of freedom
51
+ #
52
+ # @return [Expr]
53
def pearson_corr(a, b, ddof: 1)
  # Delegate to the general `corr` with the method fixed to "pearson".
  options = {method: "pearson", ddof: ddof}
  corr(a, b, **options)
end
56
+ end
57
+ end