polars-df 0.13.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,144 @@
1
# BigDecimal backs the Decimal branch of `_one_or_zero_by_dtype`.
require "bigdecimal"

module Polars
  module Functions
    # Repeat a single value n times.
    #
    # @param value [Object]
    #   Value to repeat. It is parsed into an expression, with strings
    #   treated as literals.
    # @param n [Integer, Object]
    #   Repeat `n` times. An Integer is converted with `lit`; any other
    #   object is used as-is and must respond to `_rbexpr`.
    # @param dtype [Object]
    #   Data type forwarded to the native `repeat` call. `nil` is forwarded
    #   unchanged.
    # @param eager [Boolean]
    #   Run eagerly and collect into a `Series`.
    # @param name [String]
    #   Deprecated. Passing a non-nil value emits a warning and aliases the
    #   result to this name. Call `alias` on the result instead.
    #
    # @return [Object]
    #   The result of `select(expr).to_series` when `eager` is truthy,
    #   otherwise the wrapped expression.
    #
    # @example Construct a column with a repeated value in a lazy context.
    #   Polars.select(Polars.repeat("z", 3)).to_series
    #   # =>
    #   # shape: (3,)
    #   # Series: 'repeat' [str]
    #   # [
    #   #         "z"
    #   #         "z"
    #   #         "z"
    #   # ]
    #
    # @example Generate a Series directly by setting `eager: true`.
    #   Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'repeat' [i8]
    #   # [
    #   #         3
    #   #         3
    #   #         3
    #   # ]
    def repeat(value, n, dtype: nil, eager: false, name: nil)
      unless name.nil?
        warn "the `name` argument is deprecated. Use the `alias` method instead."
      end

      # Plain integers are promoted to literal expressions so that
      # `_rbexpr` can be read from `n` below.
      n = lit(n) if n.is_a?(Integer)

      value = Utils.parse_into_expression(value, str_as_lit: true)
      expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
      expr = expr.alias(name) unless name.nil?

      eager ? select(expr).to_series : expr
    end

    # Construct a column of length `n` filled with ones.
    #
    # This is syntactic sugar for the `repeat` function.
    #
    # @param n [Integer]
    #   Length of the resulting column.
    # @param dtype [Object]
    #   Data type of the resulting column. Defaults to Float64 when `nil`.
    # @param eager [Boolean]
    #   Evaluate immediately and return a `Series`. If set to `false`,
    #   return an expression instead.
    #
    # @return [Object]
    #   The result of `repeat`, aliased to "ones".
    #
    # @raise [TypeError]
    #   If no "one" value can be derived for `dtype`.
    #
    # @example
    #   Polars.ones(3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'ones' [i8]
    #   # [
    #   #         1
    #   #         1
    #   #         1
    #   # ]
    def ones(n, dtype: nil, eager: true)
      # Honour the documented default; a nil dtype used to fail with
      # NoMethodError inside `_one_or_zero_by_dtype`.
      dtype ||= Float64

      one = _one_or_zero_by_dtype(1, dtype)
      if one.nil?
        msg = "invalid dtype for `ones`; found #{dtype}"
        raise TypeError, msg
      end

      repeat(one, n, dtype: dtype, eager: eager).alias("ones")
    end

    # Construct a column of length `n` filled with zeros.
    #
    # This is syntactic sugar for the `repeat` function.
    #
    # @param n [Integer]
    #   Length of the resulting column.
    # @param dtype [Object]
    #   Data type of the resulting column. Defaults to Float64 when `nil`.
    # @param eager [Boolean]
    #   Evaluate immediately and return a `Series`. If set to `false`,
    #   return an expression instead.
    #
    # @return [Object]
    #   The result of `repeat`, aliased to "zeros".
    #
    # @raise [TypeError]
    #   If no "zero" value can be derived for `dtype`.
    #
    # @example
    #   Polars.zeros(3, dtype: Polars::Int8, eager: true)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'zeros' [i8]
    #   # [
    #   #         0
    #   #         0
    #   #         0
    #   # ]
    def zeros(n, dtype: nil, eager: true)
      # Honour the documented default; a nil dtype used to fail with
      # NoMethodError inside `_one_or_zero_by_dtype`.
      dtype ||= Float64

      zero = _one_or_zero_by_dtype(0, dtype)
      if zero.nil?
        msg = "invalid dtype for `zeros`; found #{dtype}"
        raise TypeError, msg
      end

      repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
    end

    private

    # Convert the Integer `value` (0 or 1 from the callers in this file)
    # into the Ruby object that represents it for `dtype`.
    #
    # Returns `nil` when `dtype` matches none of the handled cases; callers
    # turn that into a TypeError.
    def _one_or_zero_by_dtype(value, dtype)
      if dtype.integer?
        value
      elsif dtype.float?
        value.to_f
      elsif dtype == Boolean
        value != 0
      elsif dtype == Utf8
        value.to_s
      elsif dtype == Decimal
        # `Decimal` here is the Polars dtype, not a conversion method; the
        # Ruby value for a decimal is built with BigDecimal.
        BigDecimal(value.to_s)
      elsif [List, Array].include?(dtype)
        # A dtype without `width` is treated as holding a single element.
        arr_width = dtype.respond_to?(:width) ? dtype.width : 1
        [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
module Polars
  module Functions
    # Start a "when, then, otherwise" expression.
    #
    # @param predicates [Array<Object>]
    #   Conditions to test. When several are given, each of them must be met.
    # @param constraints [Hash]
    #   Conditions given as keyword arguments (column name => value).
    #
    # @return [When]
    #
    # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
    #   df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
    #   df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────────┐
    #   # │ foo ┆ bar ┆ literal │
    #   # │ --- ┆ --- ┆ ---     │
    #   # │ i64 ┆ i64 ┆ i32     │
    #   # ╞═════╪═════╪═════════╡
    #   # │ 1   ┆ 3   ┆ -1      │
    #   # │ 3   ┆ 4   ┆ 1       │
    #   # │ 4   ┆ 0   ┆ 1       │
    #   # └─────┴─────┴─────────┘
    #
    # @example Or with multiple when-then operations chained:
    #   df.with_columns(
    #     Polars.when(Polars.col("foo") > 2)
    #       .then(1)
    #       .when(Polars.col("bar") > 2)
    #       .then(4)
    #       .otherwise(-1)
    #       .alias("val")
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ foo ┆ bar ┆ val │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ i64 ┆ i64 ┆ i32 │
    #   # ╞═════╪═════╪═════╡
    #   # │ 1   ┆ 3   ┆ 4   │
    #   # │ 3   ┆ 4   ┆ 1   │
    #   # │ 4   ┆ 0   ┆ 1   │
    #   # └─────┴─────┴─────┘
    #
    # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to true are set to `null`:
    #   df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬──────┐
    #   # │ foo ┆ bar ┆ val  │
    #   # │ --- ┆ --- ┆ ---  │
    #   # │ i64 ┆ i64 ┆ i32  │
    #   # ╞═════╪═════╪══════╡
    #   # │ 1   ┆ 3   ┆ null │
    #   # │ 3   ┆ 4   ┆ 1    │
    #   # │ 4   ┆ 0   ┆ 1    │
    #   # └─────┴─────┴──────┘
    #
    # @example Pass multiple predicates, each of which must be met:
    #   df.with_columns(
    #     val: Polars.when(
    #       Polars.col("bar") > 0,
    #       Polars.col("foo") % 2 != 0
    #     )
    #     .then(99)
    #     .otherwise(-1)
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ foo ┆ bar ┆ val │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ i64 ┆ i64 ┆ i32 │
    #   # ╞═════╪═════╪═════╡
    #   # │ 1   ┆ 3   ┆ 99  │
    #   # │ 3   ┆ 4   ┆ 99  │
    #   # │ 4   ┆ 0   ┆ -1  │
    #   # └─────┴─────┴─────┘
    #
    # @example Pass conditions as keyword arguments:
    #   df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
    #   # =>
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ foo ┆ bar ┆ val │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ i64 ┆ i64 ┆ i32 │
    #   # ╞═════╪═════╪═════╡
    #   # │ 1   ┆ 3   ┆ -1  │
    #   # │ 3   ┆ 4   ┆ -1  │
    #   # │ 4   ┆ 0   ┆ 99  │
    #   # └─────┴─────┴─────┘
    def when(*predicates, **constraints)
      # Fold every predicate and keyword constraint into one condition,
      # hand it to the native `when`, and wrap the result.
      When.new(
        Plr.when(
          Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
        )
      )
    end
  end
end
@@ -0,0 +1,57 @@
1
module Polars
  module Functions
    # Convert categorical variables into dummy/indicator variables.
    #
    # Delegates to `df.to_dummies`.
    #
    # @param df [DataFrame]
    #   DataFrame to convert.
    # @param columns [Array, nil]
    #   A subset of columns to convert to dummy variables. `nil` means
    #   "all columns".
    #
    # @return [DataFrame]
    def get_dummies(df, columns: nil)
      dummy_options = {columns: columns}
      df.to_dummies(**dummy_options)
    end

    # Aggregate to list.
    #
    # NOTE(review): this returns whatever `list` yields on the column
    # expression. If `list` is the list-namespace accessor rather than an
    # aggregation, the documented return type below is wrong - confirm.
    #
    # @param name [Object]
    #   Column name, passed to `col`.
    #
    # @return [Expr]
    def to_list(name)
      column = col(name)
      column.list
    end

    # Compute the Spearman rank correlation between two columns.
    #
    # Missing data will be excluded from the computation.
    #
    # @param a [Object]
    #   Column name or Expression.
    # @param b [Object]
    #   Column name or Expression.
    # @param ddof [Integer]
    #   Delta degrees of freedom.
    # @param propagate_nans [Boolean]
    #   If `true`, any NaN encountered will lead to NaN in the output.
    #   Defaults to `false`, where NaN values are regarded as larger than any
    #   finite number and thus lead to the highest rank.
    #
    # @return [Expr]
    def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
      corr_options = {method: "spearman", ddof: ddof, propagate_nans: propagate_nans}
      corr(a, b, **corr_options)
    end

    # Compute the Pearson correlation between two columns.
    #
    # @param a [Object]
    #   Column name or Expression.
    # @param b [Object]
    #   Column name or Expression.
    # @param ddof [Integer]
    #   Delta degrees of freedom.
    #
    # @return [Expr]
    def pearson_corr(a, b, ddof: 1)
      corr_options = {method: "pearson", ddof: ddof}
      corr(a, b, **corr_options)
    end
  end
end