polars-df 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +272 -191
  4. data/Cargo.toml +0 -1
  5. data/README.md +2 -2
  6. data/ext/polars/Cargo.toml +8 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +106 -4
  12. data/ext/polars/src/dataframe.rs +19 -17
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/expr/general.rs +933 -0
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +205 -303
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +99 -539
  38. data/lib/polars/convert.rb +2 -2
  39. data/lib/polars/data_frame.rb +201 -50
  40. data/lib/polars/data_types.rb +6 -4
  41. data/lib/polars/date_time_expr.rb +142 -2
  42. data/lib/polars/expr.rb +70 -10
  43. data/lib/polars/lazy_frame.rb +4 -3
  44. data/lib/polars/lazy_functions.rb +4 -1
  45. data/lib/polars/list_expr.rb +68 -19
  46. data/lib/polars/series.rb +181 -73
  47. data/lib/polars/string_expr.rb +149 -43
  48. data/lib/polars/string_name_space.rb +4 -4
  49. data/lib/polars/struct_name_space.rb +32 -0
  50. data/lib/polars/utils.rb +41 -7
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +2 -2
  53. metadata +26 -11
  54. data/ext/polars/src/lazy/dsl.rs +0 -1775
  55. data/ext/polars/src/lazy/mod.rs +0 -5
  56. data/ext/polars/src/lazy/utils.rs +0 -13
  57. data/ext/polars/src/list_construction.rs +0 -100
  58. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  59. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -60,5 +60,37 @@ module Polars
60
60
  def rename_fields(names)
61
61
  super
62
62
  end
63
+
64
+ # Get the struct definition as a name/dtype schema dict.
65
+ #
66
+ # @return [Object]
67
+ def schema
68
+ if _s.nil?
69
+ {}
70
+ else
71
+ _s.dtype.to_schema
72
+ end
73
+ end
74
+
75
+ # Convert this struct Series to a DataFrame with a separate column for each field.
76
+ #
77
+ # @return [DataFrame]
78
+ #
79
+ # @example
80
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
81
+ # s.struct.unnest
82
+ # # =>
83
+ # # shape: (2, 2)
84
+ # # ┌─────┬─────┐
85
+ # # │ a ┆ b │
86
+ # # │ --- ┆ --- │
87
+ # # │ i64 ┆ i64 │
88
+ # # ╞═════╪═════╡
89
+ # # │ 1 ┆ 2 │
90
+ # # │ 3 ┆ 4 │
91
+ # # └─────┴─────┘
92
+ def unnest
93
+ Utils.wrap_df(_s.struct_unnest)
94
+ end
63
95
  end
64
96
  end
data/lib/polars/utils.rb CHANGED
@@ -23,24 +23,42 @@ module Polars
23
23
  Polars.col(name)
24
24
  end
25
25
 
26
+ def self.arrlen(obj)
27
+ if obj.is_a?(Range)
28
+ # size only works for numeric ranges
29
+ obj.to_a.length
30
+ elsif obj.is_a?(String)
31
+ nil
32
+ else
33
+ obj.length
34
+ end
35
+ rescue
36
+ nil
37
+ end
38
+
26
39
  def self._timedelta_to_pl_duration(td)
27
40
  td
28
41
  end
29
42
 
30
43
  def self._datetime_to_pl_timestamp(dt, tu)
31
44
  if tu == "ns"
32
- (dt.to_datetime.utc.to_f * 1e9).to_i
45
+ (dt.to_datetime.to_time.to_f * 1e9).to_i
33
46
  elsif tu == "us"
34
- (dt.to_datetime.utc.to_f * 1e6).to_i
47
+ (dt.to_datetime.to_time.to_f * 1e6).to_i
35
48
  elsif tu == "ms"
36
- (dt.to_datetime.utc.to_f * 1e3).to_i
49
+ (dt.to_datetime.to_time.to_f * 1e3).to_i
37
50
  elsif tu.nil?
38
- (dt.to_datetime.utc.to_f * 1e6).to_i
51
+ (dt.to_datetime.to_time.to_f * 1e6).to_i
39
52
  else
40
53
  raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
41
54
  end
42
55
  end
43
56
 
57
+ def self._date_to_pl_date(d)
58
+ dt = d.to_datetime.to_time
59
+ dt.to_i / (3600 * 24)
60
+ end
61
+
44
62
  def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
63
  if dtype == :date || dtype == Date
46
64
  # days to seconds
@@ -69,6 +87,18 @@ module Polars
69
87
  end
70
88
  end
71
89
 
90
+ def self._to_ruby_duration(value, tu = "ns")
91
+ if tu == "ns"
92
+ value / 1e9
93
+ elsif tu == "us"
94
+ value / 1e6
95
+ elsif tu == "ms"
96
+ value / 1e3
97
+ else
98
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
99
+ end
100
+ end
101
+
72
102
  def self.selection_to_rbexpr_list(exprs)
73
103
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
104
  exprs = [exprs]
@@ -102,7 +132,10 @@ module Polars
102
132
  end
103
133
 
104
134
  # TODO fix
105
- def self.is_polars_dtype(data_type)
135
+ def self.is_polars_dtype(data_type, include_unknown: false)
136
+ if data_type == Unknown
137
+ return include_unknown
138
+ end
106
139
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
107
140
  end
108
141
 
@@ -113,7 +146,8 @@ module Polars
113
146
  TrueClass => :bool,
114
147
  FalseClass => :bool,
115
148
  ::Date => :date,
116
- ::DateTime => :datetime
149
+ ::DateTime => :datetime,
150
+ ::Time => :datetime
117
151
  }
118
152
 
119
153
  # TODO fix
@@ -178,7 +212,7 @@ module Polars
178
212
  end
179
213
 
180
214
  def self.bool?(value)
181
- value == true || value == false
215
+ value.is_a?(TrueClass) || value.is_a?(FalseClass)
182
216
  end
183
217
 
184
218
  def self.strlike?(value)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.4.0"
3
+ VERSION = "0.5.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # ext
2
2
  begin
3
- require_relative "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require_relative "polars/polars"
5
+ require "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-01 00:00:00.000000000 Z
11
+ date: 2023-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -40,27 +40,42 @@ files:
40
40
  - ext/polars/Cargo.toml
41
41
  - ext/polars/extconf.rb
42
42
  - ext/polars/src/apply/dataframe.rs
43
+ - ext/polars/src/apply/lazy.rs
43
44
  - ext/polars/src/apply/mod.rs
44
45
  - ext/polars/src/apply/series.rs
45
46
  - ext/polars/src/batched_csv.rs
46
47
  - ext/polars/src/conversion.rs
47
48
  - ext/polars/src/dataframe.rs
48
49
  - ext/polars/src/error.rs
50
+ - ext/polars/src/expr.rs
51
+ - ext/polars/src/expr/binary.rs
52
+ - ext/polars/src/expr/categorical.rs
53
+ - ext/polars/src/expr/datetime.rs
54
+ - ext/polars/src/expr/general.rs
55
+ - ext/polars/src/expr/list.rs
56
+ - ext/polars/src/expr/meta.rs
57
+ - ext/polars/src/expr/string.rs
58
+ - ext/polars/src/expr/struct.rs
49
59
  - ext/polars/src/file.rs
50
- - ext/polars/src/lazy/apply.rs
51
- - ext/polars/src/lazy/dataframe.rs
52
- - ext/polars/src/lazy/dsl.rs
53
- - ext/polars/src/lazy/meta.rs
54
- - ext/polars/src/lazy/mod.rs
55
- - ext/polars/src/lazy/utils.rs
60
+ - ext/polars/src/functions/eager.rs
61
+ - ext/polars/src/functions/io.rs
62
+ - ext/polars/src/functions/lazy.rs
63
+ - ext/polars/src/functions/meta.rs
64
+ - ext/polars/src/functions/mod.rs
65
+ - ext/polars/src/functions/whenthen.rs
66
+ - ext/polars/src/lazyframe.rs
67
+ - ext/polars/src/lazygroupby.rs
56
68
  - ext/polars/src/lib.rs
57
- - ext/polars/src/list_construction.rs
58
- - ext/polars/src/numo.rs
59
69
  - ext/polars/src/object.rs
60
70
  - ext/polars/src/prelude.rs
61
71
  - ext/polars/src/rb_modules.rs
62
72
  - ext/polars/src/series.rs
63
- - ext/polars/src/set.rs
73
+ - ext/polars/src/series/aggregation.rs
74
+ - ext/polars/src/series/arithmetic.rs
75
+ - ext/polars/src/series/comparison.rs
76
+ - ext/polars/src/series/construction.rs
77
+ - ext/polars/src/series/export.rs
78
+ - ext/polars/src/series/set_at_idx.rs
64
79
  - ext/polars/src/utils.rs
65
80
  - lib/polars-df.rb
66
81
  - lib/polars.rb