polars-df 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -1,11 +1,17 @@
1
1
  module Polars
2
+ # Namespace for struct related expressions.
2
3
  class StructExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Retrieve one of the fields of this `Struct` as a new Series.
13
+ #
14
+ # @return [Expr]
9
15
  def [](item)
10
16
  if item.is_a?(String)
11
17
  field(item)
@@ -16,10 +22,77 @@ module Polars
16
22
  end
17
23
  end
18
24
 
25
+ # Retrieve one of the fields of this `Struct` as a new Series.
26
+ #
27
+ # @param name [String]
28
+ # Name of the field
29
+ #
30
+ # @return [Expr]
31
+ #
32
+ # @example
33
+ # df = (
34
+ # Polars::DataFrame.new(
35
+ # {
36
+ # "int" => [1, 2],
37
+ # "str" => ["a", "b"],
38
+ # "bool" => [true, nil],
39
+ # "list" => [[1, 2], [3]]
40
+ # }
41
+ # )
42
+ # .to_struct("my_struct")
43
+ # .to_frame
44
+ # )
45
+ # df.select(Polars.col("my_struct").struct.field("str"))
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ str │
50
+ # # │ --- │
51
+ # # │ str │
52
+ # # ╞═════╡
53
+ # # │ a │
54
+ # # ├╌╌╌╌╌┤
55
+ # # │ b │
56
+ # # └─────┘
19
57
  def field(name)
20
58
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
21
59
  end
22
60
 
61
+ # Rename the fields of the struct.
62
+ #
63
+ # @param names [Array]
64
+ # New names in the order of the struct's fields
65
+ #
66
+ # @return [Expr]
67
+ #
68
+ # @example
69
+ # df = (
70
+ # Polars::DataFrame.new(
71
+ # {
72
+ # "int" => [1, 2],
73
+ # "str" => ["a", "b"],
74
+ # "bool" => [true, nil],
75
+ # "list" => [[1, 2], [3]]
76
+ # }
77
+ # )
78
+ # .to_struct("my_struct")
79
+ # .to_frame
80
+ # )
81
+ # df = df.with_column(
82
+ # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
83
+ # )
84
+ # df.select(Polars.col("my_struct").struct.field("INT"))
85
+ # # =>
86
+ # # shape: (2, 1)
87
+ # # ┌─────┐
88
+ # # │ INT │
89
+ # # │ --- │
90
+ # # │ i64 │
91
+ # # ╞═════╡
92
+ # # │ 1 │
93
+ # # ├╌╌╌╌╌┤
94
+ # # │ 2 │
95
+ # # └─────┘
23
96
  def rename_fields(names)
24
97
  Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
25
98
  end
@@ -0,0 +1,64 @@
1
+ module Polars
2
+ # Series.struct namespace.
3
+ class StructNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "struct"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Retrieve one of the fields of this `Struct` as a new Series.
14
+ #
15
+ # @return [Series]
16
+ def [](item)
17
+ if item.is_a?(Integer)
18
+ field(fields[item])
19
+ elsif item.is_a?(String)
20
+ field(item)
21
+ else
22
+ raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
23
+ end
24
+ end
25
+
26
+ # Convert this Struct Series to a DataFrame.
27
+ #
28
+ # @return [DataFrame]
29
+ def to_frame
30
+ Utils.wrap_df(_s.struct_to_frame)
31
+ end
32
+
33
+ # Get the names of the fields.
34
+ #
35
+ # @return [Array]
36
+ def fields
37
+ if _s.nil?
38
+ []
39
+ else
40
+ _s.struct_fields
41
+ end
42
+ end
43
+
44
+ # Retrieve one of the fields of this `Struct` as a new Series.
45
+ #
46
+ # @param name [String]
47
+ # Name of the field
48
+ #
49
+ # @return [Series]
50
+ def field(name)
51
+ super
52
+ end
53
+
54
+ # Rename the fields of the struct.
55
+ #
56
+ # @param names [Array]
57
+ # New names in the order of the struct's fields
58
+ #
59
+ # @return [Series]
60
+ def rename_fields(names)
61
+ super
62
+ end
63
+ end
64
+ end
data/lib/polars/utils.rb CHANGED
@@ -19,6 +19,52 @@ module Polars
19
19
  Polars.col(name)
20
20
  end
21
21
 
22
+ def self._timedelta_to_pl_duration(td)
23
+ td
24
+ end
25
+
26
+ def self._datetime_to_pl_timestamp(dt, tu)
27
+ if tu == "ns"
28
+ (dt.to_datetime.utc.to_f * 1e9).to_i
29
+ elsif tu == "us"
30
+ (dt.to_datetime.utc.to_f * 1e6).to_i
31
+ elsif tu == "ms"
32
+ (dt.to_datetime.utc.to_f * 1e3).to_i
33
+ elsif tu.nil?
34
+ (dt.to_datetime.utc.to_f * 1e6).to_i
35
+ else
36
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
37
+ end
38
+ end
39
+
40
+ def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
41
+ if dtype == :date
42
+ # days to seconds
43
+ # important to create from utc. Not doing this leads
44
+ # to inconsistencies dependent on the timezone you are in.
45
+ Time.at(value * 86400).utc.to_date
46
+ # TODO fix dtype
47
+ elsif dtype.to_s.start_with?("datetime[")
48
+ if tz.nil? || tz == ""
49
+ if tu == "ns"
50
+ raise Todo
51
+ elsif tu == "us"
52
+ dt = Time.at(value / 1000000, value % 1000000, :usec).utc
53
+ elsif tu == "ms"
54
+ raise Todo
55
+ else
56
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
57
+ end
58
+ else
59
+ raise Todo
60
+ end
61
+
62
+ dt
63
+ else
64
+ raise NotImplementedError
65
+ end
66
+ end
67
+
22
68
  def self.selection_to_rbexpr_list(exprs)
23
69
  if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
24
70
  exprs = [exprs]
@@ -49,12 +95,30 @@ module Polars
49
95
 
50
96
  # TODO fix
51
97
  def self.is_polars_dtype(data_type)
52
- true
98
+ data_type.is_a?(Symbol) || data_type.is_a?(String)
53
99
  end
54
100
 
101
+ RB_TYPE_TO_DTYPE = {
102
+ Float => :f64,
103
+ Integer => :i64,
104
+ String => :str,
105
+ TrueClass => :bool,
106
+ FalseClass => :bool,
107
+ Date => :date,
108
+ DateTime => :datetime
109
+ }
110
+
55
111
  # TODO fix
56
- def self.rb_type_to_dtype(dtype)
57
- dtype.to_s
112
+ def self.rb_type_to_dtype(data_type)
113
+ if is_polars_dtype(data_type)
114
+ return data_type.to_s
115
+ end
116
+
117
+ begin
118
+ RB_TYPE_TO_DTYPE.fetch(data_type).to_s
119
+ rescue KeyError
120
+ raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
121
+ end
58
122
  end
59
123
 
60
124
  def self._process_null_values(null_values)
@@ -103,5 +167,9 @@ module Polars
103
167
  sz
104
168
  end
105
169
  end
170
+
171
+ def self.bool?(value)
172
+ value == true || value == false
173
+ end
106
174
  end
107
175
  end
@@ -1,3 +1,4 @@
1
1
  module Polars
2
- VERSION = "0.1.2"
2
+ # @private
3
+ VERSION = "0.1.4"
3
4
  end
data/lib/polars/when.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class When
3
4
  attr_accessor :_rbwhen
4
5
 
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class WhenThen
3
4
  attr_accessor :_rbwhenthen
4
5
 
data/lib/polars.rb CHANGED
@@ -1,38 +1,40 @@
1
1
  # ext
2
2
  require "polars/polars"
3
3
 
4
+ # stdlib
5
+ require "date"
6
+
4
7
  # modules
8
+ require "polars/expr_dispatch"
5
9
  require "polars/batched_csv_reader"
6
10
  require "polars/cat_expr"
11
+ require "polars/cat_name_space"
7
12
  require "polars/data_frame"
8
13
  require "polars/date_time_expr"
14
+ require "polars/date_time_name_space"
15
+ require "polars/exceptions"
9
16
  require "polars/expr"
10
17
  require "polars/functions"
18
+ require "polars/group_by"
11
19
  require "polars/io"
12
20
  require "polars/lazy_frame"
13
21
  require "polars/lazy_functions"
14
22
  require "polars/lazy_group_by"
15
23
  require "polars/list_expr"
24
+ require "polars/list_name_space"
16
25
  require "polars/meta_expr"
17
26
  require "polars/series"
27
+ require "polars/slice"
18
28
  require "polars/string_expr"
29
+ require "polars/string_name_space"
19
30
  require "polars/struct_expr"
31
+ require "polars/struct_name_space"
20
32
  require "polars/utils"
21
33
  require "polars/version"
22
34
  require "polars/when"
23
35
  require "polars/when_then"
24
36
 
25
37
  module Polars
26
- # @private
27
- class Error < StandardError; end
28
-
29
- # @private
30
- class Todo < Error
31
- def message
32
- "not implemented yet"
33
- end
34
- end
35
-
36
38
  extend Functions
37
39
  extend IO
38
40
  extend LazyFunctions
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-26 00:00:00.000000000 Z
11
+ date: 2022-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -31,6 +31,7 @@ extensions:
31
31
  - ext/polars/extconf.rb
32
32
  extra_rdoc_files: []
33
33
  files:
34
+ - ".yardopts"
34
35
  - CHANGELOG.md
35
36
  - Cargo.lock
36
37
  - Cargo.toml
@@ -50,24 +51,36 @@ files:
50
51
  - ext/polars/src/lazy/mod.rs
51
52
  - ext/polars/src/lazy/utils.rs
52
53
  - ext/polars/src/lib.rs
54
+ - ext/polars/src/list_construction.rs
53
55
  - ext/polars/src/series.rs
56
+ - ext/polars/src/set.rs
57
+ - ext/polars/src/utils.rs
54
58
  - lib/polars-df.rb
55
59
  - lib/polars.rb
56
60
  - lib/polars/batched_csv_reader.rb
57
61
  - lib/polars/cat_expr.rb
62
+ - lib/polars/cat_name_space.rb
58
63
  - lib/polars/data_frame.rb
59
64
  - lib/polars/date_time_expr.rb
65
+ - lib/polars/date_time_name_space.rb
66
+ - lib/polars/exceptions.rb
60
67
  - lib/polars/expr.rb
68
+ - lib/polars/expr_dispatch.rb
61
69
  - lib/polars/functions.rb
70
+ - lib/polars/group_by.rb
62
71
  - lib/polars/io.rb
63
72
  - lib/polars/lazy_frame.rb
64
73
  - lib/polars/lazy_functions.rb
65
74
  - lib/polars/lazy_group_by.rb
66
75
  - lib/polars/list_expr.rb
76
+ - lib/polars/list_name_space.rb
67
77
  - lib/polars/meta_expr.rb
68
78
  - lib/polars/series.rb
79
+ - lib/polars/slice.rb
69
80
  - lib/polars/string_expr.rb
81
+ - lib/polars/string_name_space.rb
70
82
  - lib/polars/struct_expr.rb
83
+ - lib/polars/struct_name_space.rb
71
84
  - lib/polars/utils.rb
72
85
  - lib/polars/version.rb
73
86
  - lib/polars/when.rb