polars-df 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
data/lib/polars/struct_expr.rb CHANGED
@@ -1,11 +1,17 @@
1
1
  module Polars
2
+ # Namespace for struct related expressions.
2
3
  class StructExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Retrieve one of the fields of this `Struct` as a new Series.
13
+ #
14
+ # @return [Expr]
9
15
  def [](item)
10
16
  if item.is_a?(String)
11
17
  field(item)
@@ -16,10 +22,77 @@ module Polars
16
22
  end
17
23
  end
18
24
 
25
+ # Retrieve one of the fields of this `Struct` as a new Series.
26
+ #
27
+ # @param name [String]
28
+ # Name of the field
29
+ #
30
+ # @return [Expr]
31
+ #
32
+ # @example
33
+ # df = (
34
+ # Polars::DataFrame.new(
35
+ # {
36
+ # "int" => [1, 2],
37
+ # "str" => ["a", "b"],
38
+ # "bool" => [true, nil],
39
+ # "list" => [[1, 2], [3]]
40
+ # }
41
+ # )
42
+ # .to_struct("my_struct")
43
+ # .to_frame
44
+ # )
45
+ # df.select(Polars.col("my_struct").struct.field("str"))
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ str │
50
+ # # │ --- │
51
+ # # │ str │
52
+ # # ╞═════╡
53
+ # # │ a │
54
+ # # ├╌╌╌╌╌┤
55
+ # # │ b │
56
+ # # └─────┘
19
57
  def field(name)
20
58
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
21
59
  end
22
60
 
61
+ # Rename the fields of the struct.
62
+ #
63
+ # @param names [Array]
64
+ # New names in the order of the struct's fields
65
+ #
66
+ # @return [Expr]
67
+ #
68
+ # @example
69
+ # df = (
70
+ # Polars::DataFrame.new(
71
+ # {
72
+ # "int" => [1, 2],
73
+ # "str" => ["a", "b"],
74
+ # "bool" => [true, nil],
75
+ # "list" => [[1, 2], [3]]
76
+ # }
77
+ # )
78
+ # .to_struct("my_struct")
79
+ # .to_frame
80
+ # )
81
+ # df = df.with_column(
82
+ # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
83
+ # )
84
+ # df.select(Polars.col("my_struct").struct.field("INT"))
85
+ # # =>
86
+ # # shape: (2, 1)
87
+ # # ┌─────┐
88
+ # # │ INT │
89
+ # # │ --- │
90
+ # # │ i64 │
91
+ # # ╞═════╡
92
+ # # │ 1 │
93
+ # # ├╌╌╌╌╌┤
94
+ # # │ 2 │
95
+ # # └─────┘
23
96
  def rename_fields(names)
24
97
  Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
25
98
  end
data/lib/polars/struct_name_space.rb ADDED
@@ -0,0 +1,64 @@
1
+ module Polars
2
+ # Series.struct namespace.
3
+ class StructNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "struct"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Retrieve one of the fields of this `Struct` as a new Series.
14
+ #
15
+ # @return [Series]
16
+ def [](item)
17
+ if item.is_a?(Integer)
18
+ field(fields[item])
19
+ elsif item.is_a?(String)
20
+ field(item)
21
+ else
22
+ raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
23
+ end
24
+ end
25
+
26
+ # Convert this Struct Series to a DataFrame.
27
+ #
28
+ # @return [DataFrame]
29
+ def to_frame
30
+ Utils.wrap_df(_s.struct_to_frame)
31
+ end
32
+
33
+ # Get the names of the fields.
34
+ #
35
+ # @return [Array]
36
+ def fields
37
+ if _s.nil?
38
+ []
39
+ else
40
+ _s.struct_fields
41
+ end
42
+ end
43
+
44
+ # Retrieve one of the fields of this `Struct` as a new Series.
45
+ #
46
+ # @param name [String]
47
+ # Name of the field
48
+ #
49
+ # @return [Series]
50
+ def field(name)
51
+ super
52
+ end
53
+
54
+ # Rename the fields of the struct.
55
+ #
56
+ # @param names [Array]
57
+ # New names in the order of the struct's fields
58
+ #
59
+ # @return [Series]
60
+ def rename_fields(names)
61
+ super
62
+ end
63
+ end
64
+ end
data/lib/polars/utils.rb CHANGED
@@ -19,6 +19,52 @@ module Polars
19
19
  Polars.col(name)
20
20
  end
21
21
 
22
+ def self._timedelta_to_pl_duration(td)
23
+ td
24
+ end
25
+
26
# Convert a date/time value to an integer timestamp in the given time unit.
#
# The original implementation chained `DateTime#utc` and `DateTime#to_f`,
# neither of which exists in Ruby's standard `date` library, and scaled a
# Float, which cannot represent present-day nanosecond timestamps exactly.
# This version works on integer seconds plus integer nanoseconds instead.
#
# @private
#
# @param dt [Time, DateTime, Date]
#   Value to convert; it must respond to `to_datetime`.
# @param tu [String, nil]
#   Time unit: `"ns"`, `"us"` or `"ms"`. `nil` is treated as `"us"`.
#
# @return [Integer]
#   Number of `tu` units elapsed since the Unix epoch.
#
# @raise [ArgumentError]
#   If `tu` is not one of the supported units.
def self._datetime_to_pl_timestamp(dt, tu)
  units_per_second =
    case tu
    when "ns" then 1_000_000_000
    when "us", nil then 1_000_000
    when "ms" then 1_000
    else
      raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
    end

  # Going through to_datetime keeps Date inputs anchored at midnight +00:00
  # rather than at local midnight; to_time then yields exact integer parts.
  time = dt.to_datetime.to_time
  time.to_i * units_per_second + time.nsec / (1_000_000_000 / units_per_second)
end
39
+
40
# Convert a raw integer temporal value into a Ruby `Date` or `Time`.
#
# The original raised `Todo` for the "ns" and "ms" units, so a call using
# the default `tu: "ns"` always failed. Both units are now converted the
# same way the "us" branch already was.
#
# @private
#
# @param value [Integer]
#   For `:date`, a number of days since the Unix epoch; for datetimes, a
#   number of `tu` units since the Unix epoch.
# @param dtype [Symbol, String]
#   `:date`, or a dtype whose string form starts with `"datetime["`.
# @param tu [String]
#   Time unit of `value` for datetimes: `"ns"`, `"us"` or `"ms"`.
# @param tz [String, nil]
#   Time zone. Only `nil` and `""` are handled.
#
# @return [Date, Time]
#   A `Date` for `:date`, otherwise a `Time` in UTC.
#
# @raise [ArgumentError]
#   If `tu` is not one of the supported units.
# @raise [Todo]
#   If a non-empty time zone is given.
# @raise [NotImplementedError]
#   If `dtype` is neither `:date` nor a datetime dtype.
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
  if dtype == :date
    # days to seconds
    # important to create from utc. Not doing this leads
    # to inconsistencies dependent on the timezone you are in.
    Time.at(value * 86400).utc.to_date
  # TODO fix dtype
  elsif dtype.to_s.start_with?("datetime[")
    # Time zone aware conversion is not implemented yet.
    raise Todo unless tz.nil? || tz == ""

    # Integer division and modulo both floor, so the sub-second part stays
    # non-negative for values before the epoch.
    case tu
    when "ns"
      Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
    when "us"
      Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
    when "ms"
      Time.at(value / 1_000, value % 1_000, :millisecond).utc
    else
      raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
    end
  else
    raise NotImplementedError
  end
end
67
+
22
68
  def self.selection_to_rbexpr_list(exprs)
23
69
  if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
24
70
  exprs = [exprs]
@@ -49,12 +95,30 @@ module Polars
49
95
 
50
96
  # TODO fix
51
97
  def self.is_polars_dtype(data_type)
52
- true
98
+ data_type.is_a?(Symbol) || data_type.is_a?(String)
53
99
  end
54
100
 
101
+ RB_TYPE_TO_DTYPE = {
102
+ Float => :f64,
103
+ Integer => :i64,
104
+ String => :str,
105
+ TrueClass => :bool,
106
+ FalseClass => :bool,
107
+ Date => :date,
108
+ DateTime => :datetime
109
+ }
110
+
55
111
  # TODO fix
56
- def self.rb_type_to_dtype(dtype)
57
- dtype.to_s
112
+ def self.rb_type_to_dtype(data_type)
113
+ if is_polars_dtype(data_type)
114
+ return data_type.to_s
115
+ end
116
+
117
+ begin
118
+ RB_TYPE_TO_DTYPE.fetch(data_type).to_s
119
+ rescue KeyError
120
+ raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
121
+ end
58
122
  end
59
123
 
60
124
  def self._process_null_values(null_values)
@@ -103,5 +167,9 @@ module Polars
103
167
  sz
104
168
  end
105
169
  end
170
+
171
+ def self.bool?(value)
172
+ value == true || value == false
173
+ end
106
174
  end
107
175
  end
data/lib/polars/version.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  module Polars
2
- VERSION = "0.1.2"
2
+ # @private
3
+ VERSION = "0.1.4"
3
4
  end
data/lib/polars/when.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class When
3
4
  attr_accessor :_rbwhen
4
5
 
data/lib/polars/when_then.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class WhenThen
3
4
  attr_accessor :_rbwhenthen
4
5
 
data/lib/polars.rb CHANGED
@@ -1,38 +1,40 @@
1
1
  # ext
2
2
  require "polars/polars"
3
3
 
4
+ # stdlib
5
+ require "date"
6
+
4
7
  # modules
8
+ require "polars/expr_dispatch"
5
9
  require "polars/batched_csv_reader"
6
10
  require "polars/cat_expr"
11
+ require "polars/cat_name_space"
7
12
  require "polars/data_frame"
8
13
  require "polars/date_time_expr"
14
+ require "polars/date_time_name_space"
15
+ require "polars/exceptions"
9
16
  require "polars/expr"
10
17
  require "polars/functions"
18
+ require "polars/group_by"
11
19
  require "polars/io"
12
20
  require "polars/lazy_frame"
13
21
  require "polars/lazy_functions"
14
22
  require "polars/lazy_group_by"
15
23
  require "polars/list_expr"
24
+ require "polars/list_name_space"
16
25
  require "polars/meta_expr"
17
26
  require "polars/series"
27
+ require "polars/slice"
18
28
  require "polars/string_expr"
29
+ require "polars/string_name_space"
19
30
  require "polars/struct_expr"
31
+ require "polars/struct_name_space"
20
32
  require "polars/utils"
21
33
  require "polars/version"
22
34
  require "polars/when"
23
35
  require "polars/when_then"
24
36
 
25
37
  module Polars
26
- # @private
27
- class Error < StandardError; end
28
-
29
- # @private
30
- class Todo < Error
31
- def message
32
- "not implemented yet"
33
- end
34
- end
35
-
36
38
  extend Functions
37
39
  extend IO
38
40
  extend LazyFunctions
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-26 00:00:00.000000000 Z
11
+ date: 2022-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -31,6 +31,7 @@ extensions:
31
31
  - ext/polars/extconf.rb
32
32
  extra_rdoc_files: []
33
33
  files:
34
+ - ".yardopts"
34
35
  - CHANGELOG.md
35
36
  - Cargo.lock
36
37
  - Cargo.toml
@@ -50,24 +51,36 @@ files:
50
51
  - ext/polars/src/lazy/mod.rs
51
52
  - ext/polars/src/lazy/utils.rs
52
53
  - ext/polars/src/lib.rs
54
+ - ext/polars/src/list_construction.rs
53
55
  - ext/polars/src/series.rs
56
+ - ext/polars/src/set.rs
57
+ - ext/polars/src/utils.rs
54
58
  - lib/polars-df.rb
55
59
  - lib/polars.rb
56
60
  - lib/polars/batched_csv_reader.rb
57
61
  - lib/polars/cat_expr.rb
62
+ - lib/polars/cat_name_space.rb
58
63
  - lib/polars/data_frame.rb
59
64
  - lib/polars/date_time_expr.rb
65
+ - lib/polars/date_time_name_space.rb
66
+ - lib/polars/exceptions.rb
60
67
  - lib/polars/expr.rb
68
+ - lib/polars/expr_dispatch.rb
61
69
  - lib/polars/functions.rb
70
+ - lib/polars/group_by.rb
62
71
  - lib/polars/io.rb
63
72
  - lib/polars/lazy_frame.rb
64
73
  - lib/polars/lazy_functions.rb
65
74
  - lib/polars/lazy_group_by.rb
66
75
  - lib/polars/list_expr.rb
76
+ - lib/polars/list_name_space.rb
67
77
  - lib/polars/meta_expr.rb
68
78
  - lib/polars/series.rb
79
+ - lib/polars/slice.rb
69
80
  - lib/polars/string_expr.rb
81
+ - lib/polars/string_name_space.rb
70
82
  - lib/polars/struct_expr.rb
83
+ - lib/polars/struct_name_space.rb
71
84
  - lib/polars/utils.rb
72
85
  - lib/polars/version.rb
73
86
  - lib/polars/when.rb