polars-df 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -198,7 +198,7 @@ module Polars
198
198
  # }
199
199
  # ).lazy
200
200
  # lf.dtypes
201
- # # => [:i64, :f64, :str]
201
+ # # => [Polars::Int64, Polars::Float64, Polars::Utf8]
202
202
  def dtypes
203
203
  _ldf.dtypes
204
204
  end
@@ -216,7 +216,7 @@ module Polars
216
216
  # }
217
217
  # ).lazy
218
218
  # lf.schema
219
- # # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
219
+ # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
220
220
  def schema
221
221
  _ldf.schema
222
222
  end
@@ -1173,7 +1173,8 @@ module Polars
1173
1173
  truncate: true,
1174
1174
  include_boundaries: false,
1175
1175
  closed: "left",
1176
- by: nil
1176
+ by: nil,
1177
+ start_by: "window"
1177
1178
  )
1178
1179
  if offset.nil?
1179
1180
  if period.nil?
@@ -1200,7 +1201,8 @@ module Polars
1200
1201
  truncate,
1201
1202
  include_boundaries,
1202
1203
  closed,
1203
- rbexprs_by
1204
+ rbexprs_by,
1205
+ start_by
1204
1206
  )
1205
1207
  LazyGroupBy.new(lgb, self.class)
1206
1208
  end
@@ -2165,7 +2167,8 @@ module Polars
2165
2167
  # # │ 3.0 ┆ 1.0 │
2166
2168
  # # └─────┴─────┘
2167
2169
  def quantile(quantile, interpolation: "nearest")
2168
- _from_rbldf(_ldf.quantile(quantile, interpolation))
2170
+ quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2171
+ _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
2169
2172
  end
2170
2173
 
2171
2174
  # Explode lists to long format.
@@ -8,13 +8,18 @@ module Polars
8
8
  name = name.to_a
9
9
  end
10
10
 
11
- if name.is_a?(Array)
11
+ if name.is_a?(Class) && name < DataType
12
+ name = [name]
13
+ end
14
+
15
+ if name.is_a?(DataType)
16
+ Utils.wrap_expr(_dtype_cols([name]))
17
+ elsif name.is_a?(Array)
12
18
  if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
13
19
  name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
14
20
  Utils.wrap_expr(RbExpr.cols(name))
15
21
  elsif Utils.is_polars_dtype(name[0])
16
- raise Todo
17
- # Utils.wrap_expr(_dtype_cols(name))
22
+ Utils.wrap_expr(_dtype_cols(name))
18
23
  else
19
24
  raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
20
25
  end
@@ -568,7 +568,7 @@ module Polars
568
568
  # # └────────────┘
569
569
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
570
570
  raise Todo if name_generator
571
- Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
571
+ Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator, 0))
572
572
  end
573
573
 
574
574
  # Run any polars expression against the lists' elements.
data/lib/polars/series.rb CHANGED
@@ -23,9 +23,9 @@ module Polars
23
23
  # @example Constructing a Series by specifying name and values positionally:
24
24
  # s = Polars::Series.new("a", [1, 2, 3])
25
25
  #
26
- # @example Notice that the dtype is automatically inferred as a polars `:i64`:
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
27
  # s.dtype
28
- # # => :i64
28
+ # # => Polars::Int64
29
29
  #
30
30
  # @example Constructing a Series with a specific dtype:
31
31
  # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
@@ -1751,7 +1751,7 @@ module Polars
1751
1751
  # s.is_numeric
1752
1752
  # # => true
1753
1753
  def is_numeric
1754
- [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
1754
+ [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
1755
1755
  end
1756
1756
  alias_method :numeric?, :is_numeric
1757
1757
 
@@ -1764,7 +1764,7 @@ module Polars
1764
1764
  # s.is_datelike
1765
1765
  # # => true
1766
1766
  def is_datelike
1767
- [:date, :datetime, :duration, :time].include?(dtype)
1767
+ [Date, Datetime, Duration, Time].include?(dtype)
1768
1768
  end
1769
1769
 
1770
1770
  # Check if this Series has floating point numbers.
@@ -1776,7 +1776,7 @@ module Polars
1776
1776
  # s.is_float
1777
1777
  # # => true
1778
1778
  def is_float
1779
- [:f32, :f64].include?(dtype)
1779
+ [Float32, Float64].include?(dtype)
1780
1780
  end
1781
1781
  alias_method :float?, :is_float
1782
1782
 
@@ -1789,7 +1789,7 @@ module Polars
1789
1789
  # s.is_boolean
1790
1790
  # # => true
1791
1791
  def is_boolean
1792
- dtype == :bool
1792
+ dtype == Boolean
1793
1793
  end
1794
1794
  alias_method :boolean?, :is_boolean
1795
1795
  alias_method :is_bool, :is_boolean
@@ -1804,7 +1804,7 @@ module Polars
1804
1804
  # s.is_utf8
1805
1805
  # # => true
1806
1806
  def is_utf8
1807
- dtype == :str
1807
+ dtype == Utf8
1808
1808
  end
1809
1809
  alias_method :utf8?, :is_utf8
1810
1810
 
@@ -1840,7 +1840,7 @@ module Polars
1840
1840
  # # 3
1841
1841
  # # ]
1842
1842
  def set(filter, value)
1843
- Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
1843
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype)}", filter._s, value))
1844
1844
  end
1845
1845
 
1846
1846
  # Set values at the index locations.
@@ -3101,7 +3101,7 @@ module Polars
3101
3101
  # # 4
3102
3102
  # # 5
3103
3103
  # # ]
3104
- def interpolate
3104
+ def interpolate(method: "linear")
3105
3105
  super
3106
3106
  end
3107
3107
 
@@ -3525,10 +3525,10 @@ module Polars
3525
3525
  return Utils.wrap_s(_s.send(op, other._s))
3526
3526
  end
3527
3527
 
3528
- if dtype == :str
3528
+ if dtype == Utf8
3529
3529
  raise Todo
3530
3530
  end
3531
- Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
3531
+ Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3532
3532
  end
3533
3533
 
3534
3534
  def _arithmetic(other, op)
@@ -3539,15 +3539,39 @@ module Polars
3539
3539
  return Utils.wrap_s(_s.send(op, other._s))
3540
3540
  end
3541
3541
 
3542
- if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
3542
+ if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
3543
3543
  raise Todo
3544
3544
  end
3545
3545
  if other.is_a?(Float) && !is_float
3546
3546
  raise Todo
3547
3547
  end
3548
3548
 
3549
- Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
3550
- end
3549
+ Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3550
+ end
3551
+
3552
+ DTYPE_TO_FFINAME = {
3553
+ Int8 => "i8",
3554
+ Int16 => "i16",
3555
+ Int32 => "i32",
3556
+ Int64 => "i64",
3557
+ UInt8 => "u8",
3558
+ UInt16 => "u16",
3559
+ UInt32 => "u32",
3560
+ UInt64 => "u64",
3561
+ Float32 => "f32",
3562
+ Float64 => "f64",
3563
+ Boolean => "bool",
3564
+ Utf8 => "str",
3565
+ List => "list",
3566
+ Date => "date",
3567
+ Datetime => "datetime",
3568
+ Duration => "duration",
3569
+ Time => "time",
3570
+ Object => "object",
3571
+ Categorical => "categorical",
3572
+ Struct => "struct",
3573
+ Binary => "binary"
3574
+ }
3551
3575
 
3552
3576
  def series_to_rbseries(name, values)
3553
3577
  # should not be in-place?
@@ -3571,9 +3595,9 @@ module Polars
3571
3595
  end
3572
3596
 
3573
3597
  rb_temporal_types = []
3574
- rb_temporal_types << Date if defined?(Date)
3575
- rb_temporal_types << DateTime if defined?(DateTime)
3576
- rb_temporal_types << Time if defined?(Time)
3598
+ rb_temporal_types << ::Date if defined?(::Date)
3599
+ rb_temporal_types << ::DateTime if defined?(::DateTime)
3600
+ rb_temporal_types << ::Time if defined?(::Time)
3577
3601
 
3578
3602
  value = _get_first_non_none(values)
3579
3603
 
@@ -3599,11 +3623,11 @@ module Polars
3599
3623
  # dtype = rb_type_to_dtype(dtype)
3600
3624
  # end
3601
3625
 
3602
- if ruby_dtype == Date
3626
+ if ruby_dtype == ::Date
3603
3627
  RbSeries.new_opt_date(name, values, strict)
3604
- elsif ruby_dtype == Time
3628
+ elsif ruby_dtype == ::Time
3605
3629
  RbSeries.new_opt_datetime(name, values, strict)
3606
- elsif ruby_dtype == DateTime
3630
+ elsif ruby_dtype == ::DateTime
3607
3631
  RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
3608
3632
  else
3609
3633
  raise Todo
@@ -3646,6 +3670,21 @@ module Polars
3646
3670
  end
3647
3671
 
3648
3672
  POLARS_TYPE_TO_CONSTRUCTOR = {
3673
+ Float32 => RbSeries.method(:new_opt_f32),
3674
+ Float64 => RbSeries.method(:new_opt_f64),
3675
+ Int8 => RbSeries.method(:new_opt_i8),
3676
+ Int16 => RbSeries.method(:new_opt_i16),
3677
+ Int32 => RbSeries.method(:new_opt_i32),
3678
+ Int64 => RbSeries.method(:new_opt_i64),
3679
+ UInt8 => RbSeries.method(:new_opt_u8),
3680
+ UInt16 => RbSeries.method(:new_opt_u16),
3681
+ UInt32 => RbSeries.method(:new_opt_u32),
3682
+ UInt64 => RbSeries.method(:new_opt_u64),
3683
+ Boolean => RbSeries.method(:new_opt_bool),
3684
+ Utf8 => RbSeries.method(:new_str)
3685
+ }
3686
+
3687
+ SYM_TYPE_TO_CONSTRUCTOR = {
3649
3688
  f32: RbSeries.method(:new_opt_f32),
3650
3689
  f64: RbSeries.method(:new_opt_f64),
3651
3690
  i8: RbSeries.method(:new_opt_i8),
@@ -3661,7 +3700,11 @@ module Polars
3661
3700
  }
3662
3701
 
3663
3702
  def polars_type_to_constructor(dtype)
3664
- POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3703
+ if dtype.is_a?(Class) && dtype < DataType
3704
+ POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
3705
+ else
3706
+ SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3707
+ end
3665
3708
  rescue KeyError
3666
3709
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
3667
3710
  end
@@ -64,24 +64,24 @@ module Polars
64
64
  # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
65
65
  # # │ 2001-07-08 │
66
66
  # # └────────────┘
67
- def strptime(datatype, fmt = nil, strict: true, exact: true)
67
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
68
68
  if !Utils.is_polars_dtype(datatype)
69
69
  raise ArgumentError, "expected: {DataType} got: #{datatype}"
70
70
  end
71
71
 
72
72
  if datatype == :date
73
- Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
73
+ Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
74
74
  elsif datatype == :datetime
75
75
  # TODO fix
76
76
  tu = nil # datatype.tu
77
- dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
77
+ dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware))
78
78
  if tu.nil?
79
79
  dtcol
80
80
  else
81
81
  dtcol.dt.cast_time_unit(tu)
82
82
  end
83
83
  elsif datatype == :time
84
- Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
84
+ Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
85
85
  else
86
86
  raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
87
87
  end
@@ -725,7 +725,8 @@ module Polars
725
725
  # # │ ["678", "910"] │
726
726
  # # └────────────────┘
727
727
  def extract_all(pattern)
728
- Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
728
+ pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
729
+ Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
729
730
  end
730
731
 
731
732
  # Count all successive non-overlapping regex matches.
@@ -60,7 +60,7 @@ module Polars
60
60
  # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
61
61
  # # │ 2001-07-08 │
62
62
  # # └────────────┘
63
- def strptime(datatype, fmt = nil, strict: true, exact: true)
63
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
64
64
  super
65
65
  end
66
66
 
data/lib/polars/utils.rb CHANGED
@@ -42,18 +42,18 @@ module Polars
42
42
  end
43
43
 
44
44
  def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
- if dtype == :date
45
+ if dtype == :date || dtype == Date
46
46
  # days to seconds
47
47
  # important to create from utc. Not doing this leads
48
48
  # to inconsistencies dependent on the timezone you are in.
49
- Time.at(value * 86400).utc.to_date
49
+ ::Time.at(value * 86400).utc.to_date
50
50
  # TODO fix dtype
51
- elsif dtype.to_s.start_with?("datetime[")
51
+ elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
52
52
  if tz.nil? || tz == ""
53
53
  if tu == "ns"
54
54
  raise Todo
55
55
  elsif tu == "us"
56
- dt = Time.at(value / 1000000, value % 1000000, :usec).utc
56
+ dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
57
57
  elsif tu == "ms"
58
58
  raise Todo
59
59
  else
@@ -99,7 +99,7 @@ module Polars
99
99
 
100
100
  # TODO fix
101
101
  def self.is_polars_dtype(data_type)
102
- data_type.is_a?(Symbol) || data_type.is_a?(String)
102
+ data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
103
103
  end
104
104
 
105
105
  RB_TYPE_TO_DTYPE = {
@@ -108,14 +108,15 @@ module Polars
108
108
  String => :str,
109
109
  TrueClass => :bool,
110
110
  FalseClass => :bool,
111
- Date => :date,
112
- DateTime => :datetime
111
+ ::Date => :date,
112
+ ::DateTime => :datetime
113
113
  }
114
114
 
115
115
  # TODO fix
116
116
  def self.rb_type_to_dtype(data_type)
117
117
  if is_polars_dtype(data_type)
118
- return data_type.to_s
118
+ data_type = data_type.to_s if data_type.is_a?(Symbol)
119
+ return data_type
119
120
  end
120
121
 
121
122
  begin
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.1.5"
3
+ VERSION = "0.2.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,45 +1,46 @@
1
1
  # ext
2
2
  begin
3
- require "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require_relative "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require "polars/polars"
5
+ require_relative "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
10
 
11
11
  # modules
12
- require "polars/expr_dispatch"
13
- require "polars/batched_csv_reader"
14
- require "polars/cat_expr"
15
- require "polars/cat_name_space"
16
- require "polars/convert"
17
- require "polars/data_frame"
18
- require "polars/date_time_expr"
19
- require "polars/date_time_name_space"
20
- require "polars/dynamic_group_by"
21
- require "polars/exceptions"
22
- require "polars/expr"
23
- require "polars/functions"
24
- require "polars/group_by"
25
- require "polars/io"
26
- require "polars/lazy_frame"
27
- require "polars/lazy_functions"
28
- require "polars/lazy_group_by"
29
- require "polars/list_expr"
30
- require "polars/list_name_space"
31
- require "polars/meta_expr"
32
- require "polars/rolling_group_by"
33
- require "polars/series"
34
- require "polars/slice"
35
- require "polars/string_expr"
36
- require "polars/string_name_space"
37
- require "polars/struct_expr"
38
- require "polars/struct_name_space"
39
- require "polars/utils"
40
- require "polars/version"
41
- require "polars/when"
42
- require "polars/when_then"
12
+ require_relative "polars/expr_dispatch"
13
+ require_relative "polars/batched_csv_reader"
14
+ require_relative "polars/cat_expr"
15
+ require_relative "polars/cat_name_space"
16
+ require_relative "polars/convert"
17
+ require_relative "polars/data_frame"
18
+ require_relative "polars/data_types"
19
+ require_relative "polars/date_time_expr"
20
+ require_relative "polars/date_time_name_space"
21
+ require_relative "polars/dynamic_group_by"
22
+ require_relative "polars/exceptions"
23
+ require_relative "polars/expr"
24
+ require_relative "polars/functions"
25
+ require_relative "polars/group_by"
26
+ require_relative "polars/io"
27
+ require_relative "polars/lazy_frame"
28
+ require_relative "polars/lazy_functions"
29
+ require_relative "polars/lazy_group_by"
30
+ require_relative "polars/list_expr"
31
+ require_relative "polars/list_name_space"
32
+ require_relative "polars/meta_expr"
33
+ require_relative "polars/rolling_group_by"
34
+ require_relative "polars/series"
35
+ require_relative "polars/slice"
36
+ require_relative "polars/string_expr"
37
+ require_relative "polars/string_name_space"
38
+ require_relative "polars/struct_expr"
39
+ require_relative "polars/struct_name_space"
40
+ require_relative "polars/utils"
41
+ require_relative "polars/version"
42
+ require_relative "polars/when"
43
+ require_relative "polars/when_then"
43
44
 
44
45
  module Polars
45
46
  extend Convert
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-22 00:00:00.000000000 Z
11
+ date: 2023-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -65,6 +65,7 @@ files:
65
65
  - lib/polars/cat_name_space.rb
66
66
  - lib/polars/convert.rb
67
67
  - lib/polars/data_frame.rb
68
+ - lib/polars/data_types.rb
68
69
  - lib/polars/date_time_expr.rb
69
70
  - lib/polars/date_time_name_space.rb
70
71
  - lib/polars/dynamic_group_by.rb
@@ -103,14 +104,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
104
  requirements:
104
105
  - - ">="
105
106
  - !ruby/object:Gem::Version
106
- version: '2.7'
107
+ version: '3.0'
107
108
  required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  requirements:
109
110
  - - ">="
110
111
  - !ruby/object:Gem::Version
111
112
  version: '0'
112
113
  requirements: []
113
- rubygems_version: 3.3.26
114
+ rubygems_version: 3.4.1
114
115
  signing_key:
115
116
  specification_version: 4
116
117
  summary: Blazingly fast DataFrames for Ruby