polars-df 0.2.2-x86_64-darwin → 0.2.3-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +1 -1
- data/README.md +4 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +57 -2
- data/lib/polars/data_types.rb +67 -29
- data/lib/polars/series.rb +5 -0
- data/lib/polars/utils.rb +5 -5
- data/lib/polars/version.rb +1 -1
- data/lib/polars-df.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: a67aba6106276247072fe14bcd35b3381e890844ea50321c443cbcc815b84ca3
         | 
| 4 | 
            +
              data.tar.gz: c5c3388ed010e8946903b7d8ed30fc24ddbf2a34b9eba23d1c178b476faebc7a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 1343d81460555ed4baa13962ff7316814b64e3c7c6c363c6d3d7b01221bd83abd9ad62b9bd8dffb4bd7dbb30716bae5d5fc20d7830e157631767ac186638bd24
         | 
| 7 | 
            +
              data.tar.gz: f02495aea654767675444e57ea764cb44e5e34123049cdf541c3b2b47df9d7ddc1dd9bb0499ddda6ee360e3983b0184f35bc6e5fcc28f93e49af3476960770eb
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/Cargo.lock
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -56,6 +56,8 @@ From Active Record | |
| 56 56 |  | 
| 57 57 | 
             
            ```ruby
         | 
| 58 58 | 
             
            Polars.read_sql(User.all)
         | 
| 59 | 
            +
            # or
         | 
| 60 | 
            +
            Polars.read_sql("SELECT * FROM users")
         | 
| 59 61 | 
             
            ```
         | 
| 60 62 |  | 
| 61 63 | 
             
            From a hash
         | 
| @@ -287,13 +289,13 @@ CSV | |
| 287 289 | 
             
            ```ruby
         | 
| 288 290 | 
             
            df.to_csv
         | 
| 289 291 | 
             
            # or
         | 
| 290 | 
            -
            df.write_csv(" | 
| 292 | 
            +
            df.write_csv("file.csv")
         | 
| 291 293 | 
             
            ```
         | 
| 292 294 |  | 
| 293 295 | 
             
            Parquet
         | 
| 294 296 |  | 
| 295 297 | 
             
            ```ruby
         | 
| 296 | 
            -
            df.write_parquet(" | 
| 298 | 
            +
            df.write_parquet("file.parquet")
         | 
| 297 299 | 
             
            ```
         | 
| 298 300 |  | 
| 299 301 | 
             
            ## Types
         | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/lib/polars/data_frame.rb
    CHANGED
    
    | @@ -4746,7 +4746,14 @@ module Polars | |
| 4746 4746 | 
             
                end
         | 
| 4747 4747 |  | 
| 4748 4748 | 
             
                # @private
         | 
| 4749 | 
            -
                def self. | 
| 4749 | 
            +
                def self.include_unknowns(schema, cols)
         | 
| 4750 | 
            +
                  cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
         | 
| 4751 | 
            +
                end
         | 
| 4752 | 
            +
             | 
| 4753 | 
            +
                # @private
         | 
| 4754 | 
            +
                def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
         | 
| 4755 | 
            +
                  raise Todo if schema_overrides
         | 
| 4756 | 
            +
             | 
| 4750 4757 | 
             
                  if columns.is_a?(Hash)
         | 
| 4751 4758 | 
             
                    columns = columns.to_a
         | 
| 4752 4759 | 
             
                  end
         | 
| @@ -4790,8 +4797,48 @@ module Polars | |
| 4790 4797 | 
             
                  end
         | 
| 4791 4798 | 
             
                end
         | 
| 4792 4799 |  | 
| 4800 | 
            +
                def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
         | 
| 4801 | 
            +
                  rbdf_columns = rbdf.columns
         | 
| 4802 | 
            +
                  rbdf_dtypes = rbdf.dtypes
         | 
| 4803 | 
            +
                  columns, dtypes = _unpack_columns(
         | 
| 4804 | 
            +
                    (columns || rbdf_columns), schema_overrides: schema_overrides
         | 
| 4805 | 
            +
                  )
         | 
| 4806 | 
            +
                  column_subset = []
         | 
| 4807 | 
            +
                  if columns != rbdf_columns
         | 
| 4808 | 
            +
                    if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
         | 
| 4809 | 
            +
                      column_subset = columns
         | 
| 4810 | 
            +
                    else
         | 
| 4811 | 
            +
                      rbdf.set_column_names(columns)
         | 
| 4812 | 
            +
                    end
         | 
| 4813 | 
            +
                  end
         | 
| 4814 | 
            +
             | 
| 4815 | 
            +
                  column_casts = []
         | 
| 4816 | 
            +
                  columns.each do |col, i|
         | 
| 4817 | 
            +
                    if dtypes[col] == Categorical # != rbdf_dtypes[i]
         | 
| 4818 | 
            +
                      column_casts << Polars.col(col).cast(Categorical)._rbexpr
         | 
| 4819 | 
            +
                    elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
         | 
| 4820 | 
            +
                      column_casts << Polars.col(col).cast(structs[col])._rbexpr
         | 
| 4821 | 
            +
                    elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
         | 
| 4822 | 
            +
                      column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
         | 
| 4823 | 
            +
                    end
         | 
| 4824 | 
            +
                  end
         | 
| 4825 | 
            +
             | 
| 4826 | 
            +
                  if column_casts.any? || column_subset.any?
         | 
| 4827 | 
            +
                    rbdf = rbdf.lazy
         | 
| 4828 | 
            +
                    if column_casts.any?
         | 
| 4829 | 
            +
                      rbdf = rbdf.with_columns(column_casts)
         | 
| 4830 | 
            +
                    end
         | 
| 4831 | 
            +
                    if column_subset.any?
         | 
| 4832 | 
            +
                      rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
         | 
| 4833 | 
            +
                    end
         | 
| 4834 | 
            +
                    rbdf = rbdf.collect
         | 
| 4835 | 
            +
                  end
         | 
| 4836 | 
            +
             | 
| 4837 | 
            +
                  rbdf
         | 
| 4838 | 
            +
                end
         | 
| 4839 | 
            +
             | 
| 4793 4840 | 
             
                # @private
         | 
| 4794 | 
            -
                def self.sequence_to_rbdf(data, columns: nil, orient: nil)
         | 
| 4841 | 
            +
                def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
         | 
| 4795 4842 | 
             
                  if data.length == 0
         | 
| 4796 4843 | 
             
                    return hash_to_rbdf({}, columns: columns)
         | 
| 4797 4844 | 
             
                  end
         | 
| @@ -4803,6 +4850,14 @@ module Polars | |
| 4803 4850 | 
             
                    data.each do |s|
         | 
| 4804 4851 | 
             
                      data_series << s._s
         | 
| 4805 4852 | 
             
                    end
         | 
| 4853 | 
            +
                  elsif data[0].is_a?(Hash)
         | 
| 4854 | 
            +
                    column_names, dtypes = _unpack_columns(columns)
         | 
| 4855 | 
            +
                    schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
         | 
| 4856 | 
            +
                    rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
         | 
| 4857 | 
            +
                    if column_names
         | 
| 4858 | 
            +
                      rbdf = _post_apply_columns(rbdf, column_names)
         | 
| 4859 | 
            +
                    end
         | 
| 4860 | 
            +
                    return rbdf
         | 
| 4806 4861 | 
             
                  elsif data[0].is_a?(Array)
         | 
| 4807 4862 | 
             
                    if orient.nil? && !columns.nil?
         | 
| 4808 4863 | 
             
                      orient = columns.length == data.length ? "col" : "row"
         | 
    
        data/lib/polars/data_types.rb
    CHANGED
    
    | @@ -3,44 +3,64 @@ module Polars | |
| 3 3 | 
             
              class DataType
         | 
| 4 4 | 
             
              end
         | 
| 5 5 |  | 
| 6 | 
            +
              # Base class for numeric data types.
         | 
| 7 | 
            +
              class NumericType < DataType
         | 
| 8 | 
            +
              end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              # Base class for integral data types.
         | 
| 11 | 
            +
              class IntegralType < NumericType
         | 
| 12 | 
            +
              end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              # Base class for fractional data types.
         | 
| 15 | 
            +
              class FractionalType < NumericType
         | 
| 16 | 
            +
              end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              # Base class for temporal data types.
         | 
| 19 | 
            +
              class TemporalType < DataType
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              # Base class for nested data types.
         | 
| 23 | 
            +
              class NestedType < DataType
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
             | 
| 6 26 | 
             
              # 8-bit signed integer type.
         | 
| 7 | 
            -
              class Int8 <  | 
| 27 | 
            +
              class Int8 < IntegralType
         | 
| 8 28 | 
             
              end
         | 
| 9 29 |  | 
| 10 30 | 
             
              # 16-bit signed integer type.
         | 
| 11 | 
            -
              class Int16 <  | 
| 31 | 
            +
              class Int16 < IntegralType
         | 
| 12 32 | 
             
              end
         | 
| 13 33 |  | 
| 14 34 | 
             
              # 32-bit signed integer type.
         | 
| 15 | 
            -
              class Int32 <  | 
| 35 | 
            +
              class Int32 < IntegralType
         | 
| 16 36 | 
             
              end
         | 
| 17 37 |  | 
| 18 38 | 
             
              # 64-bit signed integer type.
         | 
| 19 | 
            -
              class Int64 <  | 
| 39 | 
            +
              class Int64 < IntegralType
         | 
| 20 40 | 
             
              end
         | 
| 21 41 |  | 
| 22 42 | 
             
              # 8-bit unsigned integer type.
         | 
| 23 | 
            -
              class UInt8 <  | 
| 43 | 
            +
              class UInt8 < IntegralType
         | 
| 24 44 | 
             
              end
         | 
| 25 45 |  | 
| 26 46 | 
             
              # 16-bit unsigned integer type.
         | 
| 27 | 
            -
              class UInt16 <  | 
| 47 | 
            +
              class UInt16 < IntegralType
         | 
| 28 48 | 
             
              end
         | 
| 29 49 |  | 
| 30 50 | 
             
              # 32-bit unsigned integer type.
         | 
| 31 | 
            -
              class UInt32 <  | 
| 51 | 
            +
              class UInt32 < IntegralType
         | 
| 32 52 | 
             
              end
         | 
| 33 53 |  | 
| 34 54 | 
             
              # 64-bit unsigned integer type.
         | 
| 35 | 
            -
              class UInt64 <  | 
| 55 | 
            +
              class UInt64 < IntegralType
         | 
| 36 56 | 
             
              end
         | 
| 37 57 |  | 
| 38 58 | 
             
              # 32-bit floating point type.
         | 
| 39 | 
            -
              class Float32 <  | 
| 59 | 
            +
              class Float32 < FractionalType
         | 
| 40 60 | 
             
              end
         | 
| 41 61 |  | 
| 42 62 | 
             
              # 64-bit floating point type.
         | 
| 43 | 
            -
              class Float64 <  | 
| 63 | 
            +
              class Float64 < FractionalType
         | 
| 44 64 | 
             
              end
         | 
| 45 65 |  | 
| 46 66 | 
             
              # Boolean type.
         | 
| @@ -51,31 +71,19 @@ module Polars | |
| 51 71 | 
             
              class Utf8 < DataType
         | 
| 52 72 | 
             
              end
         | 
| 53 73 |  | 
| 54 | 
            -
              # Binary type.
         | 
| 55 | 
            -
              class Binary < DataType
         | 
| 56 | 
            -
              end
         | 
| 57 | 
            -
             | 
| 58 | 
            -
              # Type representing Null / None values.
         | 
| 59 | 
            -
              class Null < DataType
         | 
| 60 | 
            -
              end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
              # Type representing Datatype values that could not be determined statically.
         | 
| 63 | 
            -
              class Unknown < DataType
         | 
| 64 | 
            -
              end
         | 
| 65 | 
            -
             | 
| 66 74 | 
             
              # Nested list/array type.
         | 
| 67 | 
            -
              class List <  | 
| 75 | 
            +
              class List < NestedType
         | 
| 68 76 | 
             
                def initialize(inner)
         | 
| 69 77 | 
             
                  @inner = Utils.rb_type_to_dtype(inner)
         | 
| 70 78 | 
             
                end
         | 
| 71 79 | 
             
              end
         | 
| 72 80 |  | 
| 73 81 | 
             
              # Calendar date type.
         | 
| 74 | 
            -
              class Date <  | 
| 82 | 
            +
              class Date < TemporalType
         | 
| 75 83 | 
             
              end
         | 
| 76 84 |  | 
| 77 85 | 
             
              # Calendar date and time type.
         | 
| 78 | 
            -
              class Datetime <  | 
| 86 | 
            +
              class Datetime < TemporalType
         | 
| 79 87 | 
             
                def initialize(time_unit = "us", time_zone = nil)
         | 
| 80 88 | 
             
                  @tu = time_unit || "us"
         | 
| 81 89 | 
             
                  @time_zone = time_zone
         | 
| @@ -83,14 +91,14 @@ module Polars | |
| 83 91 | 
             
              end
         | 
| 84 92 |  | 
| 85 93 | 
             
              # Time duration/delta type.
         | 
| 86 | 
            -
              class Duration <  | 
| 94 | 
            +
              class Duration < TemporalType
         | 
| 87 95 | 
             
                def initialize(time_unit = "us")
         | 
| 88 96 | 
             
                  @tu = time_unit
         | 
| 89 97 | 
             
                end
         | 
| 90 98 | 
             
              end
         | 
| 91 99 |  | 
| 92 100 | 
             
              # Time of day type.
         | 
| 93 | 
            -
              class Time <  | 
| 101 | 
            +
              class Time < TemporalType
         | 
| 94 102 | 
             
              end
         | 
| 95 103 |  | 
| 96 104 | 
             
              # Type for wrapping arbitrary Ruby objects.
         | 
| @@ -102,15 +110,24 @@ module Polars | |
| 102 110 | 
             
              end
         | 
| 103 111 |  | 
| 104 112 | 
             
              # Definition of a single field within a `Struct` DataType.
         | 
| 105 | 
            -
              class Field | 
| 113 | 
            +
              class Field
         | 
| 114 | 
            +
                attr_reader :name, :dtype
         | 
| 115 | 
            +
             | 
| 106 116 | 
             
                def initialize(name, dtype)
         | 
| 107 117 | 
             
                  @name = name
         | 
| 108 118 | 
             
                  @dtype = Utils.rb_type_to_dtype(dtype)
         | 
| 109 119 | 
             
                end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                def inspect
         | 
| 122 | 
            +
                  class_name = self.class.name
         | 
| 123 | 
            +
                  "#{class_name}(#{@name}: #{@dtype})"
         | 
| 124 | 
            +
                end
         | 
| 110 125 | 
             
              end
         | 
| 111 126 |  | 
| 112 127 | 
             
              # Struct composite type.
         | 
| 113 | 
            -
              class Struct <  | 
| 128 | 
            +
              class Struct < NestedType
         | 
| 129 | 
            +
                attr_reader :fields
         | 
| 130 | 
            +
             | 
| 114 131 | 
             
                def initialize(fields)
         | 
| 115 132 | 
             
                  if fields.is_a?(Hash)
         | 
| 116 133 | 
             
                    @fields = fields.map { |n, d| Field.new(n, d) }
         | 
| @@ -118,5 +135,26 @@ module Polars | |
| 118 135 | 
             
                    @fields = fields
         | 
| 119 136 | 
             
                  end
         | 
| 120 137 | 
             
                end
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                def inspect
         | 
| 140 | 
            +
                  class_name = self.class.name
         | 
| 141 | 
            +
                  "#{class_name}(#{@fields})"
         | 
| 142 | 
            +
                end
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                def to_schema
         | 
| 145 | 
            +
                  @fields.to_h { |f| [f.name, f.dtype] }
         | 
| 146 | 
            +
                end
         | 
| 147 | 
            +
              end
         | 
| 148 | 
            +
             | 
| 149 | 
            +
              # Binary type.
         | 
| 150 | 
            +
              class Binary < DataType
         | 
| 151 | 
            +
              end
         | 
| 152 | 
            +
             | 
| 153 | 
            +
              # Type representing Null / None values.
         | 
| 154 | 
            +
              class Null < DataType
         | 
| 155 | 
            +
              end
         | 
| 156 | 
            +
             | 
| 157 | 
            +
              # Type representing Datatype values that could not be determined statically.
         | 
| 158 | 
            +
              class Unknown < DataType
         | 
| 121 159 | 
             
              end
         | 
| 122 160 | 
             
            end
         | 
    
        data/lib/polars/series.rb
    CHANGED
    
    | @@ -3667,6 +3667,11 @@ module Polars | |
| 3667 3667 | 
             
                  rb_temporal_types << ::Time if defined?(::Time)
         | 
| 3668 3668 |  | 
| 3669 3669 | 
             
                  value = _get_first_non_none(values)
         | 
| 3670 | 
            +
                  if !value.nil?
         | 
| 3671 | 
            +
                    if value.is_a?(Hash)
         | 
| 3672 | 
            +
                      return DataFrame.new(values).to_struct(name)._s
         | 
| 3673 | 
            +
                    end
         | 
| 3674 | 
            +
                  end
         | 
| 3670 3675 |  | 
| 3671 3676 | 
             
                  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
         | 
| 3672 3677 | 
             
                    constructor = polars_type_to_constructor(dtype)
         | 
    
        data/lib/polars/utils.rb
    CHANGED
    
    | @@ -160,11 +160,11 @@ module Polars | |
| 160 160 |  | 
| 161 161 | 
             
                def self.scale_bytes(sz, to:)
         | 
| 162 162 | 
             
                  scaling_factor = {
         | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 163 | 
            +
                    "b" => 1,
         | 
| 164 | 
            +
                    "k" => 1024,
         | 
| 165 | 
            +
                    "m" => 1024 ** 2,
         | 
| 166 | 
            +
                    "g" => 1024 ** 3,
         | 
| 167 | 
            +
                    "t" => 1024 ** 4
         | 
| 168 168 | 
             
                  }[to[0]]
         | 
| 169 169 | 
             
                  if scaling_factor > 1
         | 
| 170 170 | 
             
                    sz / scaling_factor.to_f
         | 
    
        data/lib/polars/version.rb
    CHANGED
    
    
    
        data/lib/polars-df.rb
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            require_relative "polars"
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: polars-df
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2.2
         | 
| 4 | 
            +
              version: 0.2.3
         | 
| 5 5 | 
             
            platform: x86_64-darwin
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023-01- | 
| 11 | 
            +
            date: 2023-01-22 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: 
         | 
| 14 14 | 
             
            email: andrew@ankane.org
         |