polars-df 0.11.0 → 0.12.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +16 -0
 - data/Cargo.lock +360 -361
 - data/ext/polars/Cargo.toml +10 -7
 - data/ext/polars/src/batched_csv.rs +1 -1
 - data/ext/polars/src/conversion/any_value.rs +261 -0
 - data/ext/polars/src/conversion/chunked_array.rs +4 -4
 - data/ext/polars/src/conversion/mod.rs +51 -10
 - data/ext/polars/src/dataframe/construction.rs +6 -8
 - data/ext/polars/src/dataframe/general.rs +19 -29
 - data/ext/polars/src/dataframe/io.rs +43 -33
 - data/ext/polars/src/error.rs +26 -4
 - data/ext/polars/src/expr/categorical.rs +0 -10
 - data/ext/polars/src/expr/datetime.rs +4 -12
 - data/ext/polars/src/expr/general.rs +123 -110
 - data/ext/polars/src/expr/mod.rs +2 -2
 - data/ext/polars/src/expr/rolling.rs +17 -9
 - data/ext/polars/src/expr/string.rs +2 -6
 - data/ext/polars/src/functions/eager.rs +10 -10
 - data/ext/polars/src/functions/lazy.rs +21 -21
 - data/ext/polars/src/functions/range.rs +6 -12
 - data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
 - data/ext/polars/src/lazyframe/mod.rs +81 -98
 - data/ext/polars/src/lib.rs +55 -45
 - data/ext/polars/src/map/dataframe.rs +2 -2
 - data/ext/polars/src/rb_modules.rs +25 -1
 - data/ext/polars/src/series/aggregation.rs +4 -2
 - data/ext/polars/src/series/arithmetic.rs +21 -11
 - data/ext/polars/src/series/construction.rs +56 -38
 - data/ext/polars/src/series/export.rs +1 -1
 - data/ext/polars/src/series/mod.rs +31 -10
 - data/ext/polars/src/sql.rs +3 -1
 - data/lib/polars/array_expr.rb +4 -4
 - data/lib/polars/batched_csv_reader.rb +2 -2
 - data/lib/polars/cat_expr.rb +0 -36
 - data/lib/polars/cat_name_space.rb +0 -37
 - data/lib/polars/data_frame.rb +93 -101
 - data/lib/polars/data_types.rb +1 -1
 - data/lib/polars/date_time_expr.rb +525 -573
 - data/lib/polars/date_time_name_space.rb +263 -464
 - data/lib/polars/dynamic_group_by.rb +3 -3
 - data/lib/polars/exceptions.rb +3 -0
 - data/lib/polars/expr.rb +367 -330
 - data/lib/polars/expr_dispatch.rb +1 -1
 - data/lib/polars/functions/aggregation/horizontal.rb +8 -8
 - data/lib/polars/functions/as_datatype.rb +63 -40
 - data/lib/polars/functions/lazy.rb +63 -14
 - data/lib/polars/functions/lit.rb +1 -1
 - data/lib/polars/functions/range/date_range.rb +18 -77
 - data/lib/polars/functions/range/datetime_range.rb +4 -4
 - data/lib/polars/functions/range/int_range.rb +2 -2
 - data/lib/polars/functions/range/time_range.rb +4 -4
 - data/lib/polars/functions/repeat.rb +1 -1
 - data/lib/polars/functions/whenthen.rb +1 -1
 - data/lib/polars/io/csv.rb +8 -8
 - data/lib/polars/io/ipc.rb +3 -3
 - data/lib/polars/io/json.rb +13 -2
 - data/lib/polars/io/ndjson.rb +15 -4
 - data/lib/polars/io/parquet.rb +5 -4
 - data/lib/polars/lazy_frame.rb +120 -106
 - data/lib/polars/lazy_group_by.rb +1 -1
 - data/lib/polars/list_expr.rb +11 -11
 - data/lib/polars/list_name_space.rb +5 -1
 - data/lib/polars/rolling_group_by.rb +5 -7
 - data/lib/polars/series.rb +105 -189
 - data/lib/polars/string_expr.rb +42 -67
 - data/lib/polars/string_name_space.rb +5 -4
 - data/lib/polars/testing.rb +2 -2
 - data/lib/polars/utils/constants.rb +9 -0
 - data/lib/polars/utils/convert.rb +97 -0
 - data/lib/polars/utils/parse.rb +89 -0
 - data/lib/polars/utils/various.rb +76 -0
 - data/lib/polars/utils/wrap.rb +19 -0
 - data/lib/polars/utils.rb +4 -330
 - data/lib/polars/version.rb +1 -1
 - data/lib/polars/whenthen.rb +6 -6
 - data/lib/polars.rb +11 -0
 - metadata +9 -4
 - data/ext/polars/src/conversion/anyvalue.rs +0 -186
 
    
        data/lib/polars/io/csv.rb
    CHANGED
    
    | 
         @@ -104,7 +104,7 @@ module Polars 
     | 
|
| 
       104 
104 
     | 
    
         
             
                  ignore_errors: false,
         
     | 
| 
       105 
105 
     | 
    
         
             
                  parse_dates: false,
         
     | 
| 
       106 
106 
     | 
    
         
             
                  n_threads: nil,
         
     | 
| 
       107 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 107 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       108 
108 
     | 
    
         
             
                  batch_size: 8192,
         
     | 
| 
       109 
109 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       110 
110 
     | 
    
         
             
                  encoding: "utf8",
         
     | 
| 
         @@ -192,7 +192,7 @@ module Polars 
     | 
|
| 
       192 
192 
     | 
    
         
             
                  ignore_errors: false,
         
     | 
| 
       193 
193 
     | 
    
         
             
                  parse_dates: false,
         
     | 
| 
       194 
194 
     | 
    
         
             
                  n_threads: nil,
         
     | 
| 
       195 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 195 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       196 
196 
     | 
    
         
             
                  batch_size: 8192,
         
     | 
| 
       197 
197 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       198 
198 
     | 
    
         
             
                  encoding: "utf8",
         
     | 
| 
         @@ -222,7 +222,7 @@ module Polars 
     | 
|
| 
       222 
222 
     | 
    
         
             
                  if !dtypes.nil?
         
     | 
| 
       223 
223 
     | 
    
         
             
                    if dtypes.is_a?(Hash)
         
     | 
| 
       224 
224 
     | 
    
         
             
                      dtype_list = []
         
     | 
| 
       225 
     | 
    
         
            -
                      dtypes.each do|k, v|
         
     | 
| 
      
 225 
     | 
    
         
            +
                      dtypes.each do |k, v|
         
     | 
| 
       226 
226 
     | 
    
         
             
                        dtype_list << [k, Utils.rb_type_to_dtype(v)]
         
     | 
| 
       227 
227 
     | 
    
         
             
                      end
         
     | 
| 
       228 
228 
     | 
    
         
             
                    elsif dtypes.is_a?(::Array)
         
     | 
| 
         @@ -304,7 +304,7 @@ module Polars 
     | 
|
| 
       304 
304 
     | 
    
         
             
                      missing_utf8_is_empty_string,
         
     | 
| 
       305 
305 
     | 
    
         
             
                      parse_dates,
         
     | 
| 
       306 
306 
     | 
    
         
             
                      skip_rows_after_header,
         
     | 
| 
       307 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 307 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       308 
308 
     | 
    
         
             
                      sample_size,
         
     | 
| 
       309 
309 
     | 
    
         
             
                      eol_char,
         
     | 
| 
       310 
310 
     | 
    
         
             
                      raise_if_empty,
         
     | 
| 
         @@ -422,7 +422,7 @@ module Polars 
     | 
|
| 
       422 
422 
     | 
    
         
             
                  ignore_errors: false,
         
     | 
| 
       423 
423 
     | 
    
         
             
                  parse_dates: false,
         
     | 
| 
       424 
424 
     | 
    
         
             
                  n_threads: nil,
         
     | 
| 
       425 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 425 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       426 
426 
     | 
    
         
             
                  batch_size: 50_000,
         
     | 
| 
       427 
427 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       428 
428 
     | 
    
         
             
                  encoding: "utf8",
         
     | 
| 
         @@ -567,7 +567,7 @@ module Polars 
     | 
|
| 
       567 
567 
     | 
    
         
             
                  ignore_errors: false,
         
     | 
| 
       568 
568 
     | 
    
         
             
                  cache: true,
         
     | 
| 
       569 
569 
     | 
    
         
             
                  with_column_names: nil,
         
     | 
| 
       570 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 570 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       571 
571 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       572 
572 
     | 
    
         
             
                  encoding: "utf8",
         
     | 
| 
       573 
573 
     | 
    
         
             
                  low_memory: false,
         
     | 
| 
         @@ -629,7 +629,7 @@ module Polars 
     | 
|
| 
       629 
629 
     | 
    
         
             
                  ignore_errors: false,
         
     | 
| 
       630 
630 
     | 
    
         
             
                  cache: true,
         
     | 
| 
       631 
631 
     | 
    
         
             
                  with_column_names: nil,
         
     | 
| 
       632 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 632 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       633 
633 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       634 
634 
     | 
    
         
             
                  encoding: "utf8",
         
     | 
| 
       635 
635 
     | 
    
         
             
                  low_memory: false,
         
     | 
| 
         @@ -669,7 +669,7 @@ module Polars 
     | 
|
| 
       669 
669 
     | 
    
         
             
                      rechunk,
         
     | 
| 
       670 
670 
     | 
    
         
             
                      skip_rows_after_header,
         
     | 
| 
       671 
671 
     | 
    
         
             
                      encoding,
         
     | 
| 
       672 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 672 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       673 
673 
     | 
    
         
             
                      parse_dates,
         
     | 
| 
       674 
674 
     | 
    
         
             
                      eol_char,
         
     | 
| 
       675 
675 
     | 
    
         
             
                      truncate_ragged_lines
         
     | 
    
        data/lib/polars/io/ipc.rb
    CHANGED
    
    | 
         @@ -76,7 +76,7 @@ module Polars 
     | 
|
| 
       76 
76 
     | 
    
         
             
                      columns,
         
     | 
| 
       77 
77 
     | 
    
         
             
                      projection,
         
     | 
| 
       78 
78 
     | 
    
         
             
                      n_rows,
         
     | 
| 
       79 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 79 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       80 
80 
     | 
    
         
             
                      memory_map
         
     | 
| 
       81 
81 
     | 
    
         
             
                    )
         
     | 
| 
       82 
82 
     | 
    
         
             
                  Utils.wrap_df(rbdf)
         
     | 
| 
         @@ -149,7 +149,7 @@ module Polars 
     | 
|
| 
       149 
149 
     | 
    
         
             
                    columns,
         
     | 
| 
       150 
150 
     | 
    
         
             
                    projection,
         
     | 
| 
       151 
151 
     | 
    
         
             
                    n_rows,
         
     | 
| 
       152 
     | 
    
         
            -
                    Utils. 
     | 
| 
      
 152 
     | 
    
         
            +
                    Utils.parse_row_index_args(row_index_name, row_index_offset),
         
     | 
| 
       153 
153 
     | 
    
         
             
                    rechunk
         
     | 
| 
       154 
154 
     | 
    
         
             
                  )
         
     | 
| 
       155 
155 
     | 
    
         
             
                  Utils.wrap_df(pydf)
         
     | 
| 
         @@ -238,7 +238,7 @@ module Polars 
     | 
|
| 
       238 
238 
     | 
    
         
             
                      n_rows,
         
     | 
| 
       239 
239 
     | 
    
         
             
                      cache,
         
     | 
| 
       240 
240 
     | 
    
         
             
                      rechunk,
         
     | 
| 
       241 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 241 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       242 
242 
     | 
    
         
             
                      memory_map
         
     | 
| 
       243 
243 
     | 
    
         
             
                    )
         
     | 
| 
       244 
244 
     | 
    
         
             
                  Utils.wrap_ldf(rblf)
         
     | 
    
        data/lib/polars/io/json.rb
    CHANGED
    
    | 
         @@ -6,12 +6,23 @@ module Polars 
     | 
|
| 
       6 
6 
     | 
    
         
             
                #   Path to a file or a file-like object.
         
     | 
| 
       7 
7 
     | 
    
         
             
                #
         
     | 
| 
       8 
8 
     | 
    
         
             
                # @return [DataFrame]
         
     | 
| 
       9 
     | 
    
         
            -
                def read_json( 
     | 
| 
      
 9 
     | 
    
         
            +
                def read_json(
         
     | 
| 
      
 10 
     | 
    
         
            +
                  source,
         
     | 
| 
      
 11 
     | 
    
         
            +
                  schema: nil,
         
     | 
| 
      
 12 
     | 
    
         
            +
                  schema_overrides: nil,
         
     | 
| 
      
 13 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT
         
     | 
| 
      
 14 
     | 
    
         
            +
                )
         
     | 
| 
       10 
15 
     | 
    
         
             
                  if Utils.pathlike?(source)
         
     | 
| 
       11 
16 
     | 
    
         
             
                    source = Utils.normalize_filepath(source)
         
     | 
| 
       12 
17 
     | 
    
         
             
                  end
         
     | 
| 
       13 
18 
     | 
    
         | 
| 
       14 
     | 
    
         
            -
                  rbdf = 
     | 
| 
      
 19 
     | 
    
         
            +
                  rbdf =
         
     | 
| 
      
 20 
     | 
    
         
            +
                    RbDataFrame.read_json(
         
     | 
| 
      
 21 
     | 
    
         
            +
                      source,
         
     | 
| 
      
 22 
     | 
    
         
            +
                      infer_schema_length,
         
     | 
| 
      
 23 
     | 
    
         
            +
                      schema,
         
     | 
| 
      
 24 
     | 
    
         
            +
                      schema_overrides
         
     | 
| 
      
 25 
     | 
    
         
            +
                    )
         
     | 
| 
       15 
26 
     | 
    
         
             
                  Utils.wrap_df(rbdf)
         
     | 
| 
       16 
27 
     | 
    
         
             
                end
         
     | 
| 
       17 
28 
     | 
    
         
             
              end
         
     | 
    
        data/lib/polars/io/ndjson.rb
    CHANGED
    
    | 
         @@ -6,12 +6,23 @@ module Polars 
     | 
|
| 
       6 
6 
     | 
    
         
             
                #   Path to a file or a file-like object.
         
     | 
| 
       7 
7 
     | 
    
         
             
                #
         
     | 
| 
       8 
8 
     | 
    
         
             
                # @return [DataFrame]
         
     | 
| 
       9 
     | 
    
         
            -
                def read_ndjson( 
     | 
| 
      
 9 
     | 
    
         
            +
                def read_ndjson(
         
     | 
| 
      
 10 
     | 
    
         
            +
                  source,
         
     | 
| 
      
 11 
     | 
    
         
            +
                  schema: nil,
         
     | 
| 
      
 12 
     | 
    
         
            +
                  schema_overrides: nil,
         
     | 
| 
      
 13 
     | 
    
         
            +
                  ignore_errors: false
         
     | 
| 
      
 14 
     | 
    
         
            +
                )
         
     | 
| 
       10 
15 
     | 
    
         
             
                  if Utils.pathlike?(source)
         
     | 
| 
       11 
16 
     | 
    
         
             
                    source = Utils.normalize_filepath(source)
         
     | 
| 
       12 
17 
     | 
    
         
             
                  end
         
     | 
| 
       13 
18 
     | 
    
         | 
| 
       14 
     | 
    
         
            -
                  rbdf = 
     | 
| 
      
 19 
     | 
    
         
            +
                  rbdf =
         
     | 
| 
      
 20 
     | 
    
         
            +
                    RbDataFrame.read_ndjson(
         
     | 
| 
      
 21 
     | 
    
         
            +
                      source,
         
     | 
| 
      
 22 
     | 
    
         
            +
                      ignore_errors,
         
     | 
| 
      
 23 
     | 
    
         
            +
                      schema,
         
     | 
| 
      
 24 
     | 
    
         
            +
                      schema_overrides
         
     | 
| 
      
 25 
     | 
    
         
            +
                    )
         
     | 
| 
       15 
26 
     | 
    
         
             
                  Utils.wrap_df(rbdf)
         
     | 
| 
       16 
27 
     | 
    
         
             
                end
         
     | 
| 
       17 
28 
     | 
    
         | 
| 
         @@ -41,7 +52,7 @@ module Polars 
     | 
|
| 
       41 
52 
     | 
    
         
             
                # @return [LazyFrame]
         
     | 
| 
       42 
53 
     | 
    
         
             
                def scan_ndjson(
         
     | 
| 
       43 
54 
     | 
    
         
             
                  source,
         
     | 
| 
       44 
     | 
    
         
            -
                  infer_schema_length:  
     | 
| 
      
 55 
     | 
    
         
            +
                  infer_schema_length: N_INFER_DEFAULT,
         
     | 
| 
       45 
56 
     | 
    
         
             
                  batch_size: 1024,
         
     | 
| 
       46 
57 
     | 
    
         
             
                  n_rows: nil,
         
     | 
| 
       47 
58 
     | 
    
         
             
                  low_memory: false,
         
     | 
| 
         @@ -61,7 +72,7 @@ module Polars 
     | 
|
| 
       61 
72 
     | 
    
         
             
                      n_rows,
         
     | 
| 
       62 
73 
     | 
    
         
             
                      low_memory,
         
     | 
| 
       63 
74 
     | 
    
         
             
                      rechunk,
         
     | 
| 
       64 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 75 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset)
         
     | 
| 
       65 
76 
     | 
    
         
             
                    )
         
     | 
| 
       66 
77 
     | 
    
         
             
                  Utils.wrap_ldf(rblf)
         
     | 
| 
       67 
78 
     | 
    
         
             
                end
         
     | 
    
        data/lib/polars/io/parquet.rb
    CHANGED
    
    | 
         @@ -110,7 +110,7 @@ module Polars 
     | 
|
| 
       110 
110 
     | 
    
         
             
                      projection,
         
     | 
| 
       111 
111 
     | 
    
         
             
                      n_rows,
         
     | 
| 
       112 
112 
     | 
    
         
             
                      parallel,
         
     | 
| 
       113 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 113 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       114 
114 
     | 
    
         
             
                      low_memory,
         
     | 
| 
       115 
115 
     | 
    
         
             
                      use_statistics,
         
     | 
| 
       116 
116 
     | 
    
         
             
                      rechunk
         
     | 
| 
         @@ -178,7 +178,7 @@ module Polars 
     | 
|
| 
       178 
178 
     | 
    
         | 
| 
       179 
179 
     | 
    
         
             
                  _scan_parquet_impl(
         
     | 
| 
       180 
180 
     | 
    
         
             
                    source,
         
     | 
| 
       181 
     | 
    
         
            -
                    n_rows:n_rows,
         
     | 
| 
      
 181 
     | 
    
         
            +
                    n_rows: n_rows,
         
     | 
| 
       182 
182 
     | 
    
         
             
                    cache: cache,
         
     | 
| 
       183 
183 
     | 
    
         
             
                    parallel: parallel,
         
     | 
| 
       184 
184 
     | 
    
         
             
                    rechunk: rechunk,
         
     | 
| 
         @@ -202,7 +202,7 @@ module Polars 
     | 
|
| 
       202 
202 
     | 
    
         
             
                  storage_options: nil,
         
     | 
| 
       203 
203 
     | 
    
         
             
                  low_memory: false,
         
     | 
| 
       204 
204 
     | 
    
         
             
                  use_statistics: true,
         
     | 
| 
       205 
     | 
    
         
            -
                  hive_partitioning:  
     | 
| 
      
 205 
     | 
    
         
            +
                  hive_partitioning: nil,
         
     | 
| 
       206 
206 
     | 
    
         
             
                  glob: true
         
     | 
| 
       207 
207 
     | 
    
         
             
                )
         
     | 
| 
       208 
208 
     | 
    
         
             
                  rblf =
         
     | 
| 
         @@ -213,11 +213,12 @@ module Polars 
     | 
|
| 
       213 
213 
     | 
    
         
             
                      cache,
         
     | 
| 
       214 
214 
     | 
    
         
             
                      parallel,
         
     | 
| 
       215 
215 
     | 
    
         
             
                      rechunk,
         
     | 
| 
       216 
     | 
    
         
            -
                      Utils. 
     | 
| 
      
 216 
     | 
    
         
            +
                      Utils.parse_row_index_args(row_count_name, row_count_offset),
         
     | 
| 
       217 
217 
     | 
    
         
             
                      low_memory,
         
     | 
| 
       218 
218 
     | 
    
         
             
                      use_statistics,
         
     | 
| 
       219 
219 
     | 
    
         
             
                      hive_partitioning,
         
     | 
| 
       220 
220 
     | 
    
         
             
                      nil,
         
     | 
| 
      
 221 
     | 
    
         
            +
                      true,
         
     | 
| 
       221 
222 
     | 
    
         
             
                      glob
         
     | 
| 
       222 
223 
     | 
    
         
             
                    )
         
     | 
| 
       223 
224 
     | 
    
         
             
                  Utils.wrap_ldf(rblf)
         
     |