polars-df 0.2.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2020 Ritchie Vink
2
+ Copyright (c) 2022-2023 Andrew Kane
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,91 @@
1
+ # Polars Ruby
2
+
3
+ :fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
4
+
5
+ [![Build Status](https://github.com/ankane/polars-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/polars-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application’s Gemfile:
10
+
11
+ ```ruby
12
+ gem "polars-df"
13
+ ```
14
+
15
+ ## Getting Started
16
+
17
+ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
18
+
19
+ ```ruby
20
+ Polars.read_csv("iris.csv")
21
+   .lazy
22
+   .filter(Polars.col("sepal_length") > 5)
23
+   .groupby("species")
24
+   .agg(Polars.all.sum)
25
+   .collect
26
+ ```
27
+
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
29
+
30
+ ## Examples
31
+
32
+ ### Creating DataFrames
33
+
34
+ From a CSV
35
+
36
+ ```ruby
37
+ Polars.read_csv("file.csv")
38
+ ```
39
+
40
+ From Parquet
41
+
42
+ ```ruby
43
+ Polars.read_parquet("file.parquet")
44
+ ```
45
+
46
+ From Active Record
47
+
48
+ ```ruby
49
+ Polars::DataFrame.new(User.all)
50
+ ```
51
+
52
+ From a hash
53
+
54
+ ```ruby
55
+ Polars::DataFrame.new({
56
+   a: [1, 2, 3],
57
+   b: ["one", "two", "three"]
58
+ })
59
+ ```
60
+
61
+ From an array of series
62
+
63
+ ```ruby
64
+ Polars::DataFrame.new([
65
+   Polars::Series.new("a", [1, 2, 3]),
66
+   Polars::Series.new("b", ["one", "two", "three"])
67
+ ])
68
+ ```
69
+
70
+ ## History
71
+
72
+ View the [changelog](CHANGELOG.md)
73
+
74
+ ## Contributing
75
+
76
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
77
+
78
+ - [Report bugs](https://github.com/ankane/polars-ruby/issues)
79
+ - Fix bugs and [submit pull requests](https://github.com/ankane/polars-ruby/pulls)
80
+ - Write, clarify, or fix documentation
81
+ - Suggest or add new features
82
+
83
+ To get started with development:
84
+
85
+ ```sh
86
+ git clone https://github.com/ankane/polars-ruby.git
87
+ cd polars-ruby
88
+ bundle install
89
+ bundle exec rake compile
90
+ bundle exec rake test
91
+ ```
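data/lib/polars/3.0/polars.bundle ADDED
Binary file
data/lib/polars/3.1/polars.bundle ADDED
Binary file
data/lib/polars/3.2/polars.bundle ADDED
Binary file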
data/lib/polars/batched_csv_reader.rb ADDED
@@ -0,0 +1,96 @@
1
+ module Polars
2
+ # @private
3
+ class BatchedCsvReader
4
+ attr_accessor :_reader, :new_columns
5
+
6
+ def initialize(
7
+ file,
8
+ has_header: true,
9
+ columns: nil,
10
+ sep: ",",
11
+ comment_char: nil,
12
+ quote_char: '"',
13
+ skip_rows: 0,
14
+ dtypes: nil,
15
+ null_values: nil,
16
+ ignore_errors: false,
17
+ parse_dates: false,
18
+ n_threads: nil,
19
+ infer_schema_length: 100,
20
+ batch_size: 50_000,
21
+ n_rows: nil,
22
+ encoding: "utf8",
23
+ low_memory: false,
24
+ rechunk: true,
25
+ skip_rows_after_header: 0,
26
+ row_count_name: nil,
27
+ row_count_offset: 0,
28
+ sample_size: 1024,
29
+ eol_char: "\n",
30
+ new_columns: nil
31
+ )
32
+ if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
33
+ path = Utils.format_path(file)
34
+ end
35
+
36
+ dtype_list = nil
37
+ dtype_slice = nil
38
+ if !dtypes.nil?
39
+ if dtypes.is_a?(Hash)
40
+ dtype_list = []
41
+ dtypes.each do|k, v|
42
+ dtype_list << [k, Utils.rb_type_to_dtype(v)]
43
+ end
44
+ elsif dtypes.is_a?(Array)
45
+ dtype_slice = dtypes
46
+ else
47
+ raise ArgumentError, "dtype arg should be list or dict"
48
+ end
49
+ end
50
+
51
+ processed_null_values = Utils._process_null_values(null_values)
52
+ projection, columns = Utils.handle_projection_columns(columns)
53
+
54
+ self._reader = RbBatchedCsv.new(
55
+ infer_schema_length,
56
+ batch_size,
57
+ has_header,
58
+ ignore_errors,
59
+ n_rows,
60
+ skip_rows,
61
+ projection,
62
+ sep,
63
+ rechunk,
64
+ columns,
65
+ encoding,
66
+ n_threads,
67
+ path,
68
+ dtype_list,
69
+ dtype_slice,
70
+ low_memory,
71
+ comment_char,
72
+ quote_char,
73
+ processed_null_values,
74
+ parse_dates,
75
+ skip_rows_after_header,
76
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
77
+ sample_size,
78
+ eol_char
79
+ )
80
+ self.new_columns = new_columns
81
+ end
82
+
83
+ def next_batches(n)
84
+ batches = _reader.next_batches(n)
85
+ if !batches.nil?
86
+ if new_columns
87
+ batches.map { |df| Utils._update_columns(Utils.wrap_df(df), new_columns) }
88
+ else
89
+ batches.map { |df| Utils.wrap_df(df) }
90
+ end
91
+ else
92
+ nil
93
+ end
94
+ end
95
+ end
96
+ end
data/lib/polars/cat_expr.rb ADDED
@@ -0,0 +1,52 @@
1
+ module Polars
2
+ # Namespace for categorical-related expressions.
3
+ class CatExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Determine how this categorical series should be sorted.
13
+ #
14
+ # @param ordering ["physical", "lexical"]
15
+ #   Ordering type:
16
+ #
17
+ #   - 'physical' -> Use the physical representation of the categories to determine the order (default).
18
+ #   - 'lexical' -> Use the string values to determine the ordering.
19
+ #
20
+ # @return [Expr]
21
+ #
22
+ # @example
23
+ #   df = Polars::DataFrame.new(
24
+ #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
25
+ #   ).with_columns(
26
+ #     [
27
+ #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
28
+ #     ]
29
+ #   )
30
+ #   df.sort(["cats", "vals"])
31
+ #   # =>
32
+ #   # shape: (5, 2)
33
+ #   # ┌──────┬──────┐
34
+ #   # │ cats ┆ vals │
35
+ #   # │ ---  ┆ ---  │
36
+ #   # │ cat  ┆ i64  │
37
+ #   # ╞══════╪══════╡
38
+ #   # │ a    ┆ 2    │
39
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
40
+ #   # │ b    ┆ 3    │
41
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
42
+ #   # │ k    ┆ 2    │
43
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
44
+ #   # │ z    ┆ 1    │
45
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
46
+ #   # │ z    ┆ 3    │
47
+ #   # └──────┴──────┘
48
+ def set_ordering(ordering)
49
+ Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
50
+ end
51
+ end
52
+ end
data/lib/polars/cat_name_space.rb ADDED
@@ -0,0 +1,54 @@
1
+ module Polars
2
+ # Series.cat namespace.
3
+ class CatNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "cat"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Determine how this categorical series should be sorted.
14
+ #
15
+ # @param ordering ["physical", "lexical"]
16
+ #   Ordering type:
17
+ #
18
+ #   - 'physical' -> Use the physical representation of the categories to
19
+ #     determine the order (default).
20
+ #   - 'lexical' -> Use the string values to determine the ordering.
21
+ #
22
+ # @return [Series]
23
+ #
24
+ # @example Shown through the expression API (`Polars.col`)
25
+ #   df = Polars::DataFrame.new(
26
+ #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
27
+ #   ).with_columns(
28
+ #     [
29
+ #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
30
+ #     ]
31
+ #   )
32
+ #   df.sort(["cats", "vals"])
33
+ #   # =>
34
+ #   # shape: (5, 2)
35
+ #   # ┌──────┬──────┐
36
+ #   # │ cats ┆ vals │
37
+ #   # │ ---  ┆ ---  │
38
+ #   # │ cat  ┆ i64  │
39
+ #   # ╞══════╪══════╡
40
+ #   # │ a    ┆ 2    │
41
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
42
+ #   # │ b    ┆ 3    │
43
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
44
+ #   # │ k    ┆ 2    │
45
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
46
+ #   # │ z    ┆ 1    │
47
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
48
+ #   # │ z    ┆ 3    │
49
+ #   # └──────┴──────┘
50
+ def set_ordering(ordering)
51
+ super
52
+ end
53
+ end
54
+ end
data/lib/polars/convert.rb ADDED
@@ -0,0 +1,100 @@
1
+ module Polars
2
+ module Convert
3
+ # Construct a DataFrame from a hash of arrays.
4
+ #
5
+ # This operation clones data, unless you pass in a `Hash<String, Series>`.
6
+ #
7
+ # @param data [Hash]
8
+ #   Two-dimensional data represented as a hash. Hash must contain
9
+ #   arrays.
10
+ # @param columns [Array]
11
+ #   Column labels to use for resulting DataFrame. If specified, overrides any
12
+ #   labels already present in the data. Must match data dimensions.
13
+ #
14
+ # @return [DataFrame]
15
+ #
16
+ # @example
17
+ #   data = {"a" => [1, 2], "b" => [3, 4]}
18
+ #   Polars.from_hash(data)
19
+ #   # =>
20
+ #   # shape: (2, 2)
21
+ #   # ┌─────┬─────┐
22
+ #   # │ a   ┆ b   │
23
+ #   # │ --- ┆ --- │
24
+ #   # │ i64 ┆ i64 │
25
+ #   # ╞═════╪═════╡
26
+ #   # │ 1   ┆ 3   │
27
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
28
+ #   # │ 2   ┆ 4   │
29
+ #   # └─────┴─────┘
30
+ def from_hash(data, columns: nil)
31
+ DataFrame._from_hash(data, columns: columns)
32
+ end
33
+
34
+ # Construct a DataFrame from an array of hashes. This operation clones data.
35
+ #
36
+ # @param hashes [Array]
37
+ #   Array with hashes mapping column name to value.
38
+ # @param infer_schema_length [Integer]
39
+ #   How many hashes/rows to scan to determine the data types.
40
+ #   If set to `nil`, all rows are scanned, which will be slow.
41
+ # @param schema [Object]
42
+ #   Schema that (partially) overwrites the inferred schema.
43
+ #
44
+ # @return [DataFrame]
45
+ #
46
+ # @example
47
+ #   data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
48
+ #   Polars.from_hashes(data)
49
+ #   # =>
50
+ #   # shape: (3, 2)
51
+ #   # ┌─────┬─────┐
52
+ #   # │ a   ┆ b   │
53
+ #   # │ --- ┆ --- │
54
+ #   # │ i64 ┆ i64 │
55
+ #   # ╞═════╪═════╡
56
+ #   # │ 1   ┆ 4   │
57
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
58
+ #   # │ 2   ┆ 5   │
59
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
60
+ #   # │ 3   ┆ 6   │
61
+ #   # └─────┴─────┘
62
+ #
63
+ # @example Overwrite first column name and dtype
64
+ #   Polars.from_hashes(data, schema: {"c" => :i32})
65
+ #   # =>
66
+ #   # shape: (3, 2)
67
+ #   # ┌─────┬─────┐
68
+ #   # │ c   ┆ b   │
69
+ #   # │ --- ┆ --- │
70
+ #   # │ i32 ┆ i64 │
71
+ #   # ╞═════╪═════╡
72
+ #   # │ 1   ┆ 4   │
73
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
74
+ #   # │ 2   ┆ 5   │
75
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
76
+ #   # │ 3   ┆ 6   │
77
+ #   # └─────┴─────┘
78
+ #
79
+ # @example Let polars infer the dtypes but inform about a 3rd column
80
+ #   Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
81
+ #   # shape: (3, 3)
82
+ #   # ┌─────┬─────┬──────┐
83
+ #   # │ a   ┆ b   ┆ c    │
84
+ #   # │ --- ┆ --- ┆ ---  │
85
+ #   # │ i64 ┆ i64 ┆ i32  │
86
+ #   # ╞═════╪═════╪══════╡
87
+ #   # │ 1   ┆ 4   ┆ null │
88
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
89
+ #   # │ 2   ┆ 5   ┆ null │
90
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
91
+ #   # │ 3   ┆ 6   ┆ null │
92
+ #   # └─────┴─────┴──────┘
93
+ # def from_hashes(hashes, infer_schema_length: 50, schema: nil)
94
+ #   DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
95
+ # end
96
+
97
+ # def from_records
98
+ # end
99
+ end
100
+ end