polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,104 @@
1
module Polars
  # Resolves a Ruby range against a wrapped object and picks the cheapest
  # way of producing the requested rows.
  #
  # @private
  class Slice
    # @param obj [Object]
    #   Object to be sliced. The single-call paths use its `length`,
    #   `is_empty`, `cleared`, `clone`, `reverse` and `slice` methods.
    def initialize(obj)
      @obj = obj
    end

    # Slice the wrapped object, using a single native call whenever one
    # is sufficient.
    #
    # @param s [Range]
    #   Range-like object responding to `begin`, `end` and `exclude_end?`.
    #
    # @return [Object]
    def apply(s)
      # resolve @start / @stop / @stride / @slice_length / @is_unbounded
      _slice_setup(s)

      # single-operation results
      return @obj.cleared if @slice_length.zero?
      if @is_unbounded && (@stride == -1 || @stride == 1)
        return @stride < 0 ? @obj.reverse : @obj.clone
      end
      return @obj.slice(@start, @slice_length) if @start >= 0 && @stop >= 0 && @stride == 1
      return @obj.slice(@stop + 1, 1) if @stride < 0 && @slice_length == 1

      # everything else needs several operations, so run them lazily
      lazy_frame = _lazify(@obj)
      result =
        if @stride > 0
          _slice_positive(lazy_frame)
        else
          _slice_negative(lazy_frame)
        end
      _as_original(result, @obj)
    end

    private

    # Collect a lazy result and hand it back in the caller's original type:
    # the collected frame for a DataFrame, `to_series` of it otherwise.
    def _as_original(lazy, original)
      collected = lazy.collect
      return collected if original.is_a?(DataFrame)

      collected.to_series
    end

    # Obtain a lazy frame for the object; anything that is not a DataFrame
    # goes through `to_frame` first.
    def _lazify(obj)
      frame = obj.is_a?(DataFrame) ? obj : obj.to_frame
      frame.lazy
    end

    # Positive stride: take the contiguous window, then keep every
    # `@stride`-th row. Only reached when no single-call path in `apply`
    # matched.
    def _slice_positive(obj)
      window = obj.slice(@start, @slice_length)
      window.take_every(@stride)
    end

    # Negative stride: take the window starting just after `@stop`, reverse
    # it, and thin it out only when the step is larger than one.
    def _slice_negative(obj)
      step = @stride.abs
      reversed = obj.slice(@stop + 1, @slice_length).reverse
      return reversed unless step > 1

      reversed.take_every(step)
    end

    # Turn the range into absolute bounds (possible because the object's
    # length is known) and record whether it is unbounded or empty.
    def _slice_setup(s)
      size = @obj.length
      first = s.begin
      last = s.end

      # negative begin counts from the end and is clamped at zero;
      # a missing begin means "from the first row"
      lower =
        if !first
          0
        elsif first < 0
          [first + size, 0].max
        else
          first
        end

      # the stop is stored as an exclusive bound, so an inclusive range
      # adds one; a missing end means "through the last row"
      upper =
        if last
          exclusive = last + (s.exclude_end? ? 0 : 1)
          last < 0 ? exclusive + size : exclusive
        else
          size
        end

      # the stride is fixed at 1 here; the non-unit and negative stride
      # handling elsewhere in this class is therefore not exercised
      step = 1

      # does the range cover the whole object?
      @is_unbounded =
        if step >= 1
          lower <= 0 && upper >= size
        else
          upper == -1 && lower >= size - 1
        end

      # number of rows spanned before any striding
      @slice_length =
        if @obj.is_empty
          0
        elsif @is_unbounded
          size
        elsif lower == upper || (step > 0 && lower > upper) || (step < 0 && lower < upper)
          0
        else
          (upper - lower).abs
        end

      @start = lower
      @stop = upper
      @stride = step
    end
  end
end
@@ -0,0 +1,194 @@
1
module Polars
  # Run SQL queries against DataFrame/LazyFrame data.
  class SQLContext
    # @private
    attr_accessor :_ctxt, :_eager_execution

    # Initialize a new `SQLContext`.
    #
    # @param frames [Hash, nil]
    #   A `{name:frame, ...}` mapping of tables to register immediately.
    # @param eager_execution [Boolean]
    #   Default used by {#execute} when its `eager` argument is left unset.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs.
    def initialize(frames = nil, eager_execution: false, **named_frames)
      self._ctxt = RbSQLContext.new
      self._eager_execution = eager_execution

      frames = (frames || {}).to_h

      if frames.any? || named_frames.any?
        register_many(frames, **named_frames)
      end
    end

    # Parse the given SQL query and execute it against the registered frame data.
    #
    # @param query [String]
    #   A valid string SQL query.
    # @param eager [Boolean]
    #   Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
    #   If unset, the value of the init-time parameter "eager_execution" will be
    #   used. (Note that the query itself is always executed in lazy-mode; this
    #   parameter only impacts the type of the returned frame).
    #
    # @return [DataFrame, LazyFrame]
    #
    # @example Execute a SQL query against the registered frame data:
    #   df = Polars::DataFrame.new(
    #     [
    #       ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
    #       ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
    #       ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
    #       ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
    #       ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
    #     ],
    #     schema: ["title", "release_year", "budget", "gross", "imdb_score"]
    #   )
    #   ctx = Polars::SQLContext.new(films: df)
    #   ctx.execute(
    #     "
    #     SELECT title, release_year, imdb_score
    #     FROM films
    #     WHERE release_year > 1990
    #     ORDER BY imdb_score DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (4, 3)
    #   # ┌──────────────────────────┬──────────────┬────────────┐
    #   # │ title                    ┆ release_year ┆ imdb_score │
    #   # │ ---                      ┆ ---          ┆ ---        │
    #   # │ str                      ┆ i64          ┆ f64        │
    #   # ╞══════════════════════════╪══════════════╪════════════╡
    #   # │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
    #   # │ The Dark Knight          ┆ 2008         ┆ 9.0        │
    #   # │ Schindler's List         ┆ 1993         ┆ 8.9        │
    #   # │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
    #   # └──────────────────────────┴──────────────┴────────────┘
    #
    # @example Execute a GROUP BY query:
    #   ctx.execute(
    #     "
    #     SELECT
    #       MAX(release_year / 10) * 10 AS decade,
    #       SUM(gross) AS total_gross,
    #       COUNT(title) AS n_films,
    #     FROM films
    #     GROUP BY (release_year / 10) -- decade
    #     ORDER BY total_gross DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌────────┬─────────────┬─────────┐
    #   # │ decade ┆ total_gross ┆ n_films │
    #   # │ ---    ┆ ---         ┆ ---     │
    #   # │ i64    ┆ i64         ┆ u32     │
    #   # ╞════════╪═════════════╪═════════╡
    #   # │ 2000   ┆ 533316061   ┆ 1       │
    #   # │ 1990   ┆ 232338648   ┆ 3       │
    #   # │ 1970   ┆ 134821952   ┆ 1       │
    #   # └────────┴─────────────┴─────────┘
    def execute(query, eager: nil)
      # An explicit `eager:` (true or false) always wins; the init-time
      # default applies only when the caller left it unset.
      eager = _eager_execution if eager.nil?

      res = Utils.wrap_ldf(_ctxt.execute(query))
      eager ? res.collect : res
    end

    # Register a single frame as a table, using the given name.
    #
    # @param name [String]
    #   Name of the table.
    # @param frame [Object]
    #   eager/lazy frame to associate with this table name.
    #
    # @return [SQLContext]
    #
    # @example
    #   df = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new
    #   ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
    #   # =>
    #   # shape: (1, 1)
    #   # ┌───────┐
    #   # │ hello │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │ world │
    #   # └───────┘
    def register(name, frame)
      # tables are stored lazily, so eager frames are converted first
      if frame.is_a?(DataFrame)
        frame = frame.lazy
      end
      _ctxt.register(name.to_s, frame._ldf)
      self
    end

    # Register multiple eager/lazy frames as tables, using the associated names.
    #
    # @param frames [Hash]
    #   A `{name:frame, ...}` mapping.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs.
    #
    # @return [SQLContext]
    def register_many(frames, **named_frames)
      frames = (frames || {}).to_h
      # on a name clash the keyword-argument frame replaces the mapping entry
      frames = frames.merge(named_frames)
      frames.each do |name, frame|
        register(name, frame)
      end
      self
    end

    # Unregister one or more eager/lazy frames by name.
    #
    # @param names [Object]
    #   Names of the tables to unregister. A single String or Symbol, or a
    #   collection of them.
    #
    # @return [SQLContext]
    #
    # @example Register with a SQLContext object:
    #   df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
    #   lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
    #   lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
    #   ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
    #   ctx.tables
    #   # => ["test1", "test2", "test3"]
    #
    # @example Unregister one or more of the tables:
    #   ctx.unregister(["test1", "test3"]).tables
    #   # => ["test2"]
    def unregister(names)
      if names.is_a?(::String) || names.is_a?(::Symbol)
        names = [names]
      end
      names.each do |nm|
        # same name normalisation as #register, so symbol names round-trip
        _ctxt.unregister(nm.to_s)
      end
      self
    end

    # Return a list of the registered table names.
    #
    # @return [Array]
    #
    # @example Executing as SQL:
    #   frame_data = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new(hello_world: frame_data)
    #   ctx.execute("SHOW TABLES", eager: true)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌─────────────┐
    #   # │ name        │
    #   # │ ---         │
    #   # │ str         │
    #   # ╞═════════════╡
    #   # │ hello_world │
    #   # └─────────────┘
    #
    # @example Calling the method:
    #   ctx.tables
    #   # => ["hello_world"]
    def tables
      _ctxt.get_tables.sort
    end
  end
end
@@ -0,0 +1,75 @@
1
module Polars
  # Block-scoped handle on the global string cache: the given block is
  # passed straight to `RbStringCacheHolder.hold`.
  class StringCache
    def initialize(&block)
      RbStringCacheHolder.hold(&block)
    end
  end

  module Functions
    # Turn the global string cache on.
    #
    # While the cache is enabled, `Categorical` columns share the same
    # underlying physical value for equal strings, which lets such columns be
    # concatenated or used together in a join, for example.
    #
    # @return [nil]
    #
    # @example Construct two Series using the same global string cache.
    #   Polars.enable_string_cache
    #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
    #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
    #   Polars.disable_string_cache
    #
    # @example As both Series are constructed under the same global string cache, they can be concatenated.
    #   Polars.concat([s1, s2])
    #   # =>
    #   # shape: (6,)
    #   # Series: 'color' [cat]
    #   # [
    #   #         "red"
    #   #         "green"
    #   #         "red"
    #   #         "blue"
    #   #         "red"
    #   #         "green"
    #   # ]
    def enable_string_cache = Plr.enable_string_cache

    # Turn the global string cache off and clear it.
    #
    # @return [nil]
    #
    # @example Construct two Series using the same global string cache.
    #   Polars.enable_string_cache
    #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
    #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
    #   Polars.disable_string_cache
    #
    # @example As both Series are constructed under the same global string cache, they can be concatenated.
    #   Polars.concat([s1, s2])
    #   # =>
    #   # shape: (6,)
    #   # Series: 'color' [cat]
    #   # [
    #   #         "red"
    #   #         "green"
    #   #         "red"
    #   #         "blue"
    #   #         "red"
    #   #         "green"
    #   # ]
    def disable_string_cache = Plr.disable_string_cache

    # Report whether the global string cache is currently enabled.
    #
    # @return [Boolean]
    def using_string_cache = Plr.using_string_cache
  end
end