polars-df 0.20.0-x64-mingw-ucrt → 0.21.1-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +2153 -2532
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/3.4/polars.so +0 -0
  9. data/lib/polars/array_expr.rb +382 -3
  10. data/lib/polars/array_name_space.rb +281 -0
  11. data/lib/polars/binary_expr.rb +67 -0
  12. data/lib/polars/binary_name_space.rb +43 -0
  13. data/lib/polars/cat_expr.rb +224 -0
  14. data/lib/polars/cat_name_space.rb +130 -32
  15. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  16. data/lib/polars/catalog/unity/column_info.rb +31 -0
  17. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  18. data/lib/polars/catalog/unity/table_info.rb +50 -0
  19. data/lib/polars/catalog.rb +448 -0
  20. data/lib/polars/config.rb +2 -2
  21. data/lib/polars/convert.rb +12 -2
  22. data/lib/polars/data_frame.rb +834 -48
  23. data/lib/polars/data_type_expr.rb +52 -0
  24. data/lib/polars/data_types.rb +61 -5
  25. data/lib/polars/date_time_expr.rb +251 -0
  26. data/lib/polars/date_time_name_space.rb +299 -0
  27. data/lib/polars/exceptions.rb +7 -2
  28. data/lib/polars/expr.rb +1247 -211
  29. data/lib/polars/functions/col.rb +6 -5
  30. data/lib/polars/functions/datatype.rb +21 -0
  31. data/lib/polars/functions/lazy.rb +127 -15
  32. data/lib/polars/functions/repeat.rb +4 -0
  33. data/lib/polars/io/csv.rb +19 -1
  34. data/lib/polars/io/json.rb +16 -0
  35. data/lib/polars/io/ndjson.rb +13 -0
  36. data/lib/polars/io/parquet.rb +70 -66
  37. data/lib/polars/io/scan_options.rb +47 -0
  38. data/lib/polars/lazy_frame.rb +1099 -95
  39. data/lib/polars/list_expr.rb +400 -11
  40. data/lib/polars/list_name_space.rb +321 -5
  41. data/lib/polars/meta_expr.rb +71 -22
  42. data/lib/polars/name_expr.rb +36 -0
  43. data/lib/polars/scan_cast_options.rb +64 -0
  44. data/lib/polars/schema.rb +84 -3
  45. data/lib/polars/selector.rb +210 -0
  46. data/lib/polars/selectors.rb +932 -203
  47. data/lib/polars/series.rb +1083 -63
  48. data/lib/polars/string_expr.rb +435 -9
  49. data/lib/polars/string_name_space.rb +729 -45
  50. data/lib/polars/struct_expr.rb +103 -0
  51. data/lib/polars/struct_name_space.rb +19 -1
  52. data/lib/polars/utils/parse.rb +40 -0
  53. data/lib/polars/utils/various.rb +18 -1
  54. data/lib/polars/utils.rb +9 -1
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars.rb +10 -0
  57. metadata +12 -2
@@ -31,58 +31,156 @@ module Polars
31
31
 
32
32
  # Return whether or not the column is a local categorical.
33
33
  #
34
+ # Always returns false.
35
+ #
36
+ # @return [Boolean]
37
+ def is_local
38
+ _s.cat_is_local
39
+ end
40
+
41
+ # Simply returns the column as-is; local representations are deprecated.
42
+ #
43
+ # @return [Series]
44
+ def to_local
45
+ Utils.wrap_s(_s.cat_to_local)
46
+ end
47
+
48
+ # Indicate whether the Series uses lexical ordering.
49
+ #
50
+ # @note
51
+ # This functionality is considered **unstable**. It may be changed
52
+ # at any point without it being considered a breaking change.
53
+ #
34
54
  # @return [Boolean]
35
55
  #
36
- # @example Categoricals constructed without a string cache are considered local.
37
- # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
38
- # s.cat.is_local
56
+ # @example
57
+ # s = Polars::Series.new(["b", "a", "b"]).cast(Polars::Categorical)
58
+ # s.cat.uses_lexical_ordering
39
59
  # # => true
60
+ def uses_lexical_ordering
61
+ _s.cat_uses_lexical_ordering
62
+ end
63
+
64
+ # Return the byte-length of the string representation of each value.
40
65
  #
41
- # @example Categoricals constructed with a string cache are considered global.
42
- # s = nil
43
- # Polars::StringCache.new do
44
- # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
45
- # end
46
- # s.cat.is_local
47
- # # => false
48
- def is_local
49
- _s.cat_is_local
66
+ # @return [Series]
67
+ #
68
+ # @example
69
+ # s = Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)
70
+ # s.cat.len_bytes
71
+ # # =>
72
+ # # shape: (4,)
73
+ # # Series: '' [u32]
74
+ # # [
75
+ # # 5
76
+ # # 3
77
+ # # 6
78
+ # # null
79
+ # # ]
80
+ def len_bytes
81
+ super
50
82
  end
51
83
 
52
- # Convert a categorical column to its local representation.
84
+ # Return the number of characters of the string representation of each value.
53
85
  #
54
- # This may change the underlying physical representation of the column.
86
+ # @return [Series]
87
+ #
88
+ # @example
89
+ # s = Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)
90
+ # s.cat.len_chars
91
+ # # =>
92
+ # # shape: (4,)
93
+ # # Series: '' [u32]
94
+ # # [
95
+ # # 4
96
+ # # 3
97
+ # # 2
98
+ # # null
99
+ # # ]
100
+ def len_chars
101
+ super
102
+ end
103
+
104
+ # Check if string representations of values start with a substring.
105
+ #
106
+ # @param prefix [String]
107
+ # Prefix substring.
55
108
  #
56
109
  # @return [Series]
57
110
  #
58
- # @example Compare the global and local representations of a categorical.
59
- # s = nil
60
- # Polars::StringCache.new do
61
- # _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
62
- # s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
63
- # end
64
- # s.to_physical
111
+ # @example
112
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil], dtype: Polars::Categorical)
113
+ # s.cat.starts_with("app")
65
114
  # # =>
66
115
  # # shape: (3,)
67
- # # Series: 'y' [u32]
116
+ # # Series: 'fruits' [bool]
68
117
  # # [
69
- # # 2
70
- # # 1
71
- # # 3
118
+ # # true
119
+ # # false
120
+ # # null
72
121
  # # ]
122
+ def starts_with(prefix)
123
+ super
124
+ end
125
+
126
+ # Check if string representations of values end with a substring.
127
+ #
128
+ # @param suffix [String]
129
+ # Suffix substring.
130
+ #
131
+ # @return [Series]
73
132
  #
74
133
  # @example
75
- # s.cat.to_local.to_physical
134
+ # s = Polars::Series.new("fruits", ["apple", "mango", nil], dtype: Polars::Categorical)
135
+ # s.cat.ends_with("go")
76
136
  # # =>
77
137
  # # shape: (3,)
78
- # # Series: 'y' [u32]
138
+ # # Series: 'fruits' [bool]
79
139
  # # [
80
- # # 0
81
- # # 1
82
- # # 2
140
+ # # false
141
+ # # true
142
+ # # null
83
143
  # # ]
84
- def to_local
85
- Utils.wrap_s(_s.cat_to_local)
144
+ def ends_with(suffix)
145
+ super
146
+ end
147
+
148
+ # Extract a substring from the string representation of each value.
149
+ #
150
+ # @param offset [Integer]
151
+ # Start index. Negative indexing is supported.
152
+ # @param length [Integer]
153
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
154
+ # end of the string.
155
+ #
156
+ # @return [Series]
157
+ #
158
+ # @example
159
+ # s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"], dtype: Polars::Categorical)
160
+ # s.cat.slice(-3)
161
+ # # =>
162
+ # # shape: (4,)
163
+ # # Series: '' [str]
164
+ # # [
165
+ # # "ear"
166
+ # # null
167
+ # # "aya"
168
+ # # "uit"
169
+ # # ]
170
+ #
171
+ # @example Using the optional `length` parameter
172
+ # s.cat.slice(4, 3)
173
+ # # =>
174
+ # # shape: (4,)
175
+ # # Series: '' [str]
176
+ # # [
177
+ # # ""
178
+ # # null
179
+ # # "ya"
180
+ # # "onf"
181
+ # # ]
182
+ def slice(offset, length = nil)
183
+ super
86
184
  end
87
185
  end
88
186
  end
@@ -0,0 +1,20 @@
module Polars
  class Catalog
    module Unity
      # Describes a single catalog registered in a metastore.
      #
      # Members are supplied as keyword arguments; any member that is
      # omitted is left as `nil`.
      CatalogInfo =
        ::Struct.new(
          *%i[
            name
            comment
            properties
            options
            storage_location
            created_at
            created_by
            updated_at
            updated_by
          ],
          keyword_init: true
        )
    end
  end
end
@@ -0,0 +1,31 @@
module Polars
  class Catalog
    module Unity
      # Describes one column of a catalog table.
      #
      # Members are supplied as keyword arguments; any member that is
      # omitted is left as `nil`.
      ColumnInfo =
        ::Struct.new(
          *%i[
            name
            type_name
            type_text
            type_json
            position
            comment
            partition_index
          ],
          keyword_init: true
        )

      class ColumnInfo
        # Get the native polars datatype of this column.
        #
        # The conversion is delegated to
        # `RbCatalogClient.type_json_to_polars_type`, which receives this
        # column's `type_json` member.
        #
        # @note
        #   This functionality is considered **unstable**. It may be changed
        #   at any point without it being considered a breaking change.
        #
        # @return [Object]
        def get_polars_dtype
          RbCatalogClient.type_json_to_polars_type(type_json)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
module Polars
  class Catalog
    module Unity
      # Describes a namespace inside a catalog.
      #
      # Unity catalog terminology also calls this a "schema".
      #
      # Members are supplied as keyword arguments; any member that is
      # omitted is left as `nil`.
      NamespaceInfo =
        ::Struct.new(
          *%i[
            name
            comment
            properties
            storage_location
            created_at
            created_by
            updated_at
            updated_by
          ],
          keyword_init: true
        )
    end
  end
end
@@ -0,0 +1,50 @@
module Polars
  class Catalog
    module Unity
      # Information for a catalog table.
      #
      # Members are supplied as keyword arguments; any member that is
      # omitted is left as `nil`.
      TableInfo =
        ::Struct.new(
          :name,
          :comment,
          :table_id,
          :table_type,
          :storage_location,
          :data_source_format,
          :columns,
          :properties,
          :created_at,
          :created_by,
          :updated_at,
          :updated_by,
          keyword_init: true
        )

      class TableInfo
        # Get the native polars schema of this table.
        #
        # Builds a schema (with dtype checking disabled) that maps each
        # column's name to the value returned by its `get_polars_dtype`.
        #
        # @note
        #   This functionality is considered **unstable**. It may be changed
        #   at any point without it being considered a breaking change.
        #
        # @return [Schema, nil]
        #   The schema, or `nil` when `columns` is `nil`.
        #
        # @raise [DuplicateError]
        #   If two columns share the same name.
        def get_polars_schema
          return nil if columns.nil?

          schema = Schema.new(check_dtypes: false)
          # Track names locally so that duplicate detection is a pure
          # membership test and does not depend on the truthiness of the
          # dtype already stored in the schema.
          seen = {}

          columns.each do |column_info|
            column_name = column_info.name
            if seen.key?(column_name)
              msg = "duplicate column name: #{column_name}"
              raise DuplicateError, msg
            end

            seen[column_name] = true
            schema[column_name] = column_info.get_polars_dtype
          end

          schema
        end
      end
    end
  end
end