rust 0.13 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56e795fb0a8893df45abd976e2ed91344156f3c3dd4a68e17afd1a0fb317ece3
4
- data.tar.gz: 406416738f1ab84fca06edd5cb59efdc623b12cefe03cd51b1a2cd840e218647
3
+ metadata.gz: aef18d4ed8bce09d5931fa0e9fde630ff36e1436897a448bbbc1dd2b451a28fc
4
+ data.tar.gz: 9313ac648a27c9f1cb6b369ba43852e0defb4bd4ab0a089b1c58ad4e43322de5
5
5
  SHA512:
6
- metadata.gz: 56854c3ff1bbd64ca8ff9d1201bc16fd37f4d3d465527217ab5c49d5cee0d6f4f34998bdf8a3ebdedf7ea8379909ca0db75d05da1bd46460c3e7066ee882ba7b
7
- data.tar.gz: 6b21ba70c7d144384d1647dfa76f8894457bf4d65b74ce32d4e775cc3ecdc660f4596f61edd81e91b08c9d2b3def3491cebe10b7923b9f2988d072f7c5d25674
6
+ metadata.gz: 9908bd416aa81bbd07ad8b5f4960481c31e9ed8da2b106e3e129b4165d835343029b43ee0cd1b74b1fd56451039a5558abda553037c4887ebf6f9b5865dc5202
7
+ data.tar.gz: db41177a2182dc57516459bbfff5ee6d361c275ccd1d78b5a5b2767da433a5cde366303b598d4e9bd00d692c14c682044bbc4976392973415099573185fcee15
@@ -607,6 +607,8 @@ module Rust
607
607
  # Sorts the rows of this data-frame by the values of the +by+ column.
608
608
 
609
609
  def sort_by!(by)
610
+ raise TypeError, "String expected" unless by.is_a?(String)
611
+ raise "'#{by}' is not a valid column name (#{self.colnames.to_s})" unless self.colnames.include?(by)
610
612
  copy = @data[by].clone
611
613
  copy.sort!
612
614
 
@@ -109,13 +109,23 @@ module Rust
109
109
 
110
110
##
# Datatype handler for values whose R type and R class are both "NULL".
class Null < RustDatatype
  # Tells whether this handler applies: true only when +type+ and +klass+
  # are both the string "NULL".
  def self.can_pull?(type, klass)
    type == "NULL" && klass == "NULL"
  end

  # A NULL value carries no payload, so the pulled value is always +nil+.
  def self.pull_variable(variable, type, klass)
    nil
  end
end
119
+
120
##
# Datatype handler for values whose R class is "omit".
# NOTE(review): presumably these are results of R's na.omit - confirm.
class Omit < RustDatatype
  # Tells whether this handler applies: only the class is inspected, the
  # type is ignored.
  def self.can_pull?(type, klass)
    klass == "omit"
  end

  # Pulls the value again after wrapping it in an +as.<type>(...)+ call,
  # and returns whatever Rust[] yields for that expression.
  def self.pull_variable(variable, type, klass)
    expression = "as.#{type}(#{variable})"
    Rust[expression]
  end
end
119
129
  end
120
130
 
121
131
  class TrueClass
@@ -0,0 +1,4 @@
1
# Requires every Ruby file that sits in the same directory as this file,
# skipping this file itself.
self_path = File.expand_path(__FILE__)
siblings = Dir.glob("#{File.dirname(self_path)}/*.rb")
siblings.each do |lib|
  next if lib == self_path

  require_relative lib
end
@@ -0,0 +1,283 @@
1
+ require_relative '../core'
2
+ require 'time'
3
+
4
+ module Rust
5
+
6
##
# Translates the raw answers given to a form question into other values.
#
# A mapping is either *direct* (all the keys are Strings, compared after the
# optional normalization) or *regexp-based* (all the keys are Regexps, tried
# in the order in which they appear in the hash).
class GoogleFormMapping
  # Options accepted by the constructor.
  SUPPORTED_OPTIONS = [:strip, :downcase].freeze

  ##
  # Loads a mapping from the CSV file +filename+. Each row contributes one
  # entry: the value of the column +key_from+ is mapped to the value of the
  # column +key_to+. The remaining +options+ are forwarded to the constructor.
  # Returns a GoogleFormMapping.
  #
  # Raises TypeError if +filename+, +key_from+ or +key_to+ is not a String.
  def self.load(filename, key_from="from", key_to="to", **options)
    raise TypeError, "Expected string for filename" unless filename.is_a?(String)
    raise TypeError, "Expected string for key_from" unless key_from.is_a?(String)
    raise TypeError, "Expected string for key_to" unless key_to.is_a?(String)

    result = {}
    Rust::CSV.read(filename, headers: true).each do |r|
      result[r[key_from]] = r[key_to]
    end

    GoogleFormMapping.new(result, **options)
  end

  ##
  # Creates a mapping from +hash+ (from -> to). Supported +options+:
  # - strip: strips the strings before comparing them (direct mappings only);
  # - downcase: downcases the strings before comparing them (direct mappings only).
  #
  # Raises TypeError if +hash+ is not a Hash or if its keys are neither all
  # Strings nor all Regexps; raises RuntimeError for unsupported options.
  def initialize(hash, **options)
    raise TypeError, "Hash should be an hash" unless hash.is_a?(Hash)

    all_regexp = hash.keys.all? { |m| m.is_a?(Regexp) }
    all_string = hash.keys.all? { |m| m.is_a?(String) }
    # The message must not reference anything outside this method: a broken
    # interpolation here would hide the TypeError behind a NameError.
    raise TypeError, "Mapping must have either all String keys or all Regexp keys." unless all_regexp || all_string

    unsupported = options.keys - SUPPORTED_OPTIONS
    raise "Unsupported options: #{unsupported}" unless unsupported.empty?

    # An empty hash satisfies both checks; it is treated as a regexp mapping.
    @type = all_regexp ? :regexp : :direct
    @strip = options[:strip]
    @downcase = options[:downcase]

    if @type == :direct
      # Keys are normalized once, so that lookups only normalize the input.
      @hash = {}
      hash.each { |k, v| @hash[normalize(k)] = v }
    else
      @hash = hash
    end
  end

  ##
  # Returns the value mapped to +from+, or +from+ itself when nothing matches.
  # For regexp mappings, the value of the first matching Regexp is returned.
  def get(from)
    if @type == :regexp
      @hash.each do |k, v|
        return v if from.match(k)
      end
      from
    else
      @hash[normalize(from)] || from
    end
  end

  private

  # Applies the normalizations enabled in the constructor (downcase first,
  # then strip) and returns the resulting string.
  def normalize(string)
    string = string.downcase if @downcase
    string = string.strip if @strip
    string
  end
end
73
+
74
##
# Wraps a data-frame read from a CSV exported by Google Forms and gives typed
# access to the answers. The first column is parsed as a timestamp; every
# other column is a question whose type is declared in the schema.
#
# Questions can be referred to by title (String) or by number (Integer), where
# number +n+ is the column at index +n+ of the data-frame (index 0 being the
# timestamp column).
class GoogleForm
  ALLOWED_TYPES = [:multiple, :checkbox, :scale, :text]

  ##
  # Reads the CSV at +filename+ and returns a GoogleForm. The schema must be a hash that contains, for each question number or name,
  # the type of answer (:multiple, :checkbox, :scale, or :text), or an array with the types of the questions in order.
  # For the other options, see Rust::CSV.read.
  def self.read(filename, schema, mappings={}, **options)
    data_frame = Rust::CSV.read(filename, **options)

    GoogleForm.new(data_frame, schema, mappings)
  end

  ##
  # Creates a form from +data_frame+ (a Rust::DataFrame), a +schema+ (Hash or
  # Array, see GoogleForm.read) and the optional +mappings+, a hash that
  # associates a question (title or number) with a GoogleFormMapping.
  #
  # Raises TypeError for malformed arguments, and RuntimeError if a question
  # number does not exist or if a mapping targets a :scale question.
  def initialize(data_frame, schema, mappings={})
    raise TypeError, "Expected Rust::DataFrame" unless data_frame.is_a?(Rust::DataFrame)
    raise TypeError, "Expected Hash or Array" if !schema.is_a?(Hash) && !schema.is_a?(Array)
    raise TypeError, "Schema keys must all be numbers or strings" if schema.is_a?(Hash) && !schema.keys.all? { |k| k.is_a?(String) || k.is_a?(Integer) }
    raise TypeError, "Mappings should be an hash [String, Integer] -> GoogleFormMapping" if !mappings.is_a?(Hash) || !mappings.keys.all? { |k| k.is_a?(String) || k.is_a?(Integer) } || !mappings.values.all? { |v| v.is_a?(GoogleFormMapping) }

    # Internally the schema is always keyed by question title.
    if schema.is_a?(Array)
      new_schema = {}
      schema.each_with_index do |type, i|
        # i + 1: column 0 holds the timestamp, questions start at column 1.
        new_schema[index_to_title(i + 1, data_frame)] = type
      end
      schema = new_schema
    else
      schema = schema.each_with_object({}) do |(question, type), normalized|
        normalized[question_title(question, data_frame)] = type
      end
    end
    raise TypeError, "Schema values must all be #{ALLOWED_TYPES}; #{schema.values.uniq - ALLOWED_TYPES} given instead" if !schema.values.all? { |v| ALLOWED_TYPES.include?(v)}
    raise TypeError, "Schema must include types for all the questions" if schema.size != (data_frame.columns - 1)

    @data_frame = data_frame
    @questions = data_frame.colnames
    @schema = schema

    # Mappings are keyed by title too, because get_value looks them up by
    # title: an Integer key left as-is would never be found.
    @mappings = {}
    mappings.each do |question, mapping|
      title = question_title(question, data_frame)
      raise "Mappings can not be defined for :scale questions" if schema[title] == :scale
      @mappings[title] = mapping
    end
  end

  ##
  # Returns the raw (unmapped) data-frame.
  def data_frame
    @data_frame
  end

  ##
  # Returns a new data-frame, with the same columns, that contains the
  # processed answers (see #answer) instead of the raw ones.
  def mapped_data_frame
    df = Rust::DataFrame.new(@data_frame.colnames)
    each_answer do |a|
      df << a
    end
    df
  end

  ##
  # Returns the number of rows (i.e., of submissions) of the data-frame.
  def rows
    @data_frame.rows
  end

  ##
  # Returns the +i+-th row with the first column parsed as a Time and every
  # other column converted according to the schema and the mappings.
  def answer(i)
    row = @data_frame.row(i)

    @questions.each_with_index do |colname, position|
      if position == 0
        row[colname] = Time.parse(row[colname])
      else
        row[colname] = get_value(row[colname], colname)
      end
    end

    row
  end

  ##
  # Yields each processed answer (see #answer), in row order.
  def each_answer
    (0...@data_frame.rows).each do |i|
      yield(answer(i))
    end
  end

  ##
  # Returns an array with all the processed answers (see #answer).
  def answers
    (0...@data_frame.rows).map { |i| answer(i) }
  end

  ##
  # Returns a new GoogleForm that only contains the rows for which the block,
  # called with the processed answer, returns a truthy value.
  def filter
    matching = Rust::DataFrame.new(@questions)

    (0...@data_frame.rows).each do |i|
      matching << @data_frame.row(i) if yield(answer(i))
    end

    GoogleForm.new(matching, @schema, @mappings)
  end

  ##
  # Returns the list of processed values given to +question+ (title or
  # number), one per row.
  def raw_answers_to(question)
    question = index_to_title(question) if question.is_a?(Integer)

    (@data_frame|question).map { |value| get_value(value, question) }
  end

  ##
  # Returns a hash value -> number of occurrences for +question+ (title or
  # number). Values that are arrays contribute one occurrence per element.
  # Missing answers (nil) are not counted.
  def answers_to(question)
    question = index_to_title(question) if question.is_a?(Integer)

    results = {}

    (@data_frame|question).each do |value|
      value = get_value(value, question)
      counted = value.is_a?(Array) ? value : [value]
      counted.each do |v|
        results[v] = results.fetch(v, 0) + 1
      end
    end
    results.delete(nil)

    results
  end

  ##
  # Returns a hash category -> number of occurrences for the textual
  # +question+ (title or number). The category of each non-nil answer is the
  # value returned by the block. Raises TypeError if the question is not :text.
  def textual_answers_to(question)
    question = index_to_title(question) if question.is_a?(Integer)
    raise TypeError, "Expected textual question, #{@schema[question]} instead" if @schema[question] != :text

    results = {}

    (@data_frame|question).each do |value|
      value = get_value(value, question)
      next if value.nil?

      category = yield(value)
      results[category] = results.fetch(category, 0) + 1
    end

    results
  end

  ##
  # Like #answers_to, but each count is divided by the total number of counted
  # values. The values listed in +exclude+ are removed before computing the total.
  def percentual_answers_to(question, exclude=[])
    answers = answers_to(question)

    exclude.each do |ex|
      answers.delete(ex)
    end

    tot = answers.values.sum
    answers.map { |k, v| [k, v.to_f/tot] }.to_h
  end

  ##
  # Like #textual_answers_to, but each count is divided by the total.
  def percentual_textual_answers_to(question, &block)
    answers = textual_answers_to(question, &block)

    tot = answers.values.sum
    answers.map { |k, v| [k, v.to_f/tot] }.to_h
  end

  private

  # Returns the title of the column at index +i+ (nil if out of range).
  def index_to_title(i, data_frame=@data_frame)
    data_frame.colnames[i]
  end

  # Returns the index of the column titled +title+ (nil if there is none).
  def title_to_index(title, data_frame=@data_frame)
    data_frame.colnames.index(title)
  end

  # Resolves a question given as title or number to its title. Strings are
  # returned unchanged; an Integer that matches no column raises RuntimeError.
  def question_title(question, data_frame=@data_frame)
    return question unless question.is_a?(Integer)

    title = index_to_title(question, data_frame)
    raise "No question with number #{question}" if title.nil?
    title
  end

  # Converts the raw +value+ given to +question+ (a title) according to the
  # type of the question:
  # - :multiple -> mapped value, nil when the mapped value is empty;
  # - :checkbox -> array of the (mapped) values, split on ';';
  # - :scale    -> 1-based rank of the value among the distinct non-empty
  #                values of the column (sorted), nil when empty;
  # - :text     -> mapped value.
  # Raises TypeError if the question has no known type.
  def get_value(value, question, data_frame=@data_frame)
    mapping = @mappings[question]

    mapped_value = mapping ? mapping.get(value) : value

    case @schema[question]
    when :multiple
      return nil if mapped_value == ""
      return mapped_value

    when :checkbox
      return value.split(';').map { |single_value| mapping ? mapping.get(single_value) : single_value }

    when :scale
      return nil if value == ""
      ordinal = (data_frame|question).uniq.sort
      ordinal.delete("")
      return ordinal.index(value) + 1

    when :text
      return mapped_value
    end

    raise TypeError
  end
end
273
+ end
274
+
275
module Rust
  ##
  # Convenience bindings that forward to Rust::CSV.
  # NOTE(review): presumably these duplicate bindings already provided next
  # to Rust::CSV - confirm before keeping both definitions.
  module RBindings
    # Forwards to Rust::CSV.read with the same +filename+ and +options+.
    def read_csv(filename, **options)
      Rust::CSV.read(filename, **options)
    end

    # Forwards to Rust::CSV.write with the same +filename+, +dataframe+ and
    # +options+.
    def write_csv(filename, dataframe, **options)
      Rust::CSV.write(filename, dataframe, **options)
    end
  end
end
@@ -0,0 +1,4 @@
1
# Requires every Ruby file that sits in the same directory as this file,
# skipping this file itself.
self_path = File.expand_path(__FILE__)
siblings = Dir.glob("#{File.dirname(self_path)}/*.rb")
siblings.each do |lib|
  next if lib == self_path

  require_relative lib
end
@@ -0,0 +1,144 @@
1
+ require_relative '../core'
2
+
3
module Rust
  ##
  # Minimal job runner. A Job is a named list of Tasks; each Task runs its
  # block in a thread and signals its outcome through the TaskHook it receives.
  module Jobs
    ##
    # Object handed to the block of a Task: the block must call either
    # #complete! or #error! to signal that the task is over.
    class TaskHook
      # +task+ is notified when the task is over; +on_complete+ and
      # +on_error+ are the callables invoked on success and on failure.
      def initialize(task, on_complete, on_error)
        @task = task
        @on_complete = on_complete
        @on_error = on_error
      end

      # Signals that the task completed: runs the completion callback and
      # then marks the task as done.
      def complete!
        @on_complete.call
        @task.notify
      end

      # Signals that the task failed: runs the error callback (passing
      # +message+ when one is given) and then marks the task as done.
      def error!(message=nil)
        if message.nil?
          @on_error.call
        else
          @on_error.call(message)
        end
        @task.notify
      end
    end

    ##
    # A unit of work, described by a block that receives a TaskHook.
    class Task
      # Title given at construction (may be nil).
      attr_reader :title

      # Creates a task with the given +title+. Raises RuntimeError if no
      # block is given.
      def initialize(title, &block)
        raise "Expected block to describe the task" unless block_given?
        @title = title
        @todo = block
        @done = false

        @complete_hook = proc {}
        @error_hook = proc {}
      end

      # Runs the block of the task in a new thread. If the block raises a
      # StandardError before signalling its outcome, the error hook is invoked
      # with the message of the exception, so that #waitfor does not wait forever.
      def start
        @thread = Thread.start do
          hook = TaskHook.new(self, @complete_hook, @error_hook)
          begin
            @todo.call(hook)
          rescue StandardError => e
            hook.error!(e.message) unless @done
          end
        end
      end

      # Marks the task as done (called by the TaskHook).
      def notify
        @done = true
      end

      # Blocks until the task is marked as done, checking every
      # +granularity+ seconds.
      def waitfor(granularity=0.1)
        sleep granularity until @done
      end

      # Currently a no-op.
      def commit
      end

      # Sets the block to run when the task completes. Raises RuntimeError if
      # no block is given.
      def on_complete(&block)
        raise "Block expected" unless block_given?
        @complete_hook = block
      end

      # Sets the block to run when the task fails. Raises RuntimeError if no
      # block is given.
      def on_error(&block)
        raise "Block expected" unless block_given?
        @error_hook = block
      end
    end

    ##
    # A named, ordered list of tasks, with logging.
    class Job
      # Creates a job called +name+. Options (both Symbol and String keys are
      # accepted):
      # - parallel: requests parallel execution (not implemented yet, see #start);
      # - parallel_tasks: Integer, number of parallel tasks (default 10 when
      #   parallel is set);
      # - logger: object that receives the log entries through <<
      #   (default STDOUT);
      # - quiet: when truthy, the log entries are discarded.
      def initialize(name, **options)
        @name = name
        @tasks = []

        @parallel = false

        if option(options, :parallel)
          @parallel = true
          @parallel_tasks = 10
        end

        parallel_tasks = option(options, :parallel_tasks)
        @parallel_tasks = parallel_tasks if parallel_tasks.is_a?(Integer)

        @logger = option(options, :logger) || STDOUT
        @logger = File.open(File::NULL, "w") if option(options, :quiet)
      end

      # Appends one entry to the logger. Newlines in +message+ are replaced
      # with " -- " so that every entry takes exactly one line.
      def log(message, type="INFO")
        @logger << "[#{type}] #{Time.now}: #{message.to_s.gsub("\n", " -- ")}\n"
      end

      # Logs +message+ with type INFO.
      def log_info(message)
        log(message, "INFO")
      end

      # Logs +message+ with type WARNING.
      def log_warning(message)
        log(message, "WARNING")
      end

      # Logs +message+ with type ERROR.
      def log_error(message)
        log(message, "ERROR")
      end

      # Adds a task to the job: either an existing Task (+task+) or a new one
      # built from the block, titled with the +title+ option. Raises
      # RuntimeError if both are given or if +task+ is not a Task.
      def add_task(task=nil, **options, &block)
        if block_given?
          raise "You gave both a block and a task. Please, choose one" if task
          task = Task.new(option(options, :title), &block)
        end

        raise "Expected a task, #{task.class} given instead" unless task.is_a?(Task)

        @tasks << task
      end

      # Runs the tasks one after the other, waiting for each of them and
      # logging its start and its outcome. Parallel execution is not
      # implemented yet: in parallel mode a warning is logged and no task is run.
      def start
        log_info "Job \"#{@name}\" started"
        if @parallel
          log_warning "Parallel execution is not implemented yet: no task was run"
          return
        end

        @tasks.each do |t|
          t.on_complete do
            log_info "Task \"#{t.title}\" completed"
          end

          t.on_error do |message|
            log_error "Task \"#{t.title}\" did not complete: #{message}"
          end

          log_info "Task \"#{t.title}\" started"
          t.start
          t.waitfor
        end
      end

      private

      # Reads +key+ from +options+, accepting both the Symbol and the String
      # form of the key (the Symbol form wins when both are present).
      def option(options, key)
        options.key?(key) ? options[key] : options[key.to_s]
      end
    end

    ##
    # Placeholder for a job that can be resumed: not implemented yet.
    class Resumeable < Job
      def initialize(name, **options)
        super
        # TODO complete here
      end

      # Not implemented yet: currently runs nothing.
      def start
        # TODO complete here
      end
    end
  end
end
@@ -181,12 +181,22 @@ module Rust::Models::Regression
181
181
 
182
182
  return self, excluded
183
183
  end
184
-
184
+
185
##
# Predicts the dependent variable from a new observation of the independent ones.
# Note: Not fully tested
#
# The observation +line+ is stored in the R variable +tmp.model.newline+, and
# R's +predict+ is then evaluated on the R mirror of this model, all within a
# single Rust.exclusive block.
def predict(line)
  Rust.exclusive do
    Rust['tmp.model.newline'] = line
    # The mirror is resolved after the observation has been stored.
    r_call = "predict(#{self.r_mirror}, tmp.model.newline)"
    return Rust[r_call]
  end
end
185
195
 
186
196
  def method_missing(name, *args)
187
197
  return model|name.to_s
188
198
  end
189
-
199
+
190
200
  ##
191
201
  # Returns a summary for the model using the summary function in R.
192
202
 
@@ -241,6 +251,57 @@ module Rust::Models::Regression
241
251
  **options
242
252
  )
243
253
  end
254
+
255
##
# Returns the model as a proc that can be used to predict values.
#
# The proc takes a data-frame and returns a number: the intercept plus, for
# each variable of the model whose name matches a column, the coefficient
# times the value of that column. Numeric columns are used as they are; every
# other column is expanded into one 0/1 column per distinct value, named
# <column><value>. Only the first row of the data-frame is used.
def to_proc
  proc do |unnormalized_data|
    # Builds a single-column data-frame named +name+ with the given cells.
    build_column = lambda do |name, cells|
      column = Rust::DataFrame.new([name])
      cells.each { |cell| column << [cell] }
      column
    end

    # Placeholder column, removed below: it gives the data-frame the right
    # number of rows before the real columns are bound to it.
    data = Rust::DataFrame.new(["__TOREM__"])
    unnormalized_data.rows.times { data << [0] }

    unnormalized_data.colnames.each do |col|
      values = unnormalized_data|col
      if values[0].is_a?(Numeric)
        data.cbind!(build_column.call(col, values))
      else
        values.uniq.each do |val|
          indicators = values.map { |v| v == val ? 1 : 0 }
          data.cbind!(build_column.call(col + val, indicators))
        end
      end
    end
    data.delete_column("__TOREM__")

    value = 0
    @variables.each do |var|
      if var.name == "(Intercept)"
        value += var.coefficient
      elsif data.colnames.include?(var.name)
        # Variables with no matching column do not contribute.
        value += (data|var.name)[0] * var.coefficient
      end
    end

    value
  end
end
301
+
302
# Predicts a value for +line+ by calling the proc returned by #to_proc on it.
def predict(line)
  predictor = self.to_proc
  predictor.call(line)
end
244
305
  end
245
306
 
246
307
  ##
data/lib/rust.rb CHANGED
@@ -2,6 +2,7 @@ require_relative 'rust/core'
2
2
  require_relative 'rust/models/all'
3
3
  require_relative 'rust/plots/all'
4
4
  require_relative 'rust/stats/all'
5
+ require_relative 'rust/forms/all'
5
6
 
6
7
  module Rust
7
8
  @@datasets = {}
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.13'
4
+ version: '0.15'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-11-17 00:00:00.000000000 Z
10
+ date: 2026-06-25 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rinruby
@@ -79,6 +78,10 @@ files:
79
78
  - lib/rust/external/ggplot2/scale.rb
80
79
  - lib/rust/external/ggplot2/themes.rb
81
80
  - lib/rust/external/robustbase.rb
81
+ - lib/rust/forms/all.rb
82
+ - lib/rust/forms/google_forms.rb
83
+ - lib/rust/jobs/all.rb
84
+ - lib/rust/jobs/jobs.rb
82
85
  - lib/rust/models/all.rb
83
86
  - lib/rust/models/anova.rb
84
87
  - lib/rust/models/regression.rb
@@ -96,7 +99,6 @@ homepage: https://github.com/intersimone999/ruby-rust
96
99
  licenses:
97
100
  - GPL-3.0-only
98
101
  metadata: {}
99
- post_install_message:
100
102
  rdoc_options: []
101
103
  require_paths:
102
104
  - lib
@@ -111,8 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
113
  - !ruby/object:Gem::Version
112
114
  version: '0'
113
115
  requirements: []
114
- rubygems_version: 3.5.16
115
- signing_key:
116
+ rubygems_version: 3.6.9
116
117
  specification_version: 4
117
118
  summary: Ruby advanced statistical library
118
119
  test_files: []