lda-ruby 0.3.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +16 -0
  3. data/Gemfile +9 -0
  4. data/README.md +126 -3
  5. data/VERSION.yml +3 -3
  6. data/docs/modernization-handoff.md +233 -0
  7. data/docs/porting-strategy.md +148 -0
  8. data/docs/precompiled-platform-policy.md +81 -0
  9. data/docs/precompiled-target-evaluation.md +67 -0
  10. data/docs/release-runbook.md +192 -0
  11. data/docs/rust-orchestration-guardrails.md +50 -0
  12. data/ext/lda-ruby/cokus.c +10 -11
  13. data/ext/lda-ruby/cokus.h +3 -3
  14. data/ext/lda-ruby/extconf.rb +10 -6
  15. data/ext/lda-ruby/lda-inference.c +23 -7
  16. data/ext/lda-ruby/utils.c +8 -0
  17. data/ext/lda-ruby-rust/Cargo.toml +12 -0
  18. data/ext/lda-ruby-rust/README.md +73 -0
  19. data/ext/lda-ruby-rust/extconf.rb +135 -0
  20. data/ext/lda-ruby-rust/include/strings.h +35 -0
  21. data/ext/lda-ruby-rust/src/lib.rs +1263 -0
  22. data/lda-ruby.gemspec +0 -0
  23. data/lib/lda-ruby/backends/base.rb +133 -0
  24. data/lib/lda-ruby/backends/native.rb +158 -0
  25. data/lib/lda-ruby/backends/pure_ruby.rb +675 -0
  26. data/lib/lda-ruby/backends/rust.rb +607 -0
  27. data/lib/lda-ruby/backends.rb +58 -0
  28. data/lib/lda-ruby/corpus/corpus.rb +17 -15
  29. data/lib/lda-ruby/corpus/data_corpus.rb +2 -2
  30. data/lib/lda-ruby/corpus/directory_corpus.rb +2 -2
  31. data/lib/lda-ruby/corpus/text_corpus.rb +2 -2
  32. data/lib/lda-ruby/document/document.rb +6 -6
  33. data/lib/lda-ruby/document/text_document.rb +5 -4
  34. data/lib/lda-ruby/rust_build_policy.rb +21 -0
  35. data/lib/lda-ruby/version.rb +5 -0
  36. data/lib/lda-ruby.rb +293 -48
  37. data/test/backend_compatibility_test.rb +146 -0
  38. data/test/backends_selection_test.rb +100 -0
  39. data/test/benchmark_scripts_test.rb +23 -0
  40. data/test/gemspec_test.rb +27 -0
  41. data/test/lda_ruby_test.rb +49 -11
  42. data/test/packaged_gem_smoke_test.rb +33 -0
  43. data/test/pure_ruby_orchestration_test.rb +109 -0
  44. data/test/release_scripts_test.rb +93 -0
  45. data/test/rust_build_policy_test.rb +23 -0
  46. data/test/rust_orchestration_test.rb +911 -0
  47. data/test/simple_pipeline_test.rb +22 -0
  48. data/test/simple_yaml.rb +1 -7
  49. data/test/test_helper.rb +5 -6
  50. metadata +54 -38
  51. data/Rakefile +0 -61
  52. data/ext/lda-ruby/Makefile +0 -181
  53. data/test/data/.gitignore +0 -2
  54. data/test/simple_test.rb +0 -26
@@ -1,24 +1,26 @@
1
- require 'set'
1
+ require "set"
2
+ require "yaml"
2
3
 
3
4
  module Lda
4
5
  class Corpus
5
6
  attr_reader :documents, :num_docs, :num_terms, :vocabulary, :stopwords
6
7
 
7
8
  def initialize(stop_word_list = nil)
8
- @documents = Array.new
9
+ @documents = []
9
10
  @all_terms = Set.new
10
11
  @num_terms = @num_docs = 0
11
12
  @vocabulary = Vocabulary.new
12
- if stop_word_list.nil?
13
- @stopwords = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config', 'stopwords.yml'))
14
- else
15
- @stopwords = YAML.load_file(stop_word_list)
16
- end
17
- @stopwords.map! { |w| w.strip }
13
+ @stopwords = if stop_word_list.nil?
14
+ File.join(File.dirname(__FILE__), '..', 'config', 'stopwords.yml')
15
+ else
16
+ stop_word_list
17
+ end
18
+ @stopwords = YAML.load_file(@stopwords)
19
+ @stopwords.map!(&:strip)
18
20
  end
19
-
21
+
20
22
  def add_document(doc)
21
- raise 'Parameter +doc+ must be of type Document' unless doc.kind_of?(Document)
23
+ raise 'Parameter +doc+ must be of type Document' unless doc.is_a?(Document)
22
24
 
23
25
  @documents << doc
24
26
 
@@ -29,11 +31,11 @@ module Lda
29
31
  update_vocabulary(doc)
30
32
  nil
31
33
  end
32
-
33
- def remove_word(word)
34
- @vocabulary.words.delete word
35
- end
36
-
34
+
35
+ def remove_word(word)
36
+ @vocabulary.words.delete word
37
+ end
38
+
37
39
  protected
38
40
 
39
41
  def update_vocabulary(doc)
@@ -12,11 +12,11 @@ module Lda
12
12
  protected
13
13
 
14
14
  def load_from_file
15
- txt = File.open(@filename, 'r') { |f| f.read }
15
+ txt = File.open(@filename, 'r', &:read)
16
16
  lines = txt.split(/[\r\n]+/)
17
17
  lines.each do |line|
18
18
  add_document(DataDocument.new(self, line))
19
19
  end
20
20
  end
21
21
  end
22
- end
22
+ end
@@ -15,11 +15,11 @@ module Lda
15
15
  protected
16
16
 
17
17
  def load_from_directory
18
- dir_glob = File.join(@path, (@extension ? "*.#{@extension}" : "*"))
18
+ dir_glob = File.join(@path, (@extension ? "*.#{@extension}" : '*'))
19
19
 
20
20
  Dir.glob(dir_glob).each do |filename|
21
21
  add_document(TextDocument.build_from_file(self, filename))
22
22
  end
23
23
  end
24
24
  end
25
- end
25
+ end
@@ -6,7 +6,7 @@ module Lda
6
6
  def initialize(input_data)
7
7
  super()
8
8
 
9
- docs = if input_data.is_a?(String) && File.exists?(input_data)
9
+ docs = if input_data.is_a?(String) && File.exist?(input_data)
10
10
  # yaml file containing an array of strings representing each document
11
11
  YAML.load_file(input_data)
12
12
  elsif input_data.is_a?(Array)
@@ -16,7 +16,7 @@ module Lda
16
16
  # a single string representing one document
17
17
  [input_data]
18
18
  else
19
- raise "Unknown input type: please pass in a valid filename or an array of strings."
19
+ raise 'Unknown input type: please pass in a valid filename or an array of strings.'
20
20
  end
21
21
 
22
22
  docs.each do |doc|
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  require 'yaml'
3
2
 
4
3
  module Lda
@@ -8,9 +7,9 @@ module Lda
8
7
  def initialize(corpus)
9
8
  @corpus = corpus
10
9
 
11
- @words = Array.new
12
- @counts = Array.new
13
- @tokens = Array.new
10
+ @words = []
11
+ @counts = []
12
+ @tokens = []
14
13
  @length = 0
15
14
  @total = 0
16
15
  end
@@ -23,7 +22,7 @@ module Lda
23
22
  @length = @words.size
24
23
  end
25
24
 
26
- def has_text?
25
+ def text?
27
26
  false
28
27
  end
29
28
 
@@ -32,7 +31,8 @@ module Lda
32
31
  end
33
32
 
34
33
  def tokenize(text)
35
- clean_text = text.gsub(/[^a-zäöüß'-]+/i, ' ').gsub(/\s+/, ' ').downcase # remove everything but letters and ' and leave only single spaces
34
+ # remove everything but letters and ' and leave only single spaces
35
+ clean_text = text.gsub(/[^a-zäöüß'-]+/i, ' ').gsub(/\s+/, ' ').downcase
36
36
  @tokens = handle(clean_text.split(' '))
37
37
  nil
38
38
  end
@@ -11,14 +11,15 @@ module Lda
11
11
  build_from_tokens
12
12
  end
13
13
 
14
- def has_text?
14
+ def text?
15
15
  true
16
16
  end
17
17
 
18
18
  def self.build_from_file(corpus, filename)
19
- @filename = filename.dup.freeze
20
- text = File.open(@filename, 'r') { |f| f.read }
21
- self.new(corpus, text)
19
+ text = File.read(filename)
20
+ document = new(corpus, text)
21
+ document.instance_variable_set(:@filename, filename.dup.freeze)
22
+ document
22
23
  end
23
24
 
24
25
  protected
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lda
4
+ module RustBuildPolicy
5
+ ENV_KEY = "LDA_RUBY_RUST_BUILD"
6
+ AUTO = "auto"
7
+ ALWAYS = "always"
8
+ NEVER = "never"
9
+ VALID_VALUES = [AUTO, ALWAYS, NEVER].freeze
10
+
11
+ module_function
12
+
13
+ def resolve(raw_value = ENV[ENV_KEY])
14
+ value = raw_value.to_s.strip.downcase
15
+ return AUTO if value.empty?
16
+ return value if VALID_VALUES.include?(value)
17
+
18
+ AUTO
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lda
4
+ VERSION = "0.5.0"
5
+ end
data/lib/lda-ruby.rb CHANGED
@@ -1,29 +1,125 @@
1
- $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
-
3
- require 'lda-ruby/lda'
4
- require 'lda-ruby/document/document'
5
- require 'lda-ruby/document/data_document'
6
- require 'lda-ruby/document/text_document'
7
- require 'lda-ruby/corpus/corpus'
8
- require 'lda-ruby/corpus/data_corpus'
9
- require 'lda-ruby/corpus/text_corpus'
10
- require 'lda-ruby/corpus/directory_corpus'
11
- require 'lda-ruby/vocabulary'
1
+ # frozen_string_literal: true
2
+
3
+ require "lda-ruby/version"
4
+ require "rbconfig"
5
+
6
+ rust_extension_loaded = false
7
+ rust_dlext = RbConfig::CONFIG.fetch("DLEXT")
8
+
9
+ [
10
+ "lda_ruby_rust",
11
+ "../ext/lda-ruby-rust/target/release/lda_ruby_rust",
12
+ "../ext/lda-ruby-rust/target/release/lda_ruby_rust.#{rust_dlext}",
13
+ "../ext/lda-ruby-rust/target/debug/lda_ruby_rust",
14
+ "../ext/lda-ruby-rust/target/debug/lda_ruby_rust.#{rust_dlext}"
15
+ ].each do |rust_extension_candidate|
16
+ begin
17
+ if rust_extension_candidate.start_with?("../")
18
+ require_relative rust_extension_candidate
19
+ else
20
+ require rust_extension_candidate
21
+ end
22
+
23
+ rust_extension_loaded = true
24
+ break
25
+ rescue LoadError
26
+ next
27
+ end
28
+ end
29
+
30
+ native_extension_loaded = false
31
+
32
+ begin
33
+ require "lda-ruby/lda"
34
+ native_extension_loaded = true
35
+ rescue LoadError
36
+ begin
37
+ require_relative "../ext/lda-ruby/lda"
38
+ native_extension_loaded = true
39
+ rescue LoadError
40
+ native_extension_loaded = false
41
+ end
42
+ end
43
+
44
+ LDA_RUBY_NATIVE_EXTENSION_LOADED = native_extension_loaded unless defined?(LDA_RUBY_NATIVE_EXTENSION_LOADED)
45
+ LDA_RUBY_RUST_EXTENSION_LOADED = rust_extension_loaded unless defined?(LDA_RUBY_RUST_EXTENSION_LOADED)
46
+
47
+ require "lda-ruby/document/document"
48
+ require "lda-ruby/document/data_document"
49
+ require "lda-ruby/document/text_document"
50
+ require "lda-ruby/corpus/corpus"
51
+ require "lda-ruby/corpus/data_corpus"
52
+ require "lda-ruby/corpus/text_corpus"
53
+ require "lda-ruby/corpus/directory_corpus"
54
+ require "lda-ruby/vocabulary"
55
+ require "lda-ruby/backends"
12
56
 
13
57
  module Lda
58
+ RUST_EXTENSION_LOADED = LDA_RUBY_RUST_EXTENSION_LOADED unless const_defined?(:RUST_EXTENSION_LOADED)
59
+ NATIVE_EXTENSION_LOADED = LDA_RUBY_NATIVE_EXTENSION_LOADED unless const_defined?(:NATIVE_EXTENSION_LOADED)
60
+
14
61
  class Lda
15
- attr_reader :vocab, :corpus
62
+ NATIVE_ALIAS_MAP = {
63
+ fast_load_corpus_from_file: :__native_fast_load_corpus_from_file,
64
+ "corpus=": :__native_set_corpus,
65
+ em: :__native_em,
66
+ load_settings: :__native_load_settings,
67
+ set_config: :__native_set_config,
68
+ max_iter: :__native_max_iter,
69
+ "max_iter=": :__native_set_max_iter,
70
+ convergence: :__native_convergence,
71
+ "convergence=": :__native_set_convergence,
72
+ em_max_iter: :__native_em_max_iter,
73
+ "em_max_iter=": :__native_set_em_max_iter,
74
+ em_convergence: :__native_em_convergence,
75
+ "em_convergence=": :__native_set_em_convergence,
76
+ init_alpha: :__native_init_alpha,
77
+ "init_alpha=": :__native_set_init_alpha,
78
+ est_alpha: :__native_est_alpha,
79
+ "est_alpha=": :__native_set_est_alpha,
80
+ num_topics: :__native_num_topics,
81
+ "num_topics=": :__native_set_num_topics,
82
+ verbose: :__native_verbose,
83
+ "verbose=": :__native_set_verbose,
84
+ beta: :__native_beta,
85
+ gamma: :__native_gamma,
86
+ compute_phi: :__native_compute_phi,
87
+ model: :__native_model
88
+ }.freeze
89
+
90
+ NATIVE_ALIAS_MAP.each do |native_name, alias_name|
91
+ next unless method_defined?(native_name)
92
+
93
+ alias_method alias_name, native_name
94
+ private alias_name
95
+ end
96
+
97
+ attr_reader :vocab, :corpus, :backend
98
+
99
+ def initialize(corpus, backend: nil, random_seed: nil)
100
+ @backend = Backends.build(host: self, requested: backend, random_seed: random_seed)
16
101
 
17
- def initialize(corpus)
18
102
  load_default_settings
19
103
 
20
104
  @vocab = nil
21
105
  self.corpus = corpus
22
- @vocab = corpus.vocabulary.to_a if corpus.vocabulary
106
+ @vocab = corpus.vocabulary.to_a if corpus.respond_to?(:vocabulary) && corpus.vocabulary
23
107
 
24
108
  @phi = nil
25
109
  end
26
110
 
111
+ def backend_name
112
+ @backend.name
113
+ end
114
+
115
+ def native_backend?
116
+ backend_name == "native"
117
+ end
118
+
119
+ def rust_backend?
120
+ backend_name == "rust"
121
+ end
122
+
27
123
  def load_default_settings
28
124
  self.max_iter = 20
29
125
  self.convergence = 1e-6
@@ -36,25 +132,138 @@ module Lda
36
132
  [20, 1e-6, 100, 1e-4, 20, 0.3, 1]
37
133
  end
38
134
 
39
- def load_corpus(filename)
40
- @corpus = Corpus.new
41
- @corpus.load_from_file(filename)
135
+ def set_config(init_alpha, num_topics, max_iter, convergence, em_max_iter, em_convergence = self.em_convergence, est_alpha = self.est_alpha)
136
+ @backend.set_config(
137
+ Float(init_alpha),
138
+ Integer(num_topics),
139
+ Integer(max_iter),
140
+ Float(convergence),
141
+ Integer(em_max_iter),
142
+ Float(em_convergence),
143
+ Integer(est_alpha)
144
+ )
145
+ end
146
+
147
+ def max_iter
148
+ @backend.max_iter
149
+ end
150
+
151
+ def max_iter=(value)
152
+ @backend.max_iter = Integer(value)
153
+ end
154
+
155
+ def convergence
156
+ @backend.convergence
157
+ end
158
+
159
+ def convergence=(value)
160
+ @backend.convergence = Float(value)
161
+ end
162
+
163
+ def em_max_iter
164
+ @backend.em_max_iter
165
+ end
166
+
167
+ def em_max_iter=(value)
168
+ @backend.em_max_iter = Integer(value)
169
+ end
170
+
171
+ def em_convergence
172
+ @backend.em_convergence
173
+ end
174
+
175
+ def em_convergence=(value)
176
+ @backend.em_convergence = Float(value)
177
+ end
178
+
179
+ def num_topics
180
+ @backend.num_topics
181
+ end
182
+
183
+ def num_topics=(value)
184
+ @backend.num_topics = Integer(value)
185
+ end
42
186
 
187
+ def init_alpha
188
+ @backend.init_alpha
189
+ end
190
+
191
+ def init_alpha=(value)
192
+ @backend.init_alpha = Float(value)
193
+ end
194
+
195
+ def est_alpha
196
+ @backend.est_alpha
197
+ end
198
+
199
+ def est_alpha=(value)
200
+ @backend.est_alpha = Integer(value)
201
+ end
202
+
203
+ def verbose
204
+ @backend.verbose
205
+ end
206
+
207
+ def verbose=(value)
208
+ @backend.verbose = !!value
209
+ end
210
+
211
+ def corpus=(corpus)
212
+ @corpus = corpus
213
+ @backend.corpus = corpus
43
214
  true
44
215
  end
45
216
 
217
+ def load_corpus(filename)
218
+ fast_load_corpus_from_file(filename)
219
+ end
220
+
221
+ def fast_load_corpus_from_file(filename)
222
+ loaded = @backend.fast_load_corpus_from_file(filename)
223
+
224
+ if @backend.corpus
225
+ @corpus = @backend.corpus
226
+ @vocab = @corpus.vocabulary.to_a if @corpus.respond_to?(:vocabulary) && @corpus.vocabulary
227
+ elsif @corpus.nil?
228
+ @corpus = DataCorpus.new(filename)
229
+ end
230
+
231
+ !!loaded
232
+ end
233
+
234
+ def load_settings(settings_file)
235
+ @backend.load_settings(settings_file)
236
+ end
237
+
46
238
  def load_vocabulary(vocab)
47
239
  if vocab.is_a?(Array)
48
- @vocab = Marshal::load(Marshal::dump(vocab)) # deep clone array
240
+ @vocab = Marshal.load(Marshal.dump(vocab)) # deep clone array
49
241
  elsif vocab.is_a?(Vocabulary)
50
242
  @vocab = vocab.to_a
51
243
  else
52
- @vocab = File.open(vocab, 'r') { |f| f.read.split(/\s+/) }
244
+ @vocab = File.read(vocab).split(/\s+/)
53
245
  end
54
246
 
55
247
  true
56
248
  end
57
249
 
250
+ def em(start = "random")
251
+ @phi = nil
252
+ @backend.em(start.to_s)
253
+ end
254
+
255
+ def beta
256
+ @backend.beta
257
+ end
258
+
259
+ def gamma
260
+ @backend.gamma
261
+ end
262
+
263
+ def model
264
+ @backend.model
265
+ end
266
+
58
267
  #
59
268
  # Visualization method for printing out the top +words_per_topic+ words
60
269
  # for each topic.
@@ -62,14 +271,18 @@ module Lda
62
271
  # See also +top_words+.
63
272
  #
64
273
  def print_topics(words_per_topic = 10)
65
- raise 'No vocabulary loaded.' unless @vocab
274
+ raise "No vocabulary loaded." unless @vocab
66
275
 
67
- self.beta.each_with_index do |topic, topic_num|
68
- # Sort the topic array and return the sorted indices of the best scores
69
- indices = (topic.zip((0...@vocab.size).to_a).sort { |i, j| i[0] <=> j[0] }.map { |i, j| j }.reverse)[0...words_per_topic]
276
+ beta.each_with_index do |topic, topic_num|
277
+ indices = topic
278
+ .each_with_index
279
+ .sort_by { |score, _index| score }
280
+ .reverse
281
+ .first(words_per_topic)
282
+ .map { |_score, index| index }
70
283
 
71
284
  puts "Topic #{topic_num}"
72
- puts "\t#{indices.map {|i| @vocab[i]}.join("\n\t")}"
285
+ puts "\t#{indices.map { |i| @vocab[i] }.join("\n\t")}"
73
286
  puts ""
74
287
  end
75
288
 
@@ -87,21 +300,24 @@ module Lda
87
300
  # See also +print_topics+.
88
301
  #
89
302
  def top_word_indices(words_per_topic = 10)
90
- raise 'No vocabulary loaded.' unless @vocab
303
+ raise "No vocabulary loaded." unless @vocab
91
304
 
92
- # find the highest scoring words per topic
93
- topics = Hash.new
94
- indices = (0...@vocab.size).to_a
305
+ topics = {}
95
306
 
96
- self.beta.each_with_index do |topic, topic_num|
97
- topics[topic_num] = (topic.zip((0...@vocab.size).to_a).sort { |i, j| i[0] <=> j[0] }.map { |i, j| j }.reverse)[0...words_per_topic]
307
+ beta.each_with_index do |topic, topic_num|
308
+ topics[topic_num] = topic
309
+ .each_with_index
310
+ .sort_by { |score, _index| score }
311
+ .reverse
312
+ .first(words_per_topic)
313
+ .map { |_score, index| index }
98
314
  end
99
315
 
100
316
  topics
101
317
  end
102
318
 
103
319
  def top_words(words_per_topic = 10)
104
- output = Hash.new
320
+ output = {}
105
321
 
106
322
  topics = top_word_indices(words_per_topic)
107
323
  topics.each_pair do |topic_num, words|
@@ -118,49 +334,78 @@ module Lda
118
334
  # after the first call, so if it needs to be recomputed, set the +recompute+
119
335
  # value to true.
120
336
  #
121
- def phi(recompute=false)
122
- if @phi.nil? || recompute
123
- @phi = self.compute_phi
124
- end
337
+ def phi(recompute = false)
338
+ @phi = compute_phi if @phi.nil? || recompute
125
339
 
126
340
  @phi
127
341
  end
128
342
 
343
+ def compute_phi
344
+ @backend.compute_phi
345
+ end
346
+
129
347
  #
130
348
  # Compute the average log probability for each topic for each document in the corpus.
131
349
  # This method returns a matrix: num_docs x num_topics with the average log probability
132
350
  # for the topic in the document.
133
351
  #
134
352
  def compute_topic_document_probability
135
- outp = Array.new
353
+ phi_matrix = phi
354
+ document_counts = @corpus.documents.map(&:counts)
355
+
356
+ backend_output = @backend.topic_document_probability(phi_matrix, document_counts)
357
+ if valid_topic_document_probability_output?(backend_output, document_counts.size, num_topics)
358
+ return backend_output
359
+ end
360
+
361
+ outp = []
136
362
 
137
363
  @corpus.documents.each_with_index do |doc, idx|
138
- tops = [0.0] * self.num_topics
139
- ttl = doc.counts.inject(0.0) {|sum, i| sum + i}
140
- self.phi[idx].each_with_index do |word_dist, word_idx|
364
+ tops = [0.0] * num_topics
365
+ ttl = doc.counts.inject(0.0) { |sum, i| sum + i }
366
+
367
+ phi_matrix[idx].each_with_index do |word_dist, word_idx|
141
368
  word_dist.each_with_index do |top_prob, top_idx|
142
- tops[top_idx] += Math.log(top_prob) * doc.counts[word_idx]
369
+ tops[top_idx] += Math.log([top_prob, 1e-300].max) * doc.counts[word_idx]
143
370
  end
144
371
  end
145
- tops = tops.map {|i| i / ttl}
372
+
373
+ tops = tops.map { |i| i / ttl }
146
374
  outp << tops
147
375
  end
148
376
 
149
377
  outp
150
378
  end
151
379
 
380
+ def valid_topic_document_probability_output?(output, expected_docs, expected_topics)
381
+ return false unless output.is_a?(Array)
382
+ return false unless output.size == expected_docs
383
+
384
+ output.each do |row|
385
+ return false unless row.is_a?(Array)
386
+ return false unless row.size == expected_topics
387
+ row.each do |value|
388
+ return false unless value.is_a?(Numeric)
389
+ return false unless value.finite?
390
+ end
391
+ end
392
+
393
+ true
394
+ end
395
+
152
396
  #
153
397
  # String representation displaying current settings.
154
398
  #
155
399
  def to_s
156
400
  outp = ["LDA Settings:"]
157
- outp << " Initial alpha: %0.6f" % self.init_alpha
158
- outp << " # of topics: %d" % self.num_topics
159
- outp << " Max iterations: %d" % self.max_iter
160
- outp << " Convergence: %0.6f" % self.convergence
161
- outp << "EM max iterations: %d" % self.em_max_iter
162
- outp << " EM convergence: %0.6f" % self.em_convergence
163
- outp << " Estimate alpha: %d" % self.est_alpha
401
+ outp << format(" Initial alpha: %0.6f", init_alpha)
402
+ outp << format(" # of topics: %d", num_topics)
403
+ outp << format(" Max iterations: %d", max_iter)
404
+ outp << format(" Convergence: %0.6f", convergence)
405
+ outp << format("EM max iterations: %d", em_max_iter)
406
+ outp << format(" EM convergence: %0.6f", em_convergence)
407
+ outp << format(" Estimate alpha: %d", est_alpha)
408
+ outp << format(" Backend: %s", backend_name)
164
409
 
165
410
  outp.join("\n")
166
411
  end