ealdent-lda-ruby 0.2.1 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/VERSION.yml +4 -0
  2. data/lib/lda-inference.c +48 -64
  3. data/lib/lda.rb +7 -11
  4. metadata +2 -1
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 2
3
+ :patch: 2
4
+ :major: 0
@@ -918,70 +918,54 @@ static VALUE wrap_get_model_settings(VALUE self) {
918
918
 
919
919
 
920
920
  void Init_lda_ext() {
921
- corpus_loaded = FALSE;
922
- model_loaded = FALSE;
923
- VERBOSE = TRUE;
924
-
925
- rb_require("lda");
926
-
927
- /*
928
- * The Latent Dirichlet Allocation algorithm by Blei et al (2003). Ruby wrapper based on
929
- * lda-c code by David Blei (available at http://www.cs.princeton.edu/~blei/lda-c).
930
- */
931
- rb_mLda = rb_define_module("Lda");
932
-
933
- /*
934
- * Class that handles most of the functionality of LDA.
935
- */
936
- rb_cLda = rb_define_class_under(rb_mLda, "Lda", rb_cObject);
937
-
938
- /*
939
- * Class that represents a corpus.
940
- */
941
- rb_cLdaCorpus = rb_define_class_under(rb_mLda, "Corpus", rb_cObject);
942
-
943
- /*
944
- * Class that represents a single document.
945
- */
946
- rb_cLdaDocument = rb_define_class_under(rb_mLda, "Document", rb_cObject);
947
-
948
-
949
- // method to load the corpus
950
- rb_define_method(rb_cLda, "fast_load_corpus_from_file", wrap_load_corpus, 1);
951
- rb_define_method(rb_cLda, "corpus=", wrap_ruby_corpus, 1);
952
-
953
- // method to run em
954
- rb_define_method(rb_cLda, "em", wrap_em, 1);
955
-
956
- // method to load settings from file
957
- rb_define_method(rb_cLda, "load_settings", wrap_load_settings, 1);
958
-
959
- // method to set all the config options at once
960
- rb_define_method(rb_cLda, "set_config", wrap_set_config, 5);
961
-
962
- // accessor stuff for main settings
963
- rb_define_method(rb_cLda, "max_iter", wrap_get_max_iter, 0);
964
- rb_define_method(rb_cLda, "max_iter=", wrap_set_max_iter, 1);
965
- rb_define_method(rb_cLda, "convergence", wrap_get_converged, 0);
966
- rb_define_method(rb_cLda, "convergence=", wrap_set_converged, 1);
967
- rb_define_method(rb_cLda, "em_max_iter", wrap_get_em_max_iter, 0);
968
- rb_define_method(rb_cLda, "em_max_iter=", wrap_set_em_max_iter, 1);
969
- rb_define_method(rb_cLda, "em_convergence", wrap_get_em_converged, 0);
970
- rb_define_method(rb_cLda, "em_convergence=", wrap_set_em_converged, 1);
971
- rb_define_method(rb_cLda, "init_alpha=", wrap_set_initial_alpha, 1);
972
- rb_define_method(rb_cLda, "init_alpha", wrap_get_initial_alpha, 0);
973
- rb_define_method(rb_cLda, "est_alpha=", wrap_set_estimate_alpha, 1);
974
- rb_define_method(rb_cLda, "est_alpha", wrap_get_estimate_alpha, 0);
975
- rb_define_method(rb_cLda, "num_topics", wrap_get_num_topics, 0);
976
- rb_define_method(rb_cLda, "num_topics=", wrap_set_num_topics, 1);
977
- rb_define_method(rb_cLda, "verbose", wrap_get_verbosity, 0);
978
- rb_define_method(rb_cLda, "verbose=", wrap_set_verbosity, 1);
979
-
980
- // retrieve model and gamma
981
- rb_define_method(rb_cLda, "beta", wrap_get_model_beta, 0);
982
- rb_define_method(rb_cLda, "gamma", wrap_get_gamma, 0);
983
- rb_define_method(rb_cLda, "compute_phi", wrap_get_phi, 0);
984
- rb_define_method(rb_cLda, "model", wrap_get_model_settings, 0);
921
+ corpus_loaded = FALSE;
922
+ model_loaded = FALSE;
923
+ VERBOSE = TRUE;
924
+
925
+ rb_require("lda");
926
+
927
+ rb_mLda = rb_define_module("Lda");
928
+ rb_cLda = rb_define_class_under(rb_mLda, "Lda", rb_cObject);
929
+ rb_cLdaCorpus = rb_define_class_under(rb_mLda, "Corpus", rb_cObject);
930
+ rb_cLdaDocument = rb_define_class_under(rb_mLda, "Document", rb_cObject);
931
+
932
+
933
+ // method to load the corpus
934
+ rb_define_method(rb_cLda, "fast_load_corpus_from_file", wrap_load_corpus, 1);
935
+ rb_define_method(rb_cLda, "corpus=", wrap_ruby_corpus, 1);
936
+
937
+ // method to run em
938
+ rb_define_method(rb_cLda, "em", wrap_em, 1);
939
+
940
+ // method to load settings from file
941
+ rb_define_method(rb_cLda, "load_settings", wrap_load_settings, 1);
942
+
943
+ // method to set all the config options at once
944
+ rb_define_method(rb_cLda, "set_config", wrap_set_config, 5);
945
+
946
+ // accessor stuff for main settings
947
+ rb_define_method(rb_cLda, "max_iter", wrap_get_max_iter, 0);
948
+ rb_define_method(rb_cLda, "max_iter=", wrap_set_max_iter, 1);
949
+ rb_define_method(rb_cLda, "convergence", wrap_get_converged, 0);
950
+ rb_define_method(rb_cLda, "convergence=", wrap_set_converged, 1);
951
+ rb_define_method(rb_cLda, "em_max_iter", wrap_get_em_max_iter, 0);
952
+ rb_define_method(rb_cLda, "em_max_iter=", wrap_set_em_max_iter, 1);
953
+ rb_define_method(rb_cLda, "em_convergence", wrap_get_em_converged, 0);
954
+ rb_define_method(rb_cLda, "em_convergence=", wrap_set_em_converged, 1);
955
+ rb_define_method(rb_cLda, "init_alpha=", wrap_set_initial_alpha, 1);
956
+ rb_define_method(rb_cLda, "init_alpha", wrap_get_initial_alpha, 0);
957
+ rb_define_method(rb_cLda, "est_alpha=", wrap_set_estimate_alpha, 1);
958
+ rb_define_method(rb_cLda, "est_alpha", wrap_get_estimate_alpha, 0);
959
+ rb_define_method(rb_cLda, "num_topics", wrap_get_num_topics, 0);
960
+ rb_define_method(rb_cLda, "num_topics=", wrap_set_num_topics, 1);
961
+ rb_define_method(rb_cLda, "verbose", wrap_get_verbosity, 0);
962
+ rb_define_method(rb_cLda, "verbose=", wrap_set_verbosity, 1);
963
+
964
+ // retrieve model and gamma
965
+ rb_define_method(rb_cLda, "beta", wrap_get_model_beta, 0);
966
+ rb_define_method(rb_cLda, "gamma", wrap_get_gamma, 0);
967
+ rb_define_method(rb_cLda, "compute_phi", wrap_get_phi, 0);
968
+ rb_define_method(rb_cLda, "model", wrap_get_model_settings, 0);
985
969
  }
986
970
 
987
971
  #endif
data/lib/lda.rb CHANGED
@@ -150,10 +150,8 @@ module Lda
150
150
  # following it, though this isn't strictly enforced in this method.
151
151
  #
152
152
  def load_corpus(filename)
153
- c = Corpus.new
154
- c.load_from_file(filename)
155
- self.corpus = c
156
- @corpus = c
153
+ @corpus = Corpus.new
154
+ @corpus.load_from_file(filename)
157
155
 
158
156
  true
159
157
  end
@@ -168,14 +166,12 @@ module Lda
168
166
  # array itself.
169
167
  #
170
168
  def load_vocabulary(vocab)
171
- @vocab = Array.new
172
-
173
- File.open(filename, 'r') do |f|
174
- f.each do |line|
175
- @vocab << line.strip
176
- end
169
+ if vocab.is_a?(Array)
170
+ @vocab = Marshal::load(Marshal::dump(vocab)) # deep clone array
171
+ else
172
+ @vocab = File.open(vocab, 'r') { |f| f.read.split(/[\n\r]+/) }
177
173
  end
178
-
174
+
179
175
  true
180
176
  end
181
177
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ealdent-lda-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason M. Adams
@@ -40,6 +40,7 @@ files:
40
40
  - lib/lda.rb
41
41
  - lib/utils.c
42
42
  - lib/utils.h
43
+ - VERSION.yml
43
44
  has_rdoc: true
44
45
  homepage: http://github.com/ealdent/lda-ruby
45
46
  post_install_message: