ealdent-lda-ruby 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ *.o
2
+ *.bundle
3
+ *.tmproj
4
+ pkg
5
+ test/blei
data/README CHANGED
@@ -7,16 +7,15 @@ The original C code relied on files for the input and output. We felt it was nec
7
7
  Example usage:
8
8
 
9
9
  require 'lda'
10
- lda = Lda::Lda.new # create an Lda object for training
11
10
  corpus = Lda::Corpus.new("data/data_file.dat")
12
- lda.corpus = corpus
13
- lda.em("random") # run EM algorithm using random starting points
11
+ lda = Lda::Lda.new(corpus) # create an Lda object for training
12
+ lda.em("random") # run EM algorithm using random starting points
14
13
  lda.load_vocabulary("data/vocab.txt")
15
- lda.print_topics(20) # print the topic 20 words per topic
14
+ lda.print_topics(20) # print the top 20 words per topic
16
15
 
17
- See the rdocs for further information. You can also check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor that are very knowledgeable.
16
+ If you have any questions, you can check out the mailing list for this project or email lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor that are very knowledgeable.
18
17
 
19
18
 
20
19
  References
21
20
 
22
- Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022.
21
+ Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022.
@@ -8,19 +8,17 @@ The original C code relied on files for the input and output. We felt it was nec
8
8
 
9
9
  ### Example usage:
10
10
 
11
- require 'lda'
12
- lda = Lda::Lda.new # create an Lda object for training
11
+ require 'lda-ruby'
13
12
  corpus = Lda::Corpus.new("data/data_file.dat")
14
- lda.corpus = corpus
15
- lda.em("random") # run EM algorithm using random starting points
13
+ lda = Lda::Lda.new(corpus) # create an Lda object for training
14
+ lda.em("random") # run EM algorithm using random starting points
16
15
  lda.load_vocabulary("data/vocab.txt")
17
- lda.print_topics(20) # print the topic 20 words per topic
16
+ lda.print_topics(20) # print the top 20 words per topic
18
17
 
19
- See the rdocs for further information. You can also check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor that are very knowledgeable.
18
+ If you have any questions, you can check out the mailing list for this project or email lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable.
20
19
 
21
20
  ## Resources
22
21
 
23
-
24
22
  + [Blog post about LDA-Ruby][lda-ruby]
25
23
  + [David Blei's lda-c code][blei]
26
24
  + [Wikipedia article on LDA][wikipedia]
@@ -29,11 +27,12 @@ See the rdocs for further information. You can also check out the mailing list f
29
27
 
30
28
  ## References
31
29
 
32
- Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
30
+ Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
33
31
 
34
32
  [svmlight]: http://svmlight.joachims.org
35
33
  [lda-ruby]: http://mendicantbug.com/2008/11/17/lda-in-ruby/
36
34
  [blei]: http://www.cs.princeton.edu/~blei/lda-c/
37
35
  [wikipedia]: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation
38
36
  [ap-data]: http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
39
- [pdf]: http://www.cs.princeton.edu/picasso/mats/BleiNgJordan2003_blei.pdf
37
+ [pdf]: http://www.cs.princeton.edu/picasso/mats/BleiNgJordan2003_blei.pdf
38
+ [topic-models]: https://lists.cs.princeton.edu/mailman/listinfo/topic-models
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "lda-ruby"
8
+ gem.summary = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
9
+ gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
10
+ gem.email = "jasonmadams@gmail.com"
11
+ gem.homepage = "http://github.com/ealdent/lda-ruby"
12
+ gem.authors = ['David Blei', 'Jason Adams']
13
+ gem.extensions = ['ext/lda-ruby/extconf.rb']
14
+ gem.require_paths = ['lib', 'ext']
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test|
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/*_test.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/*_test.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+ task :default => :test
43
+
44
+ require 'rake/rdoctask'
45
+ Rake::RDocTask.new do |rdoc|
46
+ if File.exist?('VERSION.yml')
47
+ config = YAML.load(File.read('VERSION.yml'))
48
+ version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
49
+ else
50
+ version = ""
51
+ end
52
+
53
+ rdoc.rdoc_dir = 'rdoc'
54
+ rdoc.title = "lda-ruby #{version}"
55
+ rdoc.rdoc_files.include('README*')
56
+ rdoc.rdoc_files.include('lib/**/*.rb')
57
+ end
58
+
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 3
3
2
  :major: 0
4
- :minor: 2
3
+ :minor: 3
4
+ :patch: 0
@@ -0,0 +1,181 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = /home/taf2/.local/include/ruby-1.9.1
8
+ hdrdir = /home/taf2/.local/include/ruby-1.9.1
9
+ arch_hdrdir = /home/taf2/.local/include/ruby-1.9.1/$(arch)
10
+ VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
11
+ prefix = $(DESTDIR)/home/taf2/.local
12
+ exec_prefix = $(prefix)
13
+ vendorhdrdir = $(rubyhdrdir)/vendor_ruby
14
+ sitehdrdir = $(rubyhdrdir)/site_ruby
15
+ rubyhdrdir = $(includedir)/$(RUBY_INSTALL_NAME)-$(ruby_version)
16
+ vendordir = $(libdir)/$(RUBY_INSTALL_NAME)/vendor_ruby
17
+ sitedir = $(libdir)/$(RUBY_INSTALL_NAME)/site_ruby
18
+ mandir = $(datarootdir)/man
19
+ localedir = $(datarootdir)/locale
20
+ libdir = $(exec_prefix)/lib
21
+ psdir = $(docdir)
22
+ pdfdir = $(docdir)
23
+ dvidir = $(docdir)
24
+ htmldir = $(docdir)
25
+ infodir = $(datarootdir)/info
26
+ docdir = $(datarootdir)/doc/$(PACKAGE)
27
+ oldincludedir = $(DESTDIR)/usr/include
28
+ includedir = $(prefix)/include
29
+ localstatedir = $(prefix)/var
30
+ sharedstatedir = $(prefix)/com
31
+ sysconfdir = $(prefix)/etc
32
+ datadir = $(datarootdir)
33
+ datarootdir = $(prefix)/share
34
+ libexecdir = $(exec_prefix)/libexec
35
+ sbindir = $(exec_prefix)/sbin
36
+ bindir = $(exec_prefix)/bin
37
+ rubylibdir = $(libdir)/$(ruby_install_name)/$(ruby_version)
38
+ archdir = $(rubylibdir)/$(arch)
39
+ sitelibdir = $(sitedir)/$(ruby_version)
40
+ sitearchdir = $(sitelibdir)/$(sitearch)
41
+ vendorlibdir = $(vendordir)/$(ruby_version)
42
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
43
+
44
+ CC = gcc
45
+ CXX = g++
46
+ LIBRUBY = $(LIBRUBY_SO)
47
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
48
+ LIBRUBYARG_SHARED = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
49
+ LIBRUBYARG_STATIC = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static
50
+ OUTFLAG = -o
51
+ COUTFLAG = -o
52
+
53
+ RUBY_EXTCONF_H =
54
+ cflags = $(optflags) $(debugflags) $(warnflags)
55
+ optflags = -O0
56
+ debugflags = -g3 -ggdb
57
+ warnflags = -Wall -Wno-parentheses
58
+ CFLAGS = -fPIC $(cflags) -fPIC -Wall -ggdb -O0
59
+ INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
60
+ DEFS =
61
+ CPPFLAGS = -D USE_RUBY $(DEFS) $(cppflags)
62
+ CXXFLAGS = $(CFLAGS) $(cxxflags)
63
+ ldflags = -L. -rdynamic -Wl,-export-dynamic
64
+ dldflags =
65
+ archflag =
66
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
67
+ LDSHARED = $(CC) -shared
68
+ LDSHAREDXX = $(CXX) -shared
69
+ AR = ar
70
+ EXEEXT =
71
+
72
+ RUBY_INSTALL_NAME = ruby
73
+ RUBY_SO_NAME = ruby
74
+ arch = x86_64-linux
75
+ sitearch = x86_64-linux
76
+ ruby_version = 1.9.1
77
+ ruby = /home/taf2/.local/bin/ruby
78
+ RUBY = $(ruby)
79
+ RM = rm -f
80
+ RM_RF = $(RUBY) -run -e rm -- -rf
81
+ RMDIRS = $(RUBY) -run -e rmdir -- -p
82
+ MAKEDIRS = mkdir -p
83
+ INSTALL = /usr/bin/install -c
84
+ INSTALL_PROG = $(INSTALL) -m 0755
85
+ INSTALL_DATA = $(INSTALL) -m 644
86
+ COPY = cp
87
+
88
+ #### End of system configuration section. ####
89
+
90
+ preload =
91
+
92
+ libpath = . $(libdir)
93
+ LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
94
+ DEFFILE =
95
+
96
+ CLEANFILES = mkmf.log
97
+ DISTCLEANFILES =
98
+ DISTCLEANDIRS =
99
+
100
+ extout =
101
+ extout_prefix =
102
+ target_prefix =
103
+ LOCAL_LIBS =
104
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -lrt -ldl -lcrypt -lm -lc
105
+ SRCS = lda-model.c lda-data.c utils.c lda-alpha.c cokus.c lda-inference.c
106
+ OBJS = lda-model.o lda-data.o utils.o lda-alpha.o cokus.o lda-inference.o
107
+ TARGET = lda_ext
108
+ DLLIB = $(TARGET).so
109
+ EXTSTATIC =
110
+ STATIC_LIB =
111
+
112
+ BINDIR = $(bindir)
113
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
114
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
115
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
116
+ HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
117
+ ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
118
+
119
+ TARGET_SO = $(DLLIB)
120
+ CLEANLIBS = $(TARGET).so
121
+ CLEANOBJS = *.o *.bak
122
+
123
+ all: $(DLLIB)
124
+ static: $(STATIC_LIB)
125
+
126
+ clean-rb-default::
127
+ clean-rb::
128
+ clean-so::
129
+ clean: clean-so clean-rb-default clean-rb
130
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
131
+
132
+ distclean-rb-default::
133
+ distclean-rb::
134
+ distclean-so::
135
+ distclean: clean distclean-so distclean-rb-default distclean-rb
136
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
137
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
138
+ @-$(RMDIRS) $(DISTCLEANDIRS)
139
+
140
+ realclean: distclean
141
+ install: install-so install-rb
142
+
143
+ install-so: $(RUBYARCHDIR)
144
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
145
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
146
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
147
+ install-rb: pre-install-rb install-rb-default
148
+ install-rb-default: pre-install-rb-default
149
+ pre-install-rb: Makefile
150
+ pre-install-rb-default: Makefile
151
+ $(RUBYARCHDIR):
152
+ $(MAKEDIRS) $@
153
+
154
+ site-install: site-install-so site-install-rb
155
+ site-install-so: install-so
156
+ site-install-rb: install-rb
157
+
158
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
159
+
160
+ .cc.o:
161
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
162
+
163
+ .cxx.o:
164
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
165
+
166
+ .cpp.o:
167
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
168
+
169
+ .C.o:
170
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
171
+
172
+ .c.o:
173
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
174
+
175
+ $(DLLIB): $(OBJS) Makefile
176
+ @-$(RM) $(@)
177
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
178
+
179
+
180
+
181
+ $(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
File without changes
File without changes
@@ -0,0 +1,9 @@
1
+ ENV["ARCHFLAGS"] ||= "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}" # keep an ARCHFLAGS the caller already exported; otherwise default from `uname -p` (NOTE(review): every non-PowerPC host, 64-bit included, defaults to 'i386' - confirm that is intended)
2
+
3
+ require 'mkmf'
4
+
5
+ $CFLAGS << ' -Wall -ggdb -O0' # all warnings, gdb debug info, optimisation disabled
6
+ $defs.push( "-D USE_RUBY" ) # defines USE_RUBY, which the C sources test with #ifdef USE_RUBY
7
+
8
+ dir_config('lda-ruby/lda')
9
+ create_makefile("lda-ruby/lda") # write the Makefile for the extension target 'lda-ruby/lda'
File without changes
File without changes
File without changes
File without changes
@@ -34,7 +34,7 @@
34
34
  #ifdef USE_RUBY
35
35
  #include "ruby.h"
36
36
 
37
- VALUE rb_mLda;
37
+ VALUE rb_cLdaModule;
38
38
  VALUE rb_cLda;
39
39
  VALUE rb_cLdaCorpus;
40
40
  VALUE rb_cLdaDocument;
@@ -83,7 +83,7 @@ double lda_inference(document* doc, lda_model* model, double* var_gamma, double*
83
83
  printf("phi for term: %d of %d\n", index, model->num_terms);
84
84
  phi[n][k] = 0.0;
85
85
  }
86
- else {
86
+ else {
87
87
  phi[n][k] =
88
88
  digamma_gam[k] +
89
89
  model->log_prob_w[k][index];
@@ -142,7 +142,7 @@ double compute_likelihood(document* doc, lda_model* model, double** phi, double*
142
142
  for (k = 0; k < model->num_topics; k++)
143
143
  {
144
144
  likelihood += (model->alpha - 1)*(dig[k] - digsum) + lgamma(var_gamma[k]) - (var_gamma[k] - 1)*(dig[k] - digsum);
145
-
145
+
146
146
  for (n = 0; n < doc->length; n++)
147
147
  {
148
148
  if (phi[n][k] > 0)
@@ -261,7 +261,7 @@ void run_em(char* start, char* directory, corpus* corpus) {
261
261
  } else {
262
262
  quiet_lda_mle(model, ss, 0);
263
263
  }
264
-
264
+
265
265
  model->alpha = INITIAL_ALPHA;
266
266
  } else if (strcmp(start, "random")==0) {
267
267
  model = new_lda_model(corpus->num_terms, NTOPICS);
@@ -499,7 +499,7 @@ void run_quiet_em(char* start, corpus* corpus) {
499
499
 
500
500
  lda_suffstats* ss = NULL;
501
501
  if (strncmp(start, "seeded",6)==0) {
502
- model = new_lda_model(corpus->num_terms, NTOPICS);
502
+ model = quiet_new_lda_model(corpus->num_terms, NTOPICS);
503
503
  model->alpha = INITIAL_ALPHA;
504
504
  ss = new_lda_suffstats(model);
505
505
  if (VERBOSE) {
@@ -513,7 +513,7 @@ void run_quiet_em(char* start, corpus* corpus) {
513
513
  quiet_lda_mle(model, ss, 0);
514
514
  }
515
515
  } else if (strncmp(start, "fixed",5)==0) {
516
- model = new_lda_model(corpus->num_terms, NTOPICS);
516
+ model = quiet_new_lda_model(corpus->num_terms, NTOPICS);
517
517
  model->alpha = INITIAL_ALPHA;
518
518
  ss = new_lda_suffstats(model);
519
519
  corpus_initialize_fixed_ss(ss, model, corpus);
@@ -523,7 +523,7 @@ void run_quiet_em(char* start, corpus* corpus) {
523
523
  quiet_lda_mle(model, ss, 0);
524
524
  }
525
525
  } else if (strncmp(start, "random",6)==0) {
526
- model = new_lda_model(corpus->num_terms, NTOPICS);
526
+ model = quiet_new_lda_model(corpus->num_terms, NTOPICS);
527
527
  model->alpha = INITIAL_ALPHA;
528
528
  ss = new_lda_suffstats(model);
529
529
  random_initialize_ss(ss, model);
@@ -605,7 +605,7 @@ void run_quiet_em(char* start, corpus* corpus) {
605
605
 
606
606
  /*
607
607
  * Set all of the settings in one command:
608
- *
608
+ *
609
609
  * * init_alpha
610
610
  * * num_topics
611
611
  * * max_iter
@@ -710,7 +710,7 @@ static VALUE wrap_get_num_topics(VALUE self) {
710
710
  */
711
711
  static VALUE wrap_set_initial_alpha(VALUE self, VALUE initial_alpha) {
712
712
  INITIAL_ALPHA = (float)NUM2DBL(initial_alpha);
713
-
713
+
714
714
  return initial_alpha;
715
715
  }
716
716
 
@@ -719,7 +719,7 @@ static VALUE wrap_set_initial_alpha(VALUE self, VALUE initial_alpha) {
719
719
  */
720
720
  static VALUE wrap_set_num_topics(VALUE self, VALUE ntopics) {
721
721
  NTOPICS = NUM2INT(ntopics);
722
-
722
+
723
723
  return ntopics;
724
724
  }
725
725
 
@@ -735,7 +735,7 @@ static VALUE wrap_get_estimate_alpha(VALUE self) {
735
735
  */
736
736
  static VALUE wrap_set_estimate_alpha(VALUE self, VALUE est_alpha) {
737
737
  ESTIMATE_ALPHA = NUM2INT(est_alpha);
738
-
738
+
739
739
  return est_alpha;
740
740
  }
741
741
 
@@ -760,7 +760,7 @@ static VALUE wrap_set_verbosity(VALUE self, VALUE verbosity) {
760
760
  } else {
761
761
  VERBOSE = FALSE;
762
762
  }
763
-
763
+
764
764
  return verbosity;
765
765
  }
766
766
 
@@ -777,7 +777,7 @@ static VALUE wrap_set_verbosity(VALUE self, VALUE verbosity) {
777
777
  static VALUE wrap_em(VALUE self, VALUE start) {
778
778
  if (!corpus_loaded)
779
779
  return Qnil;
780
-
780
+
781
781
  run_quiet_em(STR2CSTR(start), last_corpus);
782
782
 
783
783
  return Qnil;
@@ -815,7 +815,7 @@ static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
815
815
  corpus* c;
816
816
  int i = 0;
817
817
  int j = 0;
818
-
818
+
819
819
  c = malloc(sizeof(corpus));
820
820
  c->num_terms = NUM2INT(rb_iv_get(rcorpus, "@num_terms"));
821
821
  c->num_docs = NUM2INT(rb_iv_get(rcorpus, "@num_docs"));
@@ -825,7 +825,7 @@ static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
825
825
  VALUE one_doc = rb_ary_entry(doc_ary, i);
826
826
  VALUE words = rb_iv_get(one_doc, "@words");
827
827
  VALUE counts = rb_iv_get(one_doc, "@counts");
828
-
828
+
829
829
  c->docs[i].length = NUM2INT(rb_iv_get(one_doc, "@length"));
830
830
  c->docs[i].total = NUM2INT(rb_iv_get(one_doc, "@total"));
831
831
  c->docs[i].words = malloc(sizeof(int) * c->docs[i].length);
@@ -834,18 +834,18 @@ static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
834
834
  int one_word = NUM2INT(rb_ary_entry(words, j));
835
835
  int one_count = NUM2INT(rb_ary_entry(counts, j));
836
836
  if( one_word > c->num_terms ) {
837
- rb_raise(rb_eRuntimeError, "error term count(%d) less then word index(%d)", c->num_terms, one_word);
837
+ rb_raise(rb_eRuntimeError, "error term count(%d) less than word index(%d)", c->num_terms, one_word);
838
838
  }
839
839
  c->docs[i].words[j] = one_word;
840
840
  c->docs[i].counts[j] = one_count;
841
841
  }
842
842
  }
843
-
843
+
844
844
  last_corpus = c;
845
845
  corpus_loaded = TRUE;
846
-
846
+
847
847
  rb_iv_set(self, "@corpus", rcorpus);
848
-
848
+
849
849
  return Qtrue;
850
850
  }
851
851
 
@@ -856,11 +856,11 @@ static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
856
856
  static VALUE wrap_get_gamma(VALUE self) {
857
857
  if (!model_loaded)
858
858
  return Qnil;
859
-
859
+
860
860
  // last_gamma is a double[num_docs][num_topics]
861
861
  VALUE arr;
862
862
  int i = 0, j = 0;
863
-
863
+
864
864
  arr = rb_ary_new2(last_corpus->num_docs);
865
865
  for (i = 0; i < last_corpus->num_docs; i++) {
866
866
  VALUE arr2 = rb_ary_new2(last_model->num_topics);
@@ -869,7 +869,7 @@ static VALUE wrap_get_gamma(VALUE self) {
869
869
  }
870
870
  rb_ary_store(arr, i, arr2);
871
871
  }
872
-
872
+
873
873
  return arr;
874
874
  }
875
875
 
@@ -882,31 +882,31 @@ static VALUE wrap_get_gamma(VALUE self) {
882
882
  static VALUE wrap_get_phi(VALUE self) {
883
883
  if (!model_loaded)
884
884
  return Qnil;
885
-
885
+
886
886
  VALUE arr = rb_ary_new2(last_corpus->num_docs);
887
887
  int i = 0, j = 0, k = 0;
888
-
888
+
889
889
  //int max_length = max_corpus_length(last_corpus);
890
890
  short error = 0;
891
-
891
+
892
892
  for (i = 0; i < last_corpus->num_docs; i++) {
893
893
  VALUE arr1 = rb_ary_new2(last_corpus->docs[i].length);
894
-
894
+
895
895
  lda_inference(&(last_corpus->docs[i]), last_model, last_gamma[i], last_phi, &error);
896
-
896
+
897
897
  for (j = 0; j < last_corpus->docs[i].length; j++) {
898
898
  VALUE arr2 = rb_ary_new2(last_model->num_topics);
899
-
899
+
900
900
  for (k = 0; k < last_model->num_topics; k++) {
901
901
  rb_ary_store(arr2, k, rb_float_new(last_phi[j][k]));
902
902
  }
903
-
903
+
904
904
  rb_ary_store(arr1, j, arr2);
905
905
  }
906
-
906
+
907
907
  rb_ary_store(arr, i, arr1);
908
908
  }
909
-
909
+
910
910
  return arr;
911
911
  }
912
912
 
@@ -918,11 +918,11 @@ static VALUE wrap_get_phi(VALUE self) {
918
918
  static VALUE wrap_get_model_beta(VALUE self) {
919
919
  if (!model_loaded)
920
920
  return Qnil;
921
-
921
+
922
922
  // beta is a double[num_topics][num_terms]
923
923
  VALUE arr;
924
924
  int i = 0, j = 0;
925
-
925
+
926
926
  arr = rb_ary_new2(last_model->num_topics);
927
927
  for (i = 0; i < last_model->num_topics; i++) {
928
928
  VALUE arr2 = rb_ary_new2(last_model->num_terms);
@@ -931,7 +931,7 @@ static VALUE wrap_get_model_beta(VALUE self) {
931
931
  }
932
932
  rb_ary_store(arr, i, arr2);
933
933
  }
934
-
934
+
935
935
  return arr;
936
936
  }
937
937
 
@@ -944,28 +944,27 @@ static VALUE wrap_get_model_settings(VALUE self) {
944
944
  return Qnil;
945
945
 
946
946
  VALUE arr;
947
-
947
+
948
948
  arr = rb_ary_new();
949
949
  rb_ary_push(arr, rb_int_new(last_model->num_topics));
950
950
  rb_ary_push(arr, rb_int_new(last_model->num_terms));
951
951
  rb_ary_push(arr, rb_float_new(last_model->alpha));
952
-
952
+
953
953
  return arr; // [num_topics, num_terms, alpha]
954
954
  }
955
955
 
956
956
 
957
- void Init_lda_ext() {
957
+ void Init_lda() {
958
958
  corpus_loaded = FALSE;
959
959
  model_loaded = FALSE;
960
960
  VERBOSE = TRUE;
961
961
 
962
- rb_require("lda");
963
-
964
- rb_mLda = rb_define_module("Lda");
965
- rb_cLda = rb_define_class_under(rb_mLda, "Lda", rb_cObject);
966
- rb_cLdaCorpus = rb_define_class_under(rb_mLda, "Corpus", rb_cObject);
967
- rb_cLdaDocument = rb_define_class_under(rb_mLda, "Document", rb_cObject);
962
+ rb_require("lda-ruby");
968
963
 
964
+ rb_cLdaModule = rb_define_module("Lda");
965
+ rb_cLda = rb_define_class_under(rb_cLdaModule, "Lda", rb_cObject);
966
+ rb_cLdaCorpus = rb_define_class_under(rb_cLdaModule, "Corpus", rb_cObject);
967
+ rb_cLdaDocument = rb_define_class_under(rb_cLdaModule, "Document", rb_cObject);
969
968
 
970
969
  // method to load the corpus
971
970
  rb_define_method(rb_cLda, "fast_load_corpus_from_file", wrap_load_corpus, 1);
@@ -988,7 +987,7 @@ void Init_lda_ext() {
988
987
  rb_define_method(rb_cLda, "em_max_iter", wrap_get_em_max_iter, 0);
989
988
  rb_define_method(rb_cLda, "em_max_iter=", wrap_set_em_max_iter, 1);
990
989
  rb_define_method(rb_cLda, "em_convergence", wrap_get_em_converged, 0);
991
- rb_define_method(rb_cLda, "em_convergence=", wrap_set_em_converged, 1);
990
+ rb_define_method(rb_cLda, "em_convergence=", wrap_set_em_converged, 1);
992
991
  rb_define_method(rb_cLda, "init_alpha=", wrap_set_initial_alpha, 1);
993
992
  rb_define_method(rb_cLda, "init_alpha", wrap_get_initial_alpha, 0);
994
993
  rb_define_method(rb_cLda, "est_alpha=", wrap_set_estimate_alpha, 1);