ealdent-lda-ruby 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +39 -0
- data/VERSION.yml +2 -2
- data/lib/extconf.rb +3 -0
- data/lib/lda-inference.c +108 -71
- data/lib/lda-inference.h +2 -3
- data/lib/lda-model.c +29 -14
- data/lib/lda-model.h +1 -0
- data/lib/lda.rb +28 -20
- metadata +21 -11
data/README.markdown ADDED
@@ -0,0 +1,39 @@
+# Latent Dirichlet Allocation – Ruby Wrapper
+
+## What is LDA-Ruby?
+
+This wrapper is based on C code by David M. Blei. In a nutshell, it can be used to automatically cluster documents into topics. The number of topics is chosen beforehand, and the topics found are usually fairly intuitive. Details of the implementation can be found in the paper by Blei, Ng, and Jordan.
+
+The original C code relied on files for input and output. We felt it was necessary to depart from that model and use Ruby objects for these steps instead. The only file necessary is the data file (in a format similar to that used by [SVMlight][svmlight]). Optionally you may need a vocabulary file to be able to extract the words belonging to topics.
+
+### Example usage:
+
+    require 'lda'
+    lda = Lda::Lda.new                              # create an Lda object for training
+    corpus = Lda::Corpus.new("data/data_file.dat")
+    lda.corpus = corpus
+    lda.em("random")                                # run EM algorithm using random starting points
+    lda.load_vocabulary("data/vocab.txt")
+    lda.print_topics(20)                            # print the top 20 words per topic
+
+See the rdocs for further information. You can also check out the mailing list for this project if you have any questions, or mail lda-ruby@groups.google.com. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor it are very knowledgeable.
+
+## Resources
+
+
++ [Blog post about LDA-Ruby][lda-ruby]
++ [David Blei's lda-c code][blei]
++ [Wikipedia article on LDA][wikipedia]
++ [Sample AP data][ap-data]
+
+
+## References
+
+Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent Dirichlet allocation. Journal of Machine Learning Research 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
+
+[svmlight]: http://svmlight.joachims.org
+[lda-ruby]: http://mendicantbug.com/2008/11/17/lda-in-ruby/
+[blei]: http://www.cs.princeton.edu/~blei/lda-c/
+[wikipedia]: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation
+[ap-data]: http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
+[pdf]: http://www.cs.princeton.edu/picasso/mats/BleiNgJordan2003_blei.pdf
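The README leaves the data file format implicit. For reference, lda-c (which this gem wraps) documents each line as the number of unique terms in a document followed by `term_index:count` pairs, where the indices point into the vocabulary file. The values below are illustrative, not from the gem's sample data:

```
5 0:3 6:1 9:2 12:1 15:1
```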
data/VERSION.yml CHANGED
data/lib/extconf.rb CHANGED
data/lib/lda-inference.c CHANGED
@@ -17,10 +17,6 @@
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 // USA
 
-#ifndef USE_RUBY
-#define USE_RUBY
-#endif
-
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
@@ -50,13 +46,17 @@ VALUE rb_cLdaDocument;
  * variational inference
  */
 
-double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi) {
+double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi, short* errors) {
     double converged = 1;
     double phisum = 0, likelihood = 0;
     double likelihood_old = 0, oldphi[model->num_topics];
-    int k, n, var_iter;
+    int k = 0, n = 0, var_iter = 0, index = 0;
     double digamma_gam[model->num_topics];
 
+    /* zero'em out */
+    memset(digamma_gam,0.0,sizeof(digamma_gam));
+    memset(oldphi,0.0,sizeof(oldphi));
+
     // compute posterior dirichlet
 
     for (k = 0; k < model->num_topics; k++)
@@ -78,9 +78,16 @@ double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi, short* errors) {
         for (k = 0; k < model->num_topics; k++)
         {
             oldphi[k] = phi[n][k];
-            phi[n][k] =
-                digamma_gam[k] +
-                model->log_prob_w[k][doc->words[n]];
+            index = doc->words[n];
+            if( index < 0 || index > model->num_terms ) {
+                printf("phi for term: %d of %d\n", index, model->num_terms);
+                phi[n][k] = 0.0;
+            }
+            else {
+                phi[n][k] =
+                    digamma_gam[k] +
+                    model->log_prob_w[k][index];
+            }
 
             if (k > 0)
                 phisum = log_sum(phisum, phi[n][k]);
@@ -100,7 +107,8 @@ double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi, short* errors) {
     }
 
     likelihood = compute_likelihood(doc, model, phi, var_gamma);
-    assert(!isnan(likelihood));
+    //assert(!isnan(likelihood));
+    if( isnan(likelihood) ) { *errors = 1; }
     converged = (likelihood_old - likelihood) / likelihood_old;
     likelihood_old = likelihood;
 
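The removed assert(!isnan(likelihood)) would call abort() on a NaN bound, killing the whole Ruby process; the new short* out-parameter reports the failure and lets each caller recover. A condensed sketch of the convention the call sites in this diff follow (assuming the gem's headers and the surrounding run_em variables are in scope):

```c
short error = 0;
double likelihood = lda_inference(doc, model, var_gamma, phi, &error);
if (error) {
    /* the variational bound came back NaN: drop this document's
       contribution instead of taking down the interpreter */
    likelihood = 0.0;
}
```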
@@ -116,7 +124,8 @@ double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi, short* errors) {
 
 double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
     double likelihood = 0, digsum = 0, var_gamma_sum = 0, dig[model->num_topics];
-    int k, n;
+    int k = 0, n = 0, index = 0;
+    memset(dig,0.0,sizeof(dig));
 
     for (k = 0; k < model->num_topics; k++)
     {
@@ -125,7 +134,10 @@ double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
     }
     digsum = digamma(var_gamma_sum);
 
-    likelihood = lgamma(model->alpha * model->num_topics) - model->num_topics * lgamma(model->alpha) - lgamma(var_gamma_sum);
+    likelihood = lgamma(model->alpha * model->num_topics) -
+        model->num_topics *
+        lgamma(model->alpha) -
+        lgamma(var_gamma_sum);
 
     for (k = 0; k < model->num_topics; k++)
     {
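For orientation, the rewrapped assignment is the closed-form Dirichlet portion of the variational bound from Blei, Ng, and Jordan (2003). With K topics, symmetric prior α, and variational Dirichlet parameters γ, it computes

```latex
\log\Gamma(K\alpha) \;-\; K\,\log\Gamma(\alpha) \;-\; \log\Gamma\!\Big(\sum_{k=1}^{K}\gamma_k\Big)
```

with the remaining per-topic and per-word expectation terms accumulated in the loops that follow.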
@@ -135,9 +147,10 @@ double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
         {
             if (phi[n][k] > 0)
             {
+                index = doc->words[n];
                 likelihood += doc->counts[n]*
                     (phi[n][k]*((dig[k] - digsum) - log(phi[n][k])
-                    + model->log_prob_w[k][doc->words[n]]));
+                    + model->log_prob_w[k][index]));
             }
         }
     }
@@ -148,10 +161,13 @@ double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
 double doc_e_step(document* doc, double* gamma, double** phi, lda_model* model, lda_suffstats* ss) {
     double likelihood;
     int n, k;
+    short error = 0;
+
+    // posterior inference
 
-    // posterior inference
+    likelihood = lda_inference(doc, model, gamma, phi, &error);
+    if (error) { likelihood = 0.0; }
 
-    likelihood = lda_inference(doc, model, gamma, phi);
 
     // update sufficient statistics
 
@@ -221,6 +237,7 @@ void run_em(char* start, char* directory, corpus* corpus) {
 
     // allocate variational parameters
 
+
     var_gamma = malloc(sizeof(double*)*(corpus->num_docs));
     for (d = 0; d < corpus->num_docs; d++)
         var_gamma[d] = malloc(sizeof(double) * NTOPICS);
@@ -279,23 +296,22 @@ void run_em(char* start, char* directory, corpus* corpus) {
         zero_initialize_ss(ss, model);
 
         // e-step
+        printf("e-step\n");
 
         for (d = 0; d < corpus->num_docs; d++) {
             if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
             likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
         }
+        printf("m-step\n");
 
         // m-step
-        if (VERBOSE) {
-            lda_mle(model, ss, ESTIMATE_ALPHA);
-        }
-        else {
-            quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
-        }
-
+        if (VERBOSE) {
+            lda_mle(model, ss, ESTIMATE_ALPHA);
+        } else {
+            quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
+        }
 
         // check for convergence
-
         converged = (likelihood_old - likelihood) / (likelihood_old);
         if (converged < 0) VAR_MAX_ITER = VAR_MAX_ITER * 2;
         likelihood_old = likelihood;
@@ -324,10 +340,15 @@ void run_em(char* start, char* directory, corpus* corpus) {
 
     sprintf(filename, "%s/word-assignments.dat", directory);
     FILE* w_asgn_file = fopen(filename, "w");
+    short error = 0;
+    double tl = 0.0;
     for (d = 0; d < corpus->num_docs; d++)
     {
         if ((d % 100) == 0 && VERBOSE) printf("final e step document %d\n",d);
-        likelihood += lda_inference(&(corpus->docs[d]), model, var_gamma[d], phi);
+        error = 0;
+        tl = lda_inference(&(corpus->docs[d]), model, var_gamma[d], phi,&error);
+        if( error ) { continue; }
+        likelihood += tl;
         write_word_assignment(w_asgn_file, &(corpus->docs[d]), phi, model);
     }
     fclose(w_asgn_file);
@@ -388,7 +409,8 @@ void infer(char* model_root, char* save, corpus* corpus) {
     phi = (double**) malloc(sizeof(double*) * doc->length);
     for (n = 0; n < doc->length; n++)
         phi[n] = (double*) malloc(sizeof(double) * model->num_topics);
-    likelihood = lda_inference(doc, model, var_gamma[d], phi);
+    short error = 0;
+    likelihood = lda_inference(doc, model, var_gamma[d], phi, &error);
 
     fprintf(fileptr, "%5.5f\n", likelihood);
 }
@@ -448,58 +470,68 @@ int main(int argc, char* argv[]) {
 
 /* */
 void run_quiet_em(char* start, corpus* corpus) {
-    int d, n;
+    int d = 0, n = 0;
     lda_model *model = NULL;
-    double **var_gamma, **phi;
+    double **var_gamma = NULL, **phi = NULL;
+    // last_gamma is a double[num_docs][num_topics]
 
     // allocate variational parameters
 
-    var_gamma = malloc(sizeof(double*)*(corpus->num_docs));
-    for (d = 0; d < corpus->num_docs; d++)
-        var_gamma[d] = malloc(sizeof(double) * NTOPICS);
+
+    var_gamma = (double**)malloc(sizeof(double*)*(corpus->num_docs));
+    memset(var_gamma, 0.0, corpus->num_docs);
+
+    for (d = 0; d < corpus->num_docs; ++d) {
+        var_gamma[d] = (double*)malloc(sizeof(double) * NTOPICS);
+        memset(var_gamma[d], 0.0, sizeof(double)*NTOPICS);
+    }
 
     int max_length = max_corpus_length(corpus);
-    phi = malloc(sizeof(double*)*max_length);
-    for (n = 0; n < max_length; n++)
-        phi[n] = malloc(sizeof(double) * NTOPICS);
+
+    phi = (double**)malloc(sizeof(double*)*max_length);
+    memset(phi, 0.0, max_length);
+    for (n = 0; n < max_length; ++n) {
+        phi[n] = (double*)malloc(sizeof(double) * NTOPICS);
+        memset(phi[n], 0.0, sizeof(double)*NTOPICS);
+    }
 
     // initialize model
 
     lda_suffstats* ss = NULL;
-    if (strcmp(start, "seeded")==0) {
+    if (strncmp(start, "seeded",6)==0) {
         model = new_lda_model(corpus->num_terms, NTOPICS);
-        ss = new_lda_suffstats(model);
-        if (VERBOSE) {
-            corpus_initialize_ss(ss, model, corpus);
-        } else {
-            quiet_corpus_initialize_ss(ss, model, corpus);
-        }
-        if (VERBOSE) {
-            lda_mle(model, ss, 0);
-        } else {
-            quiet_lda_mle(model, ss, 0);
-        }
         model->alpha = INITIAL_ALPHA;
-    } else if (strcmp(start, "fixed")==0) {
-        model = new_lda_model(corpus->num_terms, NTOPICS);
         ss = new_lda_suffstats(model);
-        corpus_initialize_fixed_ss(ss, model, corpus);
         if (VERBOSE) {
-            lda_mle(model, ss, 0);
+            corpus_initialize_ss(ss, model, corpus);
+        } else {
+            quiet_corpus_initialize_ss(ss, model, corpus);
+        }
+        if (VERBOSE) {
+            lda_mle(model, ss, 0);
         } else {
-            quiet_lda_mle(model, ss, 0);
+            quiet_lda_mle(model, ss, 0);
         }
-        model->alpha = INITIAL_ALPHA;
-    } else if (strcmp(start, "random")==0) {
+    } else if (strncmp(start, "fixed",5)==0) {
+        model = new_lda_model(corpus->num_terms, NTOPICS);
+        model->alpha = INITIAL_ALPHA;
+        ss = new_lda_suffstats(model);
+        corpus_initialize_fixed_ss(ss, model, corpus);
+        if (VERBOSE) {
+            lda_mle(model, ss, 0);
+        } else {
+            quiet_lda_mle(model, ss, 0);
+        }
+    } else if (strncmp(start, "random",6)==0) {
         model = new_lda_model(corpus->num_terms, NTOPICS);
+        model->alpha = INITIAL_ALPHA;
         ss = new_lda_suffstats(model);
         random_initialize_ss(ss, model);
         if (VERBOSE) {
-            lda_mle(model, ss, 0);
+            lda_mle(model, ss, 0);
         } else {
-            quiet_lda_mle(model, ss, 0);
+            quiet_lda_mle(model, ss, 0);
         }
-        model->alpha = INITIAL_ALPHA;
     } else {
        model = load_lda_model(start);
        ss = new_lda_suffstats(model);
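A side note on the allocate-then-zero pattern above: the per-row memsets clear sizeof(double)*NTOPICS bytes, while the memsets on the pointer tables pass an element count where memset expects a byte count. calloc expresses the same intent in one step; a minimal sketch reusing names from the diff (not the code the gem ships):

```c
/* sketch: num_docs zeroed row pointers, then NTOPICS zeroed doubles per row */
double **var_gamma = (double **)calloc(corpus->num_docs, sizeof(double *));
for (d = 0; d < corpus->num_docs; ++d)
    var_gamma[d] = (double *)calloc(NTOPICS, sizeof(double));
```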
@@ -512,12 +544,11 @@ void run_quiet_em(char* start, corpus* corpus) {
     // run expectation maximization
 
     int i = 0;
-    double likelihood, likelihood_old = 0, converged = 1;
+    double likelihood = 0.0, likelihood_old = 0, converged = 1;
 
     while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
         i++;
-        if (VERBOSE)
-            printf("**** em iteration %d ****\n", i);
+        if (VERBOSE) printf("**** em iteration %d ****\n", i);
         likelihood = 0;
         zero_initialize_ss(ss, model);
 
@@ -529,12 +560,11 @@ void run_quiet_em(char* start, corpus* corpus) {
         }
 
         // m-step
-        if (VERBOSE) {
-            lda_mle(model, ss, ESTIMATE_ALPHA);
-        }
-        else {
-            quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
-        }
+        if (VERBOSE) {
+            lda_mle(model, ss, ESTIMATE_ALPHA);
+        } else {
+            quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
+        }
 
         // check for convergence
 
@@ -546,14 +576,16 @@ void run_quiet_em(char* start, corpus* corpus) {
 
         last_model = model;
         last_gamma = var_gamma;
-
+        last_phi = phi;
     }
 
     // output the final model
 
     last_model = model;
     last_gamma = var_gamma;
-
+    last_phi = phi;
+
+    free_lda_suffstats(model,ss);
 
     // output the word assignments (for visualization)
     /*
@@ -585,6 +617,7 @@ void run_quiet_em(char* start, corpus* corpus) {
 static VALUE wrap_set_config(VALUE self, VALUE init_alpha, VALUE num_topics, VALUE max_iter, VALUE convergence, VALUE em_max_iter, VALUE em_convergence, VALUE est_alpha) {
     INITIAL_ALPHA = NUM2DBL(init_alpha);
     NTOPICS = NUM2INT(num_topics);
+    if( NTOPICS < 0 ) { rb_raise(rb_eRuntimeError, "NTOPICS must be greater than 0 - %d", NTOPICS); }
     VAR_MAX_ITER = NUM2INT(max_iter);
     VAR_CONVERGED = (float)NUM2DBL(convergence);
     EM_MAX_ITER = NUM2INT(em_max_iter);
@@ -798,8 +831,11 @@ static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
         c->docs[i].words = malloc(sizeof(int) * c->docs[i].length);
         c->docs[i].counts = malloc(sizeof(int) * c->docs[i].length);
         for (j = 0; j < c->docs[i].length; j++) {
-            int one_word = NUM2INT(rb_ary_entry(words, j));
-            int one_count = NUM2INT(rb_ary_entry(counts, j));
+            int one_word = NUM2INT(rb_ary_entry(words, j));
+            int one_count = NUM2INT(rb_ary_entry(counts, j));
+            if( one_word > c->num_terms ) {
+                rb_raise(rb_eRuntimeError, "error term count(%d) less then word index(%d)", c->num_terms, one_word);
+            }
             c->docs[i].words[j] = one_word;
             c->docs[i].counts[j] = one_count;
         }
@@ -850,12 +886,13 @@ static VALUE wrap_get_phi(VALUE self) {
     VALUE arr = rb_ary_new2(last_corpus->num_docs);
     int i = 0, j = 0, k = 0;
 
-    int max_length = max_corpus_length(last_corpus);
+    //int max_length = max_corpus_length(last_corpus);
+    short error = 0;
 
     for (i = 0; i < last_corpus->num_docs; i++) {
         VALUE arr1 = rb_ary_new2(last_corpus->docs[i].length);
 
-        lda_inference(&(last_corpus->docs[i]), last_model, last_gamma[i], last_phi);
+        lda_inference(&(last_corpus->docs[i]), last_model, last_gamma[i], last_phi, &error);
 
         for (j = 0; j < last_corpus->docs[i].length; j++) {
             VALUE arr2 = rb_ary_new2(last_model->num_topics);
@@ -968,4 +1005,4 @@ void Init_lda_ext() {
     rb_define_method(rb_cLda, "model", wrap_get_model_settings, 0);
 }
 
-#endif
+#endif
data/lib/lda-inference.h CHANGED
@@ -4,7 +4,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
-#include <assert.h>
 #include "lda.h"
 #include "utils.h"
 
@@ -31,7 +30,7 @@ enum BOOL { FALSE, TRUE } corpus_loaded, model_loaded, VERBOSE;
 
 
 
-double lda_inference(document*, lda_model*, double*, double**);
+double lda_inference(document*, lda_model*, double*, double**, short*);
 double compute_likelihood(document*, lda_model*, double**, double*);
 
 
@@ -61,4 +60,4 @@ void infer(char* model_root,
            corpus* corpus);
 
 
-#endif
+#endif
data/lib/lda-model.c CHANGED
@@ -18,6 +18,7 @@
 // USA
 
 #include "lda-model.h"
+#include <string.h>
 
 
 /*
@@ -88,25 +89,38 @@ void quiet_lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha) {
 */
 
 lda_suffstats* new_lda_suffstats(lda_model* model) {
+    register int i;
     int num_topics = model->num_topics;
     int num_terms = model->num_terms;
-    int i,j;
 
-    lda_suffstats* ss = malloc(sizeof(lda_suffstats));
-    ss->class_total = malloc(sizeof(double)*num_topics);
-    ss->class_word = malloc(sizeof(double*)*num_topics);
-    for (i = 0; i < num_topics; i++)
-    {
+    lda_suffstats* ss = (lda_suffstats*)malloc(sizeof(lda_suffstats));
+    memset(ss,0,sizeof(lda_suffstats));
+    ss->class_total = (double*)malloc(sizeof(double)*num_topics);
+    ss->class_word = (double**)malloc(sizeof(double*)*num_topics);
+
+    for (i = 0; i < num_topics; ++i) {
         ss->class_total[i] = 0;
-        ss->class_word[i] = malloc(sizeof(double)*num_terms);
-        for (j = 0; j < num_terms; j++)
-        {
-            ss->class_word[i][j] = 0;
-        }
+        ss->class_word[i] = (double*)malloc(sizeof(double)*num_terms);
+        memset(ss->class_word[i],0.0,sizeof(double)*num_terms);
     }
+
     return(ss);
 }
+/*
+ * deallocate new lda suffstats
+ *
+ */
+void free_lda_suffstats(lda_model* model, lda_suffstats* ss) {
+    int i;
+    int num_topics = model->num_topics;
 
+    free(ss->class_total);
+    for (i = 0; i < num_topics; ++i) {
+        free(ss->class_word[i]);
+    }
+    free(ss->class_word);
+    free(ss);
+}
 
 /*
  * various intializations for the sufficient statistics
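new_lda_suffstats now has a matching destructor, so the statistics can be released between runs instead of leaking. A minimal sketch of the paired lifetime, with num_terms and num_topics standing in for real values:

```c
lda_model* model = new_lda_model(num_terms, num_topics);
lda_suffstats* ss = new_lda_suffstats(model);

/* ... the E-step accumulates class_word/class_total counts into ss ... */

free_lda_suffstats(model, ss);  /* rows first, then the row table, then ss */
free_lda_model(model);
```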
@@ -132,6 +146,7 @@ void random_initialize_ss(lda_suffstats* ss, lda_model* model) {
     int num_topics = model->num_topics;
     int num_terms = model->num_terms;
     int k, n;
+
     for (k = 0; k < num_topics; k++)
     {
         for (n = 0; n < num_terms; n++)
@@ -221,7 +236,7 @@ void corpus_initialize_fixed_ss(lda_suffstats* ss, lda_model* model, corpus* c)
 */
 
 lda_model* new_lda_model(int num_terms, int num_topics) {
-    int i,j;
+    int i;
     lda_model* model;
 
     model = malloc(sizeof(lda_model));
@@ -229,11 +244,11 @@ lda_model* new_lda_model(int num_terms, int num_topics) {
     model->num_terms = num_terms;
     model->alpha = 1.0;
     model->log_prob_w = malloc(sizeof(double*)*num_topics);
+    printf("new model with: %d topics and %d terms\n", num_topics, num_terms);
     for (i = 0; i < num_topics; i++)
     {
         model->log_prob_w[i] = malloc(sizeof(double)*num_terms);
-        for (j = 0; j < num_terms; j++)
-            model->log_prob_w[i][j] = 0;
+        memset(model->log_prob_w[i],0,sizeof(double)*num_terms);
     }
     return(model);
 }
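Replacing the element-by-element zeroing loop with memset relies on 0.0 having an all-bits-zero representation, which holds on the IEEE-754 platforms this extension targets in practice. A self-contained check of that assumption:

```c
#include <assert.h>
#include <string.h>

int main(void) {
    double row[8];
    memset(row, 0, sizeof(row));   /* all-bits-zero reads back as 0.0 on IEEE-754 */
    for (int j = 0; j < 8; j++)
        assert(row[j] == 0.0);
    return 0;
}
```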
data/lib/lda-model.h CHANGED
@@ -16,6 +16,7 @@ void free_lda_model(lda_model*);
 void save_lda_model(lda_model*, char*);
 lda_model* new_lda_model(int, int);
 lda_suffstats* new_lda_suffstats(lda_model* model);
+void free_lda_suffstats(lda_model* model, lda_suffstats* ss);
 void corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
 void quiet_corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
 void corpus_initialize_fixed_ss(lda_suffstats* ss, lda_model* model, corpus* c);
data/lib/lda.rb CHANGED
@@ -30,10 +30,10 @@ module Lda
     def add_document(doc)
       if doc.is_a?(Document)
         @documents << doc
-        @all_terms = @all_terms + doc.words
+        @all_terms += doc.words
       elsif doc.is_a?(String)
         d = Document.new(doc)
-        @all_terms = @all_terms + d.words
+        @all_terms += d.words
         @documents << d
       end
       @num_docs += 1
@@ -51,11 +51,25 @@ module Lda
       true
     end
   end
+
+  class BaseDocument
+    def words
+      raise NotSupportedError
+    end
+
+    def length
+      raise NotSupportedError
+    end
+
+    def total
+      raise NotSupportedError
+    end
+  end
 
   #
   # A single document.
   #
-  class Document
+  class Document < BaseDocument
     attr_accessor :words, :counts
     attr_reader :length, :total
 
@@ -222,26 +236,20 @@ module Lda
         return nil
       end
 
-      # Load the model
-      beta = self.beta
-      unless beta
-        puts "Model has not been run."
-        return nil
-      end
-
       # find the highest scoring words per topic
       topics = Hash.new
-      indices = (0...@vocab.size).to_a
-      topic_num = 0
-      beta.each do |topic|
-        indices.sort! {|x, y| -(topic[x] <=> topic[y])}
-        topics[topic_num] = Array.new
-        words_per_topic.times do |i|
-          topics[topic_num] << @vocab[indices[i]]
+      indices = (0...@vocab.size).to_a
+
+      begin
+        beta.each_with_index do |topic, topic_idx|
+          indices.sort! {|x, y| -(topic[x] <=> topic[y])}
+          topics[topic_idx] = indices.first(words_per_topic).map { |i| @vocab[i] }
         end
-        topic_num += 1
+      rescue NoMethodError
+        puts "Error: model has not been run."
+        topics = nil
       end
-
+
       topics
     end
@@ -308,4 +316,4 @@ module Lda
 end
 
 # load the c-side stuff
-require 'lda_ext'
+require 'lda_ext'
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ealdent-lda-ruby
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.2.3
 platform: ruby
 authors:
 - Jason M. Adams
@@ -10,21 +10,31 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2009-
+date: 2009-07-19 00:00:00 -07:00
 default_executable:
-dependencies: []
-
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: stemmer
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+    version:
 description:
 email: jasonmadams@gmail.com
 executables: []
 
 extensions:
 - lib/extconf.rb
-extra_rdoc_files: []
-
+extra_rdoc_files:
+- README
+- README.markdown
 files:
 - README
--
+- VERSION.yml
 - lib/cokus.c
 - lib/cokus.h
 - lib/extconf.rb
@@ -40,12 +50,12 @@ files:
 - lib/lda.rb
 - lib/utils.c
 - lib/utils.h
--
-has_rdoc: true
+- license.txt
+- README.markdown
+has_rdoc: false
 homepage: http://github.com/ealdent/lda-ruby
 post_install_message:
 rdoc_options:
-- --inline-source
 - --charset=UTF-8
 require_paths:
 - lib
@@ -66,7 +76,7 @@ requirements: []
 rubyforge_project:
 rubygems_version: 1.2.0
 signing_key:
-specification_version: 2
+specification_version: 3
 summary: Ruby port of Latent Dirichlet Allocation by David M. Blei.
 test_files: []
 