ealdent-lda-ruby 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{cokus.c → lib/cokus.c} +0 -0
- data/{cokus.h → lib/cokus.h} +0 -0
- data/{extconf.rb → lib/extconf.rb} +0 -0
- data/{lda-alpha.c → lib/lda-alpha.c} +28 -0
- data/{lda-alpha.h → lib/lda-alpha.h} +2 -1
- data/{lda-data.c → lib/lda-data.c} +0 -0
- data/{lda-data.h → lib/lda-data.h} +0 -0
- data/{lda-inference.c → lib/lda-inference.c} +70 -13
- data/{lda-inference.h → lib/lda-inference.h} +8 -2
- data/{lda-model.c → lib/lda-model.c} +30 -0
- data/{lda-model.h → lib/lda-model.h} +1 -1
- data/{lda.h → lib/lda.h} +0 -0
- data/{lda.rb → lib/lda.rb} +0 -0
- data/{utils.c → lib/utils.c} +0 -0
- data/{utils.h → lib/utils.h} +0 -0
- metadata +19 -19

data/{cokus.c → lib/cokus.c}
RENAMED
File without changes

data/{cokus.h → lib/cokus.h}
RENAMED
File without changes

data/{extconf.rb → lib/extconf.rb}
RENAMED
File without changes

data/{lda-alpha.c → lib/lda-alpha.c}
RENAMED

@@ -66,3 +66,31 @@ double opt_alpha(double ss, int D, int K)
     while ((fabs(df) > NEWTON_THRESH) && (iter < MAX_ALPHA_ITER));
     return(exp(log_a));
 }
+
+double quiet_opt_alpha(double ss, int D, int K)
+{
+    double a, log_a, init_a = 100;
+    double f, df, d2f;
+    int iter = 0;
+
+    log_a = log(init_a);
+    do
+    {
+        iter++;
+        a = exp(log_a);
+        if (isnan(a))
+        {
+            init_a = init_a * 10;
+            //printf("warning : alpha is nan; new init = %5.5f\n", init_a);
+            a = init_a;
+            log_a = log(a);
+        }
+        f = alhood(a, ss, D, K);
+        df = d_alhood(a, ss, D, K);
+        d2f = d2_alhood(a, D, K);
+        log_a = log_a - df/(d2f * a + df);
+        //printf("alpha maximization : %5.5f %5.5f\n", f, df);
+    }
+    while ((fabs(df) > NEWTON_THRESH) && (iter < MAX_ALPHA_ITER));
+    return(exp(log_a));
+}
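
The Newton update in quiet_opt_alpha (identical to the existing opt_alpha apart from the commented-out printf calls) is easier to follow with the change of variables it implements; the derivation below is standard and is implied by, not stated in, the diff. Writing $\lambda = \log a$, and with `df` $= f'(a)$ and `d2f` $= f''(a)$ for the alpha objective $f$:

$$\frac{df}{d\lambda} = a\,f'(a), \qquad \frac{d^2 f}{d\lambda^2} = a^2 f''(a) + a\,f'(a),$$

so one Newton step on $\lambda$ is

$$\lambda \leftarrow \lambda - \frac{a\,f'(a)}{a^2 f''(a) + a\,f'(a)} = \lambda - \frac{f'(a)}{a\,f''(a) + f'(a)},$$

which is exactly the line log_a = log_a - df/(d2f * a + df); iterating in log space also keeps the alpha estimate positive.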

data/{lda-alpha.h → lib/lda-alpha.h}
RENAMED

@@ -15,6 +15,7 @@ double alhood(double a, double ss, int D, int K);
 double d_alhood(double a, double ss, int D, int K);
 double d2_alhood(double a, int D, int K);
 double opt_alpha(double ss, int D, int K);
-void maximize_alpha(double** gamma, lda_model* model, int num_docs);
+double quiet_opt_alpha(double ss, int D, int K);
+//void maximize_alpha(double** gamma, lda_model* model, int num_docs);
 
 #endif

data/{lda-data.c → lib/lda-data.c}
RENAMED
File without changes

data/{lda-data.h → lib/lda-data.h}
RENAMED
File without changes

data/{lda-inference.c → lib/lda-inference.c}
RENAMED

@@ -32,7 +32,6 @@
 #include "lda-data.h"
 #include "lda-inference.h"
 #include "lda-model.h"
-#include "lda-alpha.h"
 #include "utils.h"
 #include "cokus.h"
 
@@ -45,6 +44,8 @@ VALUE rb_cLdaCorpus;
 VALUE rb_cLdaDocument;
 #endif
 
+
+
 /*
  * variational inference
  */
@@ -238,13 +239,22 @@ void run_em(char* start, char* directory, corpus* corpus) {
     model = new_lda_model(corpus->num_terms, NTOPICS);
     ss = new_lda_suffstats(model);
     corpus_initialize_ss(ss, model, corpus);
-    lda_mle(model, ss, 0);
+    if (VERBOSE) {
+      lda_mle(model, ss, 0);
+    } else {
+      quiet_lda_mle(model, ss, 0);
+    }
+
     model->alpha = INITIAL_ALPHA;
   } else if (strcmp(start, "random")==0) {
     model = new_lda_model(corpus->num_terms, NTOPICS);
     ss = new_lda_suffstats(model);
     random_initialize_ss(ss, model);
-    lda_mle(model, ss, 0);
+    if (VERBOSE) {
+      lda_mle(model, ss, 0);
+    } else {
+      quiet_lda_mle(model, ss, 0);
+    }
     model->alpha = INITIAL_ALPHA;
   } else {
     model = load_lda_model(start);
@@ -263,20 +273,26 @@ void run_em(char* start, char* directory, corpus* corpus) {
 
   while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
     i++;
-    printf("**** em iteration %d ****\n", i);
+    if (VERBOSE)
+      printf("**** em iteration %d ****\n", i);
     likelihood = 0;
     zero_initialize_ss(ss, model);
 
     // e-step
 
     for (d = 0; d < corpus->num_docs; d++) {
-      if ((d % 1000) == 0) printf("document %d\n",d);
+      if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
       likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
     }
 
     // m-step
 
-    lda_mle(model, ss, ESTIMATE_ALPHA);
+    if (VERBOSE) {
+      lda_mle(model, ss, ESTIMATE_ALPHA);
+    } else {
+      quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
+    }
+
 
     // check for convergence
 
@@ -310,7 +326,7 @@ void run_em(char* start, char* directory, corpus* corpus) {
     FILE* w_asgn_file = fopen(filename, "w");
     for (d = 0; d < corpus->num_docs; d++)
     {
-      if ((d % 100) == 0) printf("final e step document %d\n",d);
+      if ((d % 100) == 0 && VERBOSE) printf("final e step document %d\n",d);
       likelihood += lda_inference(&(corpus->docs[d]), model, var_gamma[d], phi);
       write_word_assignment(w_asgn_file, &(corpus->docs[d]), phi, model);
     }
@@ -366,7 +382,7 @@ void infer(char* model_root, char* save, corpus* corpus) {
   sprintf(filename, "%s-lda-lhood.dat", save);
   fileptr = fopen(filename, "w");
   for (d = 0; d < corpus->num_docs; d++) {
-    if (((d % 100) == 0) && (d>0)) printf("document %d\n",d);
+    if (((d % 100) == 0) && (d>0) && VERBOSE) printf("document %d\n",d);
 
     doc = &(corpus->docs[d]);
     phi = (double**) malloc(sizeof(double*) * doc->length);
@@ -454,13 +470,21 @@ void run_quiet_em(char* start, corpus* corpus) {
     model = new_lda_model(corpus->num_terms, NTOPICS);
     ss = new_lda_suffstats(model);
     corpus_initialize_ss(ss, model, corpus);
-    lda_mle(model, ss, 0);
+    if (VERBOSE) {
+      lda_mle(model, ss, 0);
+    } else {
+      quiet_lda_mle(model, ss, 0);
+    }
     model->alpha = INITIAL_ALPHA;
   } else if (strcmp(start, "random")==0) {
     model = new_lda_model(corpus->num_terms, NTOPICS);
     ss = new_lda_suffstats(model);
     random_initialize_ss(ss, model);
-    lda_mle(model, ss, 0);
+    if (VERBOSE) {
+      lda_mle(model, ss, 0);
+    } else {
+      quiet_lda_mle(model, ss, 0);
+    }
     model->alpha = INITIAL_ALPHA;
   } else {
     model = load_lda_model(start);
@@ -478,20 +502,25 @@ void run_quiet_em(char* start, corpus* corpus) {
 
   while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
     i++;
-    printf("**** em iteration %d ****\n", i);
+    if (VERBOSE)
+      printf("**** em iteration %d ****\n", i);
     likelihood = 0;
     zero_initialize_ss(ss, model);
 
     // e-step
 
     for (d = 0; d < corpus->num_docs; d++) {
-      if ((d % 1000) == 0) printf("document %d\n",d);
+      if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
       likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
     }
 
     // m-step
 
-    lda_mle(model, ss, ESTIMATE_ALPHA);
+    if (VERBOSE) {
+      lda_mle(model, ss, ESTIMATE_ALPHA);
+    } else {
+      quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
+    }
 
     // check for convergence
 
@@ -661,6 +690,31 @@ static VALUE wrap_set_estimate_alpha(VALUE self, VALUE est_alpha) {
   return est_alpha;
 }
 
+/*
+ * Get the verbosity setting.
+ */
+static VALUE wrap_get_verbosity(VALUE self) {
+  if (VERBOSE) {
+    return Qtrue;
+  } else {
+    return Qfalse;
+  }
+}
+
+
+/*
+ * Set the verbosity level (true, false).
+ */
+static VALUE wrap_set_verbosity(VALUE self, VALUE verbosity) {
+  if (verbosity == Qtrue) {
+    VERBOSE = TRUE;
+  } else {
+    VERBOSE = FALSE;
+  }
+
+  return verbosity;
+}
+
 
 
 /*
@@ -812,6 +866,7 @@ static VALUE wrap_get_model_settings(VALUE self) {
 void Init_lda_ext() {
   corpus_loaded = FALSE;
   model_loaded = FALSE;
+  VERBOSE = TRUE;
 
   rb_require("lda");
 
@@ -865,6 +920,8 @@ void Init_lda_ext() {
   rb_define_method(rb_cLda, "est_alpha", wrap_get_estimate_alpha, 0);
   rb_define_method(rb_cLda, "num_topics", wrap_get_num_topics, 0);
   rb_define_method(rb_cLda, "num_topics=", wrap_set_num_topics, 1);
+  rb_define_method(rb_cLda, "verbose", wrap_get_verbosity, 0);
+  rb_define_method(rb_cLda, "verbose=", wrap_set_verbosity, 1);
 
   // retrieve model and gamma
   rb_define_method(rb_cLda, "beta", wrap_get_model_beta, 0);
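
The two rb_define_method calls registered in Init_lda_ext expose the new VERBOSE flag to Ruby as verbose / verbose=. A minimal usage sketch follows; the Lda::Lda class path and its constructor are assumptions about how the gem is driven and are not part of this diff — only verbose and verbose= are confirmed here:

    require 'lda'

    lda = Lda::Lda.new       # assumed constructor; not shown in this diff
    lda.verbose              # => true, since Init_lda_ext now sets VERBOSE = TRUE
    lda.verbose = false      # later EM runs take the quiet_* code paths above
    lda.verbose              # => false

With verbosity off, the per-iteration "**** em iteration ****" and "document N" printf output in run_em and run_quiet_em is skipped.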

data/{lda-inference.h → lib/lda-inference.h}
RENAMED

@@ -1,12 +1,15 @@
 #ifndef LDA_INFERENCE_H
 #define LDA_INFERENCE_H
 
+#include <stdlib.h>
 #include <math.h>
 #include <float.h>
 #include <assert.h>
 #include "lda.h"
 #include "utils.h"
 
+
+
 int LAG = 5;
 
 float EM_CONVERGED;
@@ -22,9 +25,11 @@ corpus *last_corpus;
 lda_model *last_model;
 double **last_gamma;
 
-enum BOOL { FALSE, TRUE } corpus_loaded, model_loaded;
+enum BOOL { FALSE, TRUE } corpus_loaded, model_loaded, VERBOSE;
 #endif
 
+
+
 double lda_inference(document*, lda_model*, double*, double**);
 double compute_likelihood(document*, lda_model*, double**, double*);
 
@@ -53,5 +58,6 @@ void read_settings(char* filename);
 void infer(char* model_root,
            char* save,
            corpus* corpus);
+
 
-#endif
+#endif

data/{lda-model.c → lib/lda-model.c}
RENAMED

@@ -51,6 +51,36 @@ void lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha) {
     }
 }
 
+void quiet_lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha) {
+    int k; int w;
+
+    for (k = 0; k < model->num_topics; k++)
+    {
+        for (w = 0; w < model->num_terms; w++)
+        {
+            if (ss->class_word[k][w] > 0)
+            {
+                model->log_prob_w[k][w] =
+                    log(ss->class_word[k][w]) -
+                    log(ss->class_total[k]);
+            }
+            else
+                model->log_prob_w[k][w] = -100;
+        }
+    }
+    if (estimate_alpha == 1)
+    {
+        model->alpha = opt_alpha(ss->alpha_suffstats,
+                                 ss->num_docs,
+                                 model->num_topics);
+
+        printf("new alpha = %5.5f\n", model->alpha);
+    }
+}
+
+
+
+
 /*
  * allocate sufficient statistics
 *
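
quiet_lda_mle duplicates the M-step of lda_mle. The assignment to log_prob_w is the usual maximum-likelihood topic–word estimate (standard LDA, implied by the code rather than stated in the diff): with $n_{kw}$ = ss->class_word[k][w], the expected count of word $w$ in topic $k$, and $n_k$ = ss->class_total[k] its total,

$$\log\beta_{kw} = \log n_{kw} - \log n_k,$$

while words with zero expected count are floored at $\log\beta_{kw} = -100$ so the log probabilities stay finite.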

data/{lda-model.h → lib/lda-model.h}
RENAMED

@@ -5,7 +5,6 @@
 #include <stdio.h>
 #include <math.h>
 #include "lda.h"
-#include "lda-alpha.h"
 #include "cokus.h"
 
 #define myrand() (double) (((unsigned long) randomMT()) / 4294967296.)
@@ -19,6 +18,7 @@ void corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
 void random_initialize_ss(lda_suffstats* ss, lda_model* model);
 void zero_initialize_ss(lda_suffstats* ss, lda_model* model);
 void lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha);
+void quiet_lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha);
 lda_model* load_lda_model(char* model_root);
 
 #endif

data/{lda.h → lib/lda.h}
RENAMED
File without changes

data/{lda.rb → lib/lda.rb}
RENAMED
File without changes

data/{utils.c → lib/utils.c}
RENAMED
File without changes

data/{utils.h → lib/utils.h}
RENAMED
File without changes

metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ealdent-lda-ruby
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Jason M. Adams
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2008-11-
+date: 2008-11-19 00:00:00 -08:00
 default_executable:
 dependencies: []
 
@@ -18,28 +18,28 @@ description:
 email: jasonmadams@gmail.com
 executables: []
 
-extensions:
-
+extensions:
+- lib/extconf.rb
 extra_rdoc_files: []
 
 files:
 - README
 - license.txt
-- cokus.c
-- cokus.h
-- extconf.rb
-- lda-alpha.c
-- lda-alpha.h
-- lda-data.c
-- lda-data.h
-- lda-inference.c
-- lda-inference.h
-- lda-model.c
-- lda-model.h
-- lda.h
-- lda.rb
-- utils.c
-- utils.h
+- lib/cokus.c
+- lib/cokus.h
+- lib/extconf.rb
+- lib/lda-alpha.c
+- lib/lda-alpha.h
+- lib/lda-data.c
+- lib/lda-data.h
+- lib/lda-inference.c
+- lib/lda-inference.h
+- lib/lda-model.c
+- lib/lda-model.h
+- lib/lda.h
+- lib/lda.rb
+- lib/utils.c
+- lib/utils.h
 has_rdoc: true
 homepage: http://github.com/ealdent/lda-ruby
 post_install_message: