ealdent-lda-ruby 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
File without changes
@@ -66,3 +66,31 @@ double opt_alpha(double ss, int D, int K)
66
66
  while ((fabs(df) > NEWTON_THRESH) && (iter < MAX_ALPHA_ITER));
67
67
  return(exp(log_a));
68
68
  }
69
+
70
+ double quiet_opt_alpha(double ss, int D, int K)
71
+ {
72
+ double a, log_a, init_a = 100;
73
+ double f, df, d2f;
74
+ int iter = 0;
75
+
76
+ log_a = log(init_a);
77
+ do
78
+ {
79
+ iter++;
80
+ a = exp(log_a);
81
+ if (isnan(a))
82
+ {
83
+ init_a = init_a * 10;
84
+ //printf("warning : alpha is nan; new init = %5.5f\n", init_a);
85
+ a = init_a;
86
+ log_a = log(a);
87
+ }
88
+ f = alhood(a, ss, D, K);
89
+ df = d_alhood(a, ss, D, K);
90
+ d2f = d2_alhood(a, D, K);
91
+ log_a = log_a - df/(d2f * a + df);
92
+ //printf("alpha maximization : %5.5f %5.5f\n", f, df);
93
+ }
94
+ while ((fabs(df) > NEWTON_THRESH) && (iter < MAX_ALPHA_ITER));
95
+ return(exp(log_a));
96
+ }
@@ -15,6 +15,7 @@ double alhood(double a, double ss, int D, int K);
15
15
  double d_alhood(double a, double ss, int D, int K);
16
16
  double d2_alhood(double a, int D, int K);
17
17
  double opt_alpha(double ss, int D, int K);
18
- void maximize_alpha(double** gamma, lda_model* model, int num_docs);
18
+ double quiet_opt_alpha(double ss, int D, int K);
19
+ //void maximize_alpha(double** gamma, lda_model* model, int num_docs);
19
20
 
20
21
  #endif
File without changes
File without changes
@@ -32,7 +32,6 @@
32
32
  #include "lda-data.h"
33
33
  #include "lda-inference.h"
34
34
  #include "lda-model.h"
35
- #include "lda-alpha.h"
36
35
  #include "utils.h"
37
36
  #include "cokus.h"
38
37
 
@@ -45,6 +44,8 @@ VALUE rb_cLdaCorpus;
45
44
  VALUE rb_cLdaDocument;
46
45
  #endif
47
46
 
47
+
48
+
48
49
  /*
49
50
  * variational inference
50
51
  */
@@ -238,13 +239,22 @@ void run_em(char* start, char* directory, corpus* corpus) {
238
239
  model = new_lda_model(corpus->num_terms, NTOPICS);
239
240
  ss = new_lda_suffstats(model);
240
241
  corpus_initialize_ss(ss, model, corpus);
241
- lda_mle(model, ss, 0);
242
+ if (VERBOSE) {
243
+ lda_mle(model, ss, 0);
244
+ } else {
245
+ quiet_lda_mle(model, ss, 0);
246
+ }
247
+
242
248
  model->alpha = INITIAL_ALPHA;
243
249
  } else if (strcmp(start, "random")==0) {
244
250
  model = new_lda_model(corpus->num_terms, NTOPICS);
245
251
  ss = new_lda_suffstats(model);
246
252
  random_initialize_ss(ss, model);
247
- lda_mle(model, ss, 0);
253
+ if (VERBOSE) {
254
+ lda_mle(model, ss, 0);
255
+ } else {
256
+ quiet_lda_mle(model, ss, 0);
257
+ }
248
258
  model->alpha = INITIAL_ALPHA;
249
259
  } else {
250
260
  model = load_lda_model(start);
@@ -263,20 +273,26 @@ void run_em(char* start, char* directory, corpus* corpus) {
263
273
 
264
274
  while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
265
275
  i++;
266
- printf("**** em iteration %d ****\n", i);
276
+ if (VERBOSE)
277
+ printf("**** em iteration %d ****\n", i);
267
278
  likelihood = 0;
268
279
  zero_initialize_ss(ss, model);
269
280
 
270
281
  // e-step
271
282
 
272
283
  for (d = 0; d < corpus->num_docs; d++) {
273
- if ((d % 1000) == 0) printf("document %d\n",d);
284
+ if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
274
285
  likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
275
286
  }
276
287
 
277
288
  // m-step
278
289
 
279
- lda_mle(model, ss, ESTIMATE_ALPHA);
290
+ if (VERBOSE) {
291
+ lda_mle(model, ss, ESTIMATE_ALPHA);
292
+ } else {
293
+ quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
294
+ }
295
+
280
296
 
281
297
  // check for convergence
282
298
 
@@ -310,7 +326,7 @@ void run_em(char* start, char* directory, corpus* corpus) {
310
326
  FILE* w_asgn_file = fopen(filename, "w");
311
327
  for (d = 0; d < corpus->num_docs; d++)
312
328
  {
313
- if ((d % 100) == 0) printf("final e step document %d\n",d);
329
+ if ((d % 100) == 0 && VERBOSE) printf("final e step document %d\n",d);
314
330
  likelihood += lda_inference(&(corpus->docs[d]), model, var_gamma[d], phi);
315
331
  write_word_assignment(w_asgn_file, &(corpus->docs[d]), phi, model);
316
332
  }
@@ -366,7 +382,7 @@ void infer(char* model_root, char* save, corpus* corpus) {
366
382
  sprintf(filename, "%s-lda-lhood.dat", save);
367
383
  fileptr = fopen(filename, "w");
368
384
  for (d = 0; d < corpus->num_docs; d++) {
369
- if (((d % 100) == 0) && (d>0)) printf("document %d\n",d);
385
+ if (((d % 100) == 0) && (d>0) && VERBOSE) printf("document %d\n",d);
370
386
 
371
387
  doc = &(corpus->docs[d]);
372
388
  phi = (double**) malloc(sizeof(double*) * doc->length);
@@ -454,13 +470,21 @@ void run_quiet_em(char* start, corpus* corpus) {
454
470
  model = new_lda_model(corpus->num_terms, NTOPICS);
455
471
  ss = new_lda_suffstats(model);
456
472
  corpus_initialize_ss(ss, model, corpus);
457
- lda_mle(model, ss, 0);
473
+ if (VERBOSE) {
474
+ lda_mle(model, ss, 0);
475
+ } else {
476
+ quiet_lda_mle(model, ss, 0);
477
+ }
458
478
  model->alpha = INITIAL_ALPHA;
459
479
  } else if (strcmp(start, "random")==0) {
460
480
  model = new_lda_model(corpus->num_terms, NTOPICS);
461
481
  ss = new_lda_suffstats(model);
462
482
  random_initialize_ss(ss, model);
463
- lda_mle(model, ss, 0);
483
+ if (VERBOSE) {
484
+ lda_mle(model, ss, 0);
485
+ } else {
486
+ quiet_lda_mle(model, ss, 0);
487
+ }
464
488
  model->alpha = INITIAL_ALPHA;
465
489
  } else {
466
490
  model = load_lda_model(start);
@@ -478,20 +502,25 @@ void run_quiet_em(char* start, corpus* corpus) {
478
502
 
479
503
  while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
480
504
  i++;
481
- printf("**** em iteration %d ****\n", i);
505
+ if (VERBOSE)
506
+ printf("**** em iteration %d ****\n", i);
482
507
  likelihood = 0;
483
508
  zero_initialize_ss(ss, model);
484
509
 
485
510
  // e-step
486
511
 
487
512
  for (d = 0; d < corpus->num_docs; d++) {
488
- if ((d % 1000) == 0) printf("document %d\n",d);
513
+ if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
489
514
  likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
490
515
  }
491
516
 
492
517
  // m-step
493
518
 
494
- lda_mle(model, ss, ESTIMATE_ALPHA);
519
+ if (VERBOSE) {
520
+ lda_mle(model, ss, ESTIMATE_ALPHA);
521
+ } else {
522
+ quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
523
+ }
495
524
 
496
525
  // check for convergence
497
526
 
@@ -661,6 +690,31 @@ static VALUE wrap_set_estimate_alpha(VALUE self, VALUE est_alpha) {
661
690
  return est_alpha;
662
691
  }
663
692
 
693
+ /*
694
+ * Get the verbosity setting.
695
+ */
696
+ static VALUE wrap_get_verbosity(VALUE self) {
697
+ if (VERBOSE) {
698
+ return Qtrue;
699
+ } else {
700
+ return Qfalse;
701
+ }
702
+ }
703
+
704
+
705
+ /*
706
+ * Set the verbosity level (true, false).
707
+ */
708
+ static VALUE wrap_set_verbosity(VALUE self, VALUE verbosity) {
709
+ if (verbosity == Qtrue) {
710
+ VERBOSE = TRUE;
711
+ } else {
712
+ VERBOSE = FALSE;
713
+ }
714
+
715
+ return verbosity;
716
+ }
717
+
664
718
 
665
719
 
666
720
  /*
@@ -812,6 +866,7 @@ static VALUE wrap_get_model_settings(VALUE self) {
812
866
  void Init_lda_ext() {
813
867
  corpus_loaded = FALSE;
814
868
  model_loaded = FALSE;
869
+ VERBOSE = TRUE;
815
870
 
816
871
  rb_require("lda");
817
872
 
@@ -865,6 +920,8 @@ void Init_lda_ext() {
865
920
  rb_define_method(rb_cLda, "est_alpha", wrap_get_estimate_alpha, 0);
866
921
  rb_define_method(rb_cLda, "num_topics", wrap_get_num_topics, 0);
867
922
  rb_define_method(rb_cLda, "num_topics=", wrap_set_num_topics, 1);
923
+ rb_define_method(rb_cLda, "verbose", wrap_get_verbosity, 0);
924
+ rb_define_method(rb_cLda, "verbose=", wrap_set_verbosity, 1);
868
925
 
869
926
  // retrieve model and gamma
870
927
  rb_define_method(rb_cLda, "beta", wrap_get_model_beta, 0);
@@ -1,12 +1,15 @@
1
1
  #ifndef LDA_INFERENCE_H
2
2
  #define LDA_INFERENCE_H
3
3
 
4
+ #include <stdlib.h>
4
5
  #include <math.h>
5
6
  #include <float.h>
6
7
  #include <assert.h>
7
8
  #include "lda.h"
8
9
  #include "utils.h"
9
10
 
11
+
12
+
10
13
  int LAG = 5;
11
14
 
12
15
  float EM_CONVERGED;
@@ -22,9 +25,11 @@ corpus *last_corpus;
22
25
  lda_model *last_model;
23
26
  double **last_gamma;
24
27
 
25
- enum BOOL { FALSE, TRUE } corpus_loaded, model_loaded;
28
+ enum BOOL { FALSE, TRUE } corpus_loaded, model_loaded, VERBOSE;
26
29
  #endif
27
30
 
31
+
32
+
28
33
  double lda_inference(document*, lda_model*, double*, double**);
29
34
  double compute_likelihood(document*, lda_model*, double**, double*);
30
35
 
@@ -53,5 +58,6 @@ void read_settings(char* filename);
53
58
  void infer(char* model_root,
54
59
  char* save,
55
60
  corpus* corpus);
61
+
56
62
 
57
- #endif
63
+ #endif
@@ -51,6 +51,36 @@ void lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha) {
51
51
  }
52
52
  }
53
53
 
54
+ void quiet_lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha) {
55
+ int k; int w;
56
+
57
+ for (k = 0; k < model->num_topics; k++)
58
+ {
59
+ for (w = 0; w < model->num_terms; w++)
60
+ {
61
+ if (ss->class_word[k][w] > 0)
62
+ {
63
+ model->log_prob_w[k][w] =
64
+ log(ss->class_word[k][w]) -
65
+ log(ss->class_total[k]);
66
+ }
67
+ else
68
+ model->log_prob_w[k][w] = -100;
69
+ }
70
+ }
71
+ if (estimate_alpha == 1)
72
+ {
73
+ model->alpha = opt_alpha(ss->alpha_suffstats,
74
+ ss->num_docs,
75
+ model->num_topics);
76
+
77
+ printf("new alpha = %5.5f\n", model->alpha);
78
+ }
79
+ }
80
+
81
+
82
+
83
+
54
84
  /*
55
85
  * allocate sufficient statistics
56
86
  *
@@ -5,7 +5,6 @@
5
5
  #include <stdio.h>
6
6
  #include <math.h>
7
7
  #include "lda.h"
8
- #include "lda-alpha.h"
9
8
  #include "cokus.h"
10
9
 
11
10
  #define myrand() (double) (((unsigned long) randomMT()) / 4294967296.)
@@ -19,6 +18,7 @@ void corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
19
18
  void random_initialize_ss(lda_suffstats* ss, lda_model* model);
20
19
  void zero_initialize_ss(lda_suffstats* ss, lda_model* model);
21
20
  void lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha);
21
+ void quiet_lda_mle(lda_model* model, lda_suffstats* ss, int estimate_alpha);
22
22
  lda_model* load_lda_model(char* model_root);
23
23
 
24
24
  #endif
File without changes
File without changes
File without changes
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ealdent-lda-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason M. Adams
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2008-11-16 00:00:00 -08:00
13
+ date: 2008-11-19 00:00:00 -08:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -18,28 +18,28 @@ description:
18
18
  email: jasonmadams@gmail.com
19
19
  executables: []
20
20
 
21
- extensions: []
22
-
21
+ extensions:
22
+ - lib/extconf.rb
23
23
  extra_rdoc_files: []
24
24
 
25
25
  files:
26
26
  - README
27
27
  - license.txt
28
- - cokus.c
29
- - cokus.h
30
- - extconf.rb
31
- - lda-alpha.c
32
- - lda-alpha.h
33
- - lda-data.c
34
- - lda-data.h
35
- - lda-inference.c
36
- - lda-inference.h
37
- - lda-model.c
38
- - lda-model.h
39
- - lda.h
40
- - lda.rb
41
- - utils.c
42
- - utils.h
28
+ - lib/cokus.c
29
+ - lib/cokus.h
30
+ - lib/extconf.rb
31
+ - lib/lda-alpha.c
32
+ - lib/lda-alpha.h
33
+ - lib/lda-data.c
34
+ - lib/lda-data.h
35
+ - lib/lda-inference.c
36
+ - lib/lda-inference.h
37
+ - lib/lda-model.c
38
+ - lib/lda-model.h
39
+ - lib/lda.h
40
+ - lib/lda.rb
41
+ - lib/utils.c
42
+ - lib/utils.h
43
43
  has_rdoc: true
44
44
  homepage: http://github.com/ealdent/lda-ruby
45
45
  post_install_message: