lda-ruby 0.3.1 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -15,7 +15,7 @@ The original C code relied on files for the input and output. We felt it was nec
15
15
  lda.load_vocabulary("data/vocab.txt")
16
16
  lda.print_topics(20) # print the top 20 words per topic
17
17
 
18
- You can check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable.
18
+ If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable. If you encounter bugs specific to lda-ruby, please post an issue on the GitHub project.
19
19
 
20
20
  ## Resources
21
21
 
@@ -24,7 +24,6 @@ You can check out the mailing list for this project if you have any questions or
24
24
  + [Wikipedia article on LDA][wikipedia]
25
25
  + [Sample AP data][ap-data]
26
26
 
27
-
28
27
  ## References
29
28
 
30
29
  Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
data/Rakefile CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'yaml'
3
4
 
4
5
  begin
5
6
  require 'jeweler'
@@ -9,9 +10,10 @@ begin
9
10
  gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
10
11
  gem.email = "jasonmadams@gmail.com"
11
12
  gem.homepage = "http://github.com/ealdent/lda-ruby"
12
- gem.authors = ['David Blei', 'Jason Adams']
13
+ gem.authors = ['David Blei', 'Jason Adams', 'Rio Akasaka']
13
14
  gem.extensions = ['ext/lda-ruby/extconf.rb']
14
15
  gem.require_paths = ['lib', 'ext']
16
+ gem.add_dependency 'shoulda'
15
17
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
18
  end
17
19
 
data/VERSION.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 3
4
- :patch: 1
4
+ :patch: 4
5
+ :build:
@@ -778,7 +778,7 @@ static VALUE wrap_em(VALUE self, VALUE start) {
778
778
  if (!corpus_loaded)
779
779
  return Qnil;
780
780
 
781
- run_quiet_em(STR2CSTR(start), last_corpus);
781
+ run_quiet_em(StringValuePtr(start), last_corpus);
782
782
 
783
783
  return Qnil;
784
784
  }
@@ -788,7 +788,7 @@ static VALUE wrap_em(VALUE self, VALUE start) {
788
788
  * Load settings from the given file.
789
789
  */
790
790
  static VALUE wrap_load_settings(VALUE self, VALUE settings_file) {
791
- read_settings(STR2CSTR(settings_file));
791
+ read_settings(StringValuePtr(settings_file));
792
792
 
793
793
  return Qtrue;
794
794
  }
@@ -800,7 +800,7 @@ static VALUE wrap_load_settings(VALUE self, VALUE settings_file) {
800
800
  */
801
801
  static VALUE wrap_load_corpus(VALUE self, VALUE filename) {
802
802
  if (!corpus_loaded) {
803
- last_corpus = read_data(STR2CSTR(filename));
803
+ last_corpus = read_data(StringValuePtr(filename));
804
804
  corpus_loaded = TRUE;
805
805
  return Qtrue;
806
806
  } else {
@@ -15,6 +15,8 @@
15
15
  void free_lda_model(lda_model*);
16
16
  void save_lda_model(lda_model*, char*);
17
17
  lda_model* new_lda_model(int, int);
18
+ lda_model* quiet_new_lda_model(int num_terms, int num_topics);
19
+ lda_model* new_lda_model(int num_terms, int num_topics);
18
20
  lda_suffstats* new_lda_suffstats(lda_model* model);
19
21
  void free_lda_suffstats(lda_model* model, lda_suffstats* ss);
20
22
  void corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
data/lda-ruby.gemspec CHANGED
@@ -1,78 +1,76 @@
1
1
  # Generated by jeweler
2
- # DO NOT EDIT THIS FILE
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{lda-ruby}
8
- s.version = "0.3.1"
8
+ s.version = "0.3.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["David Blei", "Jason Adams"]
12
- s.date = %q{2009-08-11}
11
+ s.authors = ["David Blei", "Jason Adams", "Rio Akasaka"]
12
+ s.date = %q{2011-07-29}
13
13
  s.description = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
14
14
  s.email = %q{jasonmadams@gmail.com}
15
15
  s.extensions = ["ext/lda-ruby/extconf.rb"]
16
16
  s.extra_rdoc_files = [
17
17
  "README",
18
- "README.markdown"
18
+ "README.markdown"
19
19
  ]
20
20
  s.files = [
21
- ".gitignore",
22
- "CHANGELOG",
23
- "README",
24
- "README.markdown",
25
- "Rakefile",
26
- "VERSION.yml",
27
- "ext/lda-ruby/Makefile",
28
- "ext/lda-ruby/cokus.c",
29
- "ext/lda-ruby/cokus.h",
30
- "ext/lda-ruby/extconf.rb",
31
- "ext/lda-ruby/lda-alpha.c",
32
- "ext/lda-ruby/lda-alpha.h",
33
- "ext/lda-ruby/lda-data.c",
34
- "ext/lda-ruby/lda-data.h",
35
- "ext/lda-ruby/lda-inference.c",
36
- "ext/lda-ruby/lda-inference.h",
37
- "ext/lda-ruby/lda-model.c",
38
- "ext/lda-ruby/lda-model.h",
39
- "ext/lda-ruby/lda.h",
40
- "ext/lda-ruby/utils.c",
41
- "ext/lda-ruby/utils.h",
42
- "lda-ruby.gemspec",
43
- "lib/lda-ruby.rb",
44
- "lib/lda-ruby/corpus/corpus.rb",
45
- "lib/lda-ruby/corpus/data_corpus.rb",
46
- "lib/lda-ruby/corpus/directory_corpus.rb",
47
- "lib/lda-ruby/corpus/text_corpus.rb",
48
- "lib/lda-ruby/document/data_document.rb",
49
- "lib/lda-ruby/document/document.rb",
50
- "lib/lda-ruby/document/text_document.rb",
51
- "lib/lda-ruby/vocabulary.rb",
52
- "license.txt",
53
- "test/data/.gitignore",
54
- "test/data/docs.dat",
55
- "test/data/wiki-test-docs.yml",
56
- "test/lda_ruby_test.rb",
57
- "test/test_helper.rb"
21
+ "CHANGELOG",
22
+ "README",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION.yml",
26
+ "ext/lda-ruby/Makefile",
27
+ "ext/lda-ruby/cokus.c",
28
+ "ext/lda-ruby/cokus.h",
29
+ "ext/lda-ruby/extconf.rb",
30
+ "ext/lda-ruby/lda-alpha.c",
31
+ "ext/lda-ruby/lda-alpha.h",
32
+ "ext/lda-ruby/lda-data.c",
33
+ "ext/lda-ruby/lda-data.h",
34
+ "ext/lda-ruby/lda-inference.c",
35
+ "ext/lda-ruby/lda-inference.h",
36
+ "ext/lda-ruby/lda-model.c",
37
+ "ext/lda-ruby/lda-model.h",
38
+ "ext/lda-ruby/lda.h",
39
+ "ext/lda-ruby/utils.c",
40
+ "ext/lda-ruby/utils.h",
41
+ "lda-ruby.gemspec",
42
+ "lib/lda-ruby.rb",
43
+ "lib/lda-ruby/corpus/corpus.rb",
44
+ "lib/lda-ruby/corpus/data_corpus.rb",
45
+ "lib/lda-ruby/corpus/directory_corpus.rb",
46
+ "lib/lda-ruby/corpus/text_corpus.rb",
47
+ "lib/lda-ruby/document/data_document.rb",
48
+ "lib/lda-ruby/document/document.rb",
49
+ "lib/lda-ruby/document/text_document.rb",
50
+ "lib/lda-ruby/vocabulary.rb",
51
+ "license.txt",
52
+ "test/data/.gitignore",
53
+ "test/data/docs.dat",
54
+ "test/data/sample.rb",
55
+ "test/data/wiki-test-docs.yml",
56
+ "test/lda_ruby_test.rb",
57
+ "test/test_helper.rb"
58
58
  ]
59
59
  s.homepage = %q{http://github.com/ealdent/lda-ruby}
60
- s.rdoc_options = ["--charset=UTF-8"]
61
60
  s.require_paths = ["lib", "ext"]
62
- s.rubygems_version = %q{1.3.4}
61
+ s.rubygems_version = %q{1.6.2}
63
62
  s.summary = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
64
- s.test_files = [
65
- "test/lda_ruby_test.rb",
66
- "test/test_helper.rb"
67
- ]
68
63
 
69
64
  if s.respond_to? :specification_version then
70
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
71
65
  s.specification_version = 3
72
66
 
73
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
67
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
68
+ s.add_runtime_dependency(%q<shoulda>, [">= 0"])
74
69
  else
70
+ s.add_dependency(%q<shoulda>, [">= 0"])
75
71
  end
76
72
  else
73
+ s.add_dependency(%q<shoulda>, [">= 0"])
77
74
  end
78
75
  end
76
+
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'lda-ruby'
5
+
6
+ # Load the Corpus. The AP data from David Blei's website is in the "DataCorpus" format
7
+ corpus = Lda::DataCorpus.new("ap/ap.dat")
8
+
9
+ # Initialize the Lda instance with the corpus
10
+ lda = Lda::Lda.new(corpus)
11
+
12
+ # Run the EM algorithm using random starting points. Fixed starting points will use the first n documents
13
+ # to initialize the topics, where n is the number of topics.
14
+ lda.em("random") # run EM algorithm using random starting points
15
+
16
+ # Load the vocabulary file, which is required when using DataCorpus objects
17
+ lda.load_vocabulary("ap/vocab.txt")
18
+
19
+ # Print the top 20 words per topic
20
+ lda.print_topics(20)
@@ -110,7 +110,7 @@ class LdaRubyTest < Test::Unit::TestCase
110
110
 
111
111
  context "An Lda::DataCorpus instance loaded from a file" do
112
112
  setup do
113
- @filename = 'data/docs.dat'
113
+ @filename = File.join(File.dirname(__FILE__), 'data', 'docs.dat')
114
114
  @filetext = File.open(@filename, 'r') { |f| f.read }
115
115
  @corpus = Lda::DataCorpus.new(@filename)
116
116
  end
@@ -126,7 +126,7 @@ class LdaRubyTest < Test::Unit::TestCase
126
126
 
127
127
  context "An Lda::TextCorpus instance loaded from a file" do
128
128
  setup do
129
- @filename = 'data/wiki-test-docs.yml'
129
+ @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
130
130
  @filedocs = YAML::load_file(@filename)
131
131
  @corpus = Lda::TextCorpus.new(@filename)
132
132
  end
@@ -142,13 +142,13 @@ class LdaRubyTest < Test::Unit::TestCase
142
142
 
143
143
  context "An Lda::DirectoryCorpus instance loaded from a directory" do
144
144
  setup do
145
- @path = 'data/tmp'
145
+ @path = File.join(File.dirname(__FILE__), 'data', 'tmp')
146
146
  @extension = 'txt'
147
147
  Dir.mkdir(@path)
148
- @original_filename = 'data/wiki-test-docs.yml'
148
+ @original_filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
149
149
  @filedocs = YAML::load_file(@original_filename)
150
150
  @filedocs.each_with_index do |doc, idx|
151
- File.open("data/tmp/doc_#{idx + 1}.txt", 'w') { |f| f.write(doc) }
151
+ File.open(File.join(@path, "doc_#{idx + 1}.txt"), 'w') { |f| f.write(doc) }
152
152
  end
153
153
 
154
154
  @corpus = Lda::DirectoryCorpus.new(@path, @extension)
@@ -173,11 +173,11 @@ class LdaRubyTest < Test::Unit::TestCase
173
173
  setup do
174
174
  @vocab = Lda::Vocabulary.new
175
175
  @words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6']
176
- @filename1 = 'data/tmp_file.txt'
176
+ @filename1 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.txt')
177
177
  File.open(@filename1, 'w') do |f|
178
178
  @words.each { |w| f.write("#{w}\n") }
179
179
  end
180
- @filename2 = 'data/tmp_file.yml'
180
+ @filename2 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.yml')
181
181
  File.open(@filename2, 'w') { |f| YAML::dump(@words, f) }
182
182
  end
183
183
 
@@ -208,7 +208,7 @@ class LdaRubyTest < Test::Unit::TestCase
208
208
 
209
209
  context "An Lda::Lda instance" do
210
210
  setup do
211
- @filename = 'data/wiki-test-docs.yml'
211
+ @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
212
212
  @filedocs = YAML::load_file(@filename)
213
213
  @corpus = Lda::TextCorpus.new(@filename)
214
214
 
data/test/test_helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'test/unit'
3
3
  require 'shoulda'
4
+ require 'yaml'
4
5
 
5
6
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
metadata CHANGED
@@ -1,30 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: lda-ruby
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.4
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - David Blei
8
9
  - Jason Adams
10
+ - Rio Akasaka
9
11
  autorequire:
10
12
  bindir: bin
11
13
  cert_chain: []
12
-
13
- date: 2009-08-11 00:00:00 -04:00
14
+ date: 2011-07-29 00:00:00.000000000 -04:00
14
15
  default_executable:
15
- dependencies: []
16
-
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: shoulda
19
+ requirement: &2161566540 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ! '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: *2161566540
17
28
  description: Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.
18
29
  email: jasonmadams@gmail.com
19
30
  executables: []
20
-
21
- extensions:
31
+ extensions:
22
32
  - ext/lda-ruby/extconf.rb
23
- extra_rdoc_files:
33
+ extra_rdoc_files:
24
34
  - README
25
35
  - README.markdown
26
- files:
27
- - .gitignore
36
+ files:
28
37
  - CHANGELOG
29
38
  - README
30
39
  - README.markdown
@@ -58,38 +67,34 @@ files:
58
67
  - license.txt
59
68
  - test/data/.gitignore
60
69
  - test/data/docs.dat
70
+ - test/data/sample.rb
61
71
  - test/data/wiki-test-docs.yml
62
72
  - test/lda_ruby_test.rb
63
73
  - test/test_helper.rb
64
74
  has_rdoc: true
65
75
  homepage: http://github.com/ealdent/lda-ruby
66
76
  licenses: []
67
-
68
77
  post_install_message:
69
- rdoc_options:
70
- - --charset=UTF-8
71
- require_paths:
78
+ rdoc_options: []
79
+ require_paths:
72
80
  - lib
73
81
  - ext
74
- required_ruby_version: !ruby/object:Gem::Requirement
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- version: "0"
79
- version:
80
- required_rubygems_version: !ruby/object:Gem::Requirement
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- version: "0"
85
- version:
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
86
94
  requirements: []
87
-
88
95
  rubyforge_project:
89
- rubygems_version: 1.3.4
96
+ rubygems_version: 1.6.2
90
97
  signing_key:
91
98
  specification_version: 3
92
99
  summary: Ruby port of Latent Dirichlet Allocation by David M. Blei.
93
- test_files:
94
- - test/lda_ruby_test.rb
95
- - test/test_helper.rb
100
+ test_files: []
data/.gitignore DELETED
@@ -1,5 +0,0 @@
1
- *.o
2
- *.bundle
3
- *.tmproj
4
- pkg
5
- test/blei