lda-ruby 0.3.1 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -15,7 +15,7 @@ The original C code relied on files for the input and output. We felt it was nec
15
15
  lda.load_vocabulary("data/vocab.txt")
16
16
  lda.print_topics(20) # print the top 20 words per topic
17
17
 
18
- You can check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable.
18
+ If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable. If you encounter bugs specific to lda-ruby, please post an issue on the GitHub project.
19
19
 
20
20
  ## Resources
21
21
 
@@ -24,7 +24,6 @@ You can check out the mailing list for this project if you have any questions or
24
24
  + [Wikipedia article on LDA][wikipedia]
25
25
  + [Sample AP data][ap-data]
26
26
 
27
-
28
27
  ## References
29
28
 
30
29
  Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
data/Rakefile CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'yaml'
3
4
 
4
5
  begin
5
6
  require 'jeweler'
@@ -9,9 +10,10 @@ begin
9
10
  gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
10
11
  gem.email = "jasonmadams@gmail.com"
11
12
  gem.homepage = "http://github.com/ealdent/lda-ruby"
12
- gem.authors = ['David Blei', 'Jason Adams']
13
+ gem.authors = ['David Blei', 'Jason Adams', 'Rio Akasaka']
13
14
  gem.extensions = ['ext/lda-ruby/extconf.rb']
14
15
  gem.require_paths = ['lib', 'ext']
16
+ gem.add_dependency 'shoulda'
15
17
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
18
  end
17
19
 
data/VERSION.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 3
4
- :patch: 1
4
+ :patch: 4
5
+ :build:
@@ -778,7 +778,7 @@ static VALUE wrap_em(VALUE self, VALUE start) {
778
778
  if (!corpus_loaded)
779
779
  return Qnil;
780
780
 
781
- run_quiet_em(STR2CSTR(start), last_corpus);
781
+ run_quiet_em(StringValuePtr(start), last_corpus);
782
782
 
783
783
  return Qnil;
784
784
  }
@@ -788,7 +788,7 @@ static VALUE wrap_em(VALUE self, VALUE start) {
788
788
  * Load settings from the given file.
789
789
  */
790
790
  static VALUE wrap_load_settings(VALUE self, VALUE settings_file) {
791
- read_settings(STR2CSTR(settings_file));
791
+ read_settings(StringValuePtr(settings_file));
792
792
 
793
793
  return Qtrue;
794
794
  }
@@ -800,7 +800,7 @@ static VALUE wrap_load_settings(VALUE self, VALUE settings_file) {
800
800
  */
801
801
  static VALUE wrap_load_corpus(VALUE self, VALUE filename) {
802
802
  if (!corpus_loaded) {
803
- last_corpus = read_data(STR2CSTR(filename));
803
+ last_corpus = read_data(StringValuePtr(filename));
804
804
  corpus_loaded = TRUE;
805
805
  return Qtrue;
806
806
  } else {
@@ -15,6 +15,8 @@
15
15
  void free_lda_model(lda_model*);
16
16
  void save_lda_model(lda_model*, char*);
17
17
  lda_model* new_lda_model(int, int);
18
+ lda_model* quiet_new_lda_model(int num_terms, int num_topics);
19
+ lda_model* new_lda_model(int num_terms, int num_topics);
18
20
  lda_suffstats* new_lda_suffstats(lda_model* model);
19
21
  void free_lda_suffstats(lda_model* model, lda_suffstats* ss);
20
22
  void corpus_initialize_ss(lda_suffstats* ss, lda_model* model, corpus* c);
data/lda-ruby.gemspec CHANGED
@@ -1,78 +1,76 @@
1
1
  # Generated by jeweler
2
- # DO NOT EDIT THIS FILE
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{lda-ruby}
8
- s.version = "0.3.1"
8
+ s.version = "0.3.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["David Blei", "Jason Adams"]
12
- s.date = %q{2009-08-11}
11
+ s.authors = ["David Blei", "Jason Adams", "Rio Akasaka"]
12
+ s.date = %q{2011-07-29}
13
13
  s.description = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
14
14
  s.email = %q{jasonmadams@gmail.com}
15
15
  s.extensions = ["ext/lda-ruby/extconf.rb"]
16
16
  s.extra_rdoc_files = [
17
17
  "README",
18
- "README.markdown"
18
+ "README.markdown"
19
19
  ]
20
20
  s.files = [
21
- ".gitignore",
22
- "CHANGELOG",
23
- "README",
24
- "README.markdown",
25
- "Rakefile",
26
- "VERSION.yml",
27
- "ext/lda-ruby/Makefile",
28
- "ext/lda-ruby/cokus.c",
29
- "ext/lda-ruby/cokus.h",
30
- "ext/lda-ruby/extconf.rb",
31
- "ext/lda-ruby/lda-alpha.c",
32
- "ext/lda-ruby/lda-alpha.h",
33
- "ext/lda-ruby/lda-data.c",
34
- "ext/lda-ruby/lda-data.h",
35
- "ext/lda-ruby/lda-inference.c",
36
- "ext/lda-ruby/lda-inference.h",
37
- "ext/lda-ruby/lda-model.c",
38
- "ext/lda-ruby/lda-model.h",
39
- "ext/lda-ruby/lda.h",
40
- "ext/lda-ruby/utils.c",
41
- "ext/lda-ruby/utils.h",
42
- "lda-ruby.gemspec",
43
- "lib/lda-ruby.rb",
44
- "lib/lda-ruby/corpus/corpus.rb",
45
- "lib/lda-ruby/corpus/data_corpus.rb",
46
- "lib/lda-ruby/corpus/directory_corpus.rb",
47
- "lib/lda-ruby/corpus/text_corpus.rb",
48
- "lib/lda-ruby/document/data_document.rb",
49
- "lib/lda-ruby/document/document.rb",
50
- "lib/lda-ruby/document/text_document.rb",
51
- "lib/lda-ruby/vocabulary.rb",
52
- "license.txt",
53
- "test/data/.gitignore",
54
- "test/data/docs.dat",
55
- "test/data/wiki-test-docs.yml",
56
- "test/lda_ruby_test.rb",
57
- "test/test_helper.rb"
21
+ "CHANGELOG",
22
+ "README",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION.yml",
26
+ "ext/lda-ruby/Makefile",
27
+ "ext/lda-ruby/cokus.c",
28
+ "ext/lda-ruby/cokus.h",
29
+ "ext/lda-ruby/extconf.rb",
30
+ "ext/lda-ruby/lda-alpha.c",
31
+ "ext/lda-ruby/lda-alpha.h",
32
+ "ext/lda-ruby/lda-data.c",
33
+ "ext/lda-ruby/lda-data.h",
34
+ "ext/lda-ruby/lda-inference.c",
35
+ "ext/lda-ruby/lda-inference.h",
36
+ "ext/lda-ruby/lda-model.c",
37
+ "ext/lda-ruby/lda-model.h",
38
+ "ext/lda-ruby/lda.h",
39
+ "ext/lda-ruby/utils.c",
40
+ "ext/lda-ruby/utils.h",
41
+ "lda-ruby.gemspec",
42
+ "lib/lda-ruby.rb",
43
+ "lib/lda-ruby/corpus/corpus.rb",
44
+ "lib/lda-ruby/corpus/data_corpus.rb",
45
+ "lib/lda-ruby/corpus/directory_corpus.rb",
46
+ "lib/lda-ruby/corpus/text_corpus.rb",
47
+ "lib/lda-ruby/document/data_document.rb",
48
+ "lib/lda-ruby/document/document.rb",
49
+ "lib/lda-ruby/document/text_document.rb",
50
+ "lib/lda-ruby/vocabulary.rb",
51
+ "license.txt",
52
+ "test/data/.gitignore",
53
+ "test/data/docs.dat",
54
+ "test/data/sample.rb",
55
+ "test/data/wiki-test-docs.yml",
56
+ "test/lda_ruby_test.rb",
57
+ "test/test_helper.rb"
58
58
  ]
59
59
  s.homepage = %q{http://github.com/ealdent/lda-ruby}
60
- s.rdoc_options = ["--charset=UTF-8"]
61
60
  s.require_paths = ["lib", "ext"]
62
- s.rubygems_version = %q{1.3.4}
61
+ s.rubygems_version = %q{1.6.2}
63
62
  s.summary = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
64
- s.test_files = [
65
- "test/lda_ruby_test.rb",
66
- "test/test_helper.rb"
67
- ]
68
63
 
69
64
  if s.respond_to? :specification_version then
70
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
71
65
  s.specification_version = 3
72
66
 
73
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
67
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
68
+ s.add_runtime_dependency(%q<shoulda>, [">= 0"])
74
69
  else
70
+ s.add_dependency(%q<shoulda>, [">= 0"])
75
71
  end
76
72
  else
73
+ s.add_dependency(%q<shoulda>, [">= 0"])
77
74
  end
78
75
  end
76
+
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'lda-ruby'
5
+
6
+ # Load the Corpus. The AP data from David Blei's website is in the "DataCorpus" format
7
+ corpus = Lda::DataCorpus.new("ap/ap.dat")
8
+
9
+ # Initialize the Lda instance with the corpus
10
+ lda = Lda::Lda.new(corpus)
11
+
12
+ # Run the EM algorithm using random starting points. Fixed starting points will use the first n documents
13
+ # to initialize the topics, where n is the number of topics.
14
+ lda.em("random") # run EM algorithm using random starting points
15
+
16
+ # Load the vocabulary file, which is required when using DataCorpus objects
17
+ lda.load_vocabulary("ap/vocab.txt")
18
+
19
+ # Print the top 20 words per topic
20
+ lda.print_topics(20)
@@ -110,7 +110,7 @@ class LdaRubyTest < Test::Unit::TestCase
110
110
 
111
111
  context "An Lda::DataCorpus instance loaded from a file" do
112
112
  setup do
113
- @filename = 'data/docs.dat'
113
+ @filename = File.join(File.dirname(__FILE__), 'data', 'docs.dat')
114
114
  @filetext = File.open(@filename, 'r') { |f| f.read }
115
115
  @corpus = Lda::DataCorpus.new(@filename)
116
116
  end
@@ -126,7 +126,7 @@ class LdaRubyTest < Test::Unit::TestCase
126
126
 
127
127
  context "An Lda::TextCorpus instance loaded from a file" do
128
128
  setup do
129
- @filename = 'data/wiki-test-docs.yml'
129
+ @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
130
130
  @filedocs = YAML::load_file(@filename)
131
131
  @corpus = Lda::TextCorpus.new(@filename)
132
132
  end
@@ -142,13 +142,13 @@ class LdaRubyTest < Test::Unit::TestCase
142
142
 
143
143
  context "An Lda::DirectoryCorpus instance loaded from a directory" do
144
144
  setup do
145
- @path = 'data/tmp'
145
+ @path = File.join(File.dirname(__FILE__), 'data', 'tmp')
146
146
  @extension = 'txt'
147
147
  Dir.mkdir(@path)
148
- @original_filename = 'data/wiki-test-docs.yml'
148
+ @original_filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
149
149
  @filedocs = YAML::load_file(@original_filename)
150
150
  @filedocs.each_with_index do |doc, idx|
151
- File.open("data/tmp/doc_#{idx + 1}.txt", 'w') { |f| f.write(doc) }
151
+ File.open(File.join(@path, "doc_#{idx + 1}.txt"), 'w') { |f| f.write(doc) }
152
152
  end
153
153
 
154
154
  @corpus = Lda::DirectoryCorpus.new(@path, @extension)
@@ -173,11 +173,11 @@ class LdaRubyTest < Test::Unit::TestCase
173
173
  setup do
174
174
  @vocab = Lda::Vocabulary.new
175
175
  @words = ['word1', 'word2', 'word3', 'word4', 'word5', 'word6']
176
- @filename1 = 'data/tmp_file.txt'
176
+ @filename1 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.txt')
177
177
  File.open(@filename1, 'w') do |f|
178
178
  @words.each { |w| f.write("#{w}\n") }
179
179
  end
180
- @filename2 = 'data/tmp_file.yml'
180
+ @filename2 = File.join(File.dirname(__FILE__), 'data', 'tmp_file.yml')
181
181
  File.open(@filename2, 'w') { |f| YAML::dump(@words, f) }
182
182
  end
183
183
 
@@ -208,7 +208,7 @@ class LdaRubyTest < Test::Unit::TestCase
208
208
 
209
209
  context "An Lda::Lda instance" do
210
210
  setup do
211
- @filename = 'data/wiki-test-docs.yml'
211
+ @filename = File.join(File.dirname(__FILE__), 'data', 'wiki-test-docs.yml')
212
212
  @filedocs = YAML::load_file(@filename)
213
213
  @corpus = Lda::TextCorpus.new(@filename)
214
214
 
data/test/test_helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'test/unit'
3
3
  require 'shoulda'
4
+ require 'yaml'
4
5
 
5
6
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
metadata CHANGED
@@ -1,30 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: lda-ruby
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.4
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - David Blei
8
9
  - Jason Adams
10
+ - Rio Akasaka
9
11
  autorequire:
10
12
  bindir: bin
11
13
  cert_chain: []
12
-
13
- date: 2009-08-11 00:00:00 -04:00
14
+ date: 2011-07-29 00:00:00.000000000 -04:00
14
15
  default_executable:
15
- dependencies: []
16
-
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: shoulda
19
+ requirement: &2161566540 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ! '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: *2161566540
17
28
  description: Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.
18
29
  email: jasonmadams@gmail.com
19
30
  executables: []
20
-
21
- extensions:
31
+ extensions:
22
32
  - ext/lda-ruby/extconf.rb
23
- extra_rdoc_files:
33
+ extra_rdoc_files:
24
34
  - README
25
35
  - README.markdown
26
- files:
27
- - .gitignore
36
+ files:
28
37
  - CHANGELOG
29
38
  - README
30
39
  - README.markdown
@@ -58,38 +67,34 @@ files:
58
67
  - license.txt
59
68
  - test/data/.gitignore
60
69
  - test/data/docs.dat
70
+ - test/data/sample.rb
61
71
  - test/data/wiki-test-docs.yml
62
72
  - test/lda_ruby_test.rb
63
73
  - test/test_helper.rb
64
74
  has_rdoc: true
65
75
  homepage: http://github.com/ealdent/lda-ruby
66
76
  licenses: []
67
-
68
77
  post_install_message:
69
- rdoc_options:
70
- - --charset=UTF-8
71
- require_paths:
78
+ rdoc_options: []
79
+ require_paths:
72
80
  - lib
73
81
  - ext
74
- required_ruby_version: !ruby/object:Gem::Requirement
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- version: "0"
79
- version:
80
- required_rubygems_version: !ruby/object:Gem::Requirement
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- version: "0"
85
- version:
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
86
94
  requirements: []
87
-
88
95
  rubyforge_project:
89
- rubygems_version: 1.3.4
96
+ rubygems_version: 1.6.2
90
97
  signing_key:
91
98
  specification_version: 3
92
99
  summary: Ruby port of Latent Dirichlet Allocation by David M. Blei.
93
- test_files:
94
- - test/lda_ruby_test.rb
95
- - test/test_helper.rb
100
+ test_files: []
data/.gitignore DELETED
@@ -1,5 +0,0 @@
1
- *.o
2
- *.bundle
3
- *.tmproj
4
- pkg
5
- test/blei