ealdent-lda-ruby 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +17 -0
- data/VERSION.yml +1 -1
- data/lda-ruby.gemspec +6 -2
- data/lib/lda-ruby.rb +12 -1
- data/lib/lda-ruby/vocabulary.rb +9 -2
- metadata +3 -2
data/CHANGELOG
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Version 0.3.0
|
2
|
+
=============
|
3
|
+
|
4
|
+
- Completely broke backwards compatibility
|
5
|
+
- Reworked many classes to make functionality more reasonable
|
6
|
+
- Added ability to load documents from text files
|
7
|
+
|
8
|
+
Version 0.2.3
|
9
|
+
=============
|
10
|
+
|
11
|
+
- Bug fixes by Todd Foster
|
12
|
+
|
13
|
+
|
14
|
+
Version 0.2.2
|
15
|
+
=============
|
16
|
+
|
17
|
+
- First stable release
|
data/VERSION.yml
CHANGED
data/lda-ruby.gemspec
CHANGED
@@ -1,12 +1,15 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
1
4
|
# -*- encoding: utf-8 -*-
|
2
5
|
|
3
6
|
Gem::Specification.new do |s|
|
4
7
|
s.name = %q{lda-ruby}
|
5
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.3.1"
|
6
9
|
|
7
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
11
|
s.authors = ["David Blei", "Jason Adams"]
|
9
|
-
s.date = %q{2009-
|
12
|
+
s.date = %q{2009-08-11}
|
10
13
|
s.description = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
|
11
14
|
s.email = %q{jasonmadams@gmail.com}
|
12
15
|
s.extensions = ["ext/lda-ruby/extconf.rb"]
|
@@ -16,6 +19,7 @@ Gem::Specification.new do |s|
|
|
16
19
|
]
|
17
20
|
s.files = [
|
18
21
|
".gitignore",
|
22
|
+
"CHANGELOG",
|
19
23
|
"README",
|
20
24
|
"README.markdown",
|
21
25
|
"Rakefile",
|
data/lib/lda-ruby.rb
CHANGED
@@ -86,7 +86,7 @@ module Lda
|
|
86
86
|
#
|
87
87
|
# See also +print_topics+.
|
88
88
|
#
|
89
|
-
def
|
89
|
+
def top_word_indices(words_per_topic = 10)
|
90
90
|
raise 'No vocabulary loaded.' unless @vocab
|
91
91
|
|
92
92
|
# find the highest scoring words per topic
|
@@ -100,6 +100,17 @@ module Lda
|
|
100
100
|
topics
|
101
101
|
end
|
102
102
|
|
103
|
+
def top_words(words_per_topic = 10)
|
104
|
+
output = Hash.new
|
105
|
+
|
106
|
+
topics = top_word_indices(words_per_topic)
|
107
|
+
topics.each_pair do |topic_num, words|
|
108
|
+
output[topic_num] = words.map { |w| @vocab[w] }
|
109
|
+
end
|
110
|
+
|
111
|
+
output
|
112
|
+
end
|
113
|
+
|
103
114
|
#
|
104
115
|
# Get the phi matrix which can be used to assign probabilities to words
|
105
116
|
# belonging to a specific topic in each document. The return value is a
|
data/lib/lda-ruby/vocabulary.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Lda
|
2
2
|
class Vocabulary
|
3
|
-
attr_reader :words
|
3
|
+
attr_reader :words, :indexes
|
4
4
|
|
5
5
|
def initialize(words = nil)
|
6
6
|
@words = Hash.new do |hash, key|
|
@@ -13,10 +13,17 @@ module Lda
|
|
13
13
|
end
|
14
14
|
|
15
15
|
words.each { |w| @words[w] } if words
|
16
|
+
@indexes = Hash.new
|
17
|
+
|
18
|
+
@words.each_pair do |w, i|
|
19
|
+
@indexes[i] = w
|
20
|
+
end
|
16
21
|
end
|
17
22
|
|
18
23
|
def check_word(word)
|
19
|
-
@words[word.dup]
|
24
|
+
w = @words[word.dup]
|
25
|
+
@indexes[w] = word.dup
|
26
|
+
w
|
20
27
|
end
|
21
28
|
|
22
29
|
def load_file(filename)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ealdent-lda-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Blei
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-08-11 00:00:00 -07:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|
@@ -25,6 +25,7 @@ extra_rdoc_files:
|
|
25
25
|
- README.markdown
|
26
26
|
files:
|
27
27
|
- .gitignore
|
28
|
+
- CHANGELOG
|
28
29
|
- README
|
29
30
|
- README.markdown
|
30
31
|
- Rakefile
|