ealdent-lda-ruby 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ Version 0.3.0
2
+ =============
3
+
4
+ - Completely broke backwards compatibility
5
+ - Reworked many classes to make functionality more reasonable
6
+ - Added ability to load documents from text files
7
+
8
+ Version 0.2.3
9
+ =============
10
+
11
+ - Bug fixes by Todd Foster
12
+
13
+
14
+ Version 0.2.2
15
+ =============
16
+
17
+ - First stable release
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 3
4
- :patch: 0
4
+ :patch: 1
@@ -1,12 +1,15 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
1
4
  # -*- encoding: utf-8 -*-
2
5
 
3
6
  Gem::Specification.new do |s|
4
7
  s.name = %q{lda-ruby}
5
- s.version = "0.3.0"
8
+ s.version = "0.3.1"
6
9
 
7
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
11
  s.authors = ["David Blei", "Jason Adams"]
9
- s.date = %q{2009-07-24}
12
+ s.date = %q{2009-08-11}
10
13
  s.description = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
11
14
  s.email = %q{jasonmadams@gmail.com}
12
15
  s.extensions = ["ext/lda-ruby/extconf.rb"]
@@ -16,6 +19,7 @@ Gem::Specification.new do |s|
16
19
  ]
17
20
  s.files = [
18
21
  ".gitignore",
22
+ "CHANGELOG",
19
23
  "README",
20
24
  "README.markdown",
21
25
  "Rakefile",
@@ -86,7 +86,7 @@ module Lda
86
86
  #
87
87
  # See also +print_topics+.
88
88
  #
89
- def top_words(words_per_topic = 10)
89
+ def top_word_indices(words_per_topic = 10)
90
90
  raise 'No vocabulary loaded.' unless @vocab
91
91
 
92
92
  # find the highest scoring words per topic
@@ -100,6 +100,17 @@ module Lda
100
100
  topics
101
101
  end
102
102
 
103
+ def top_words(words_per_topic = 10)
104
+ output = Hash.new
105
+
106
+ topics = top_word_indices(words_per_topic)
107
+ topics.each_pair do |topic_num, words|
108
+ output[topic_num] = words.map { |w| @vocab[w] }
109
+ end
110
+
111
+ output
112
+ end
113
+
103
114
  #
104
115
  # Get the phi matrix which can be used to assign probabilities to words
105
116
  # belonging to a specific topic in each document. The return value is a
@@ -1,6 +1,6 @@
1
1
  module Lda
2
2
  class Vocabulary
3
- attr_reader :words
3
+ attr_reader :words, :indexes
4
4
 
5
5
  def initialize(words = nil)
6
6
  @words = Hash.new do |hash, key|
@@ -13,10 +13,17 @@ module Lda
13
13
  end
14
14
 
15
15
  words.each { |w| @words[w] } if words
16
+ @indexes = Hash.new
17
+
18
+ @words.each_pair do |w, i|
19
+ @indexes[i] = w
20
+ end
16
21
  end
17
22
 
18
23
  def check_word(word)
19
- @words[word.dup]
24
+ w = @words[word.dup]
25
+ @indexes[w] = word.dup
26
+ w
20
27
  end
21
28
 
22
29
  def load_file(filename)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ealdent-lda-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Blei
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-07-24 00:00:00 -07:00
13
+ date: 2009-08-11 00:00:00 -07:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -25,6 +25,7 @@ extra_rdoc_files:
25
25
  - README.markdown
26
26
  files:
27
27
  - .gitignore
28
+ - CHANGELOG
28
29
  - README
29
30
  - README.markdown
30
31
  - Rakefile