ealdent-lda-ruby 0.3.0 → 0.3.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ Version 0.3.0
2
+ =============
3
+
4
+ - Completely broke backwards compatibility
5
+ - Reworked many classes to make functionality more reasonable
6
+ - Added ability to load documents from text files
7
+
8
+ Version 0.2.3
9
+ =============
10
+
11
+ - Bug fixes by Todd Foster
12
+
13
+
14
+ Version 0.2.2
15
+ =============
16
+
17
+ - First stable release
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 3
4
- :patch: 0
4
+ :patch: 1
@@ -1,12 +1,15 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
1
4
  # -*- encoding: utf-8 -*-
2
5
 
3
6
  Gem::Specification.new do |s|
4
7
  s.name = %q{lda-ruby}
5
- s.version = "0.3.0"
8
+ s.version = "0.3.1"
6
9
 
7
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
11
  s.authors = ["David Blei", "Jason Adams"]
9
- s.date = %q{2009-07-24}
12
+ s.date = %q{2009-08-11}
10
13
  s.description = %q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
11
14
  s.email = %q{jasonmadams@gmail.com}
12
15
  s.extensions = ["ext/lda-ruby/extconf.rb"]
@@ -16,6 +19,7 @@ Gem::Specification.new do |s|
16
19
  ]
17
20
  s.files = [
18
21
  ".gitignore",
22
+ "CHANGELOG",
19
23
  "README",
20
24
  "README.markdown",
21
25
  "Rakefile",
@@ -86,7 +86,7 @@ module Lda
86
86
  #
87
87
  # See also +print_topics+.
88
88
  #
89
- def top_words(words_per_topic = 10)
89
+ def top_word_indices(words_per_topic = 10)
90
90
  raise 'No vocabulary loaded.' unless @vocab
91
91
 
92
92
  # find the highest scoring words per topic
@@ -100,6 +100,17 @@ module Lda
100
100
  topics
101
101
  end
102
102
 
103
+ def top_words(words_per_topic = 10)
104
+ output = Hash.new
105
+
106
+ topics = top_word_indices(words_per_topic)
107
+ topics.each_pair do |topic_num, words|
108
+ output[topic_num] = words.map { |w| @vocab[w] }
109
+ end
110
+
111
+ output
112
+ end
113
+
103
114
  #
104
115
  # Get the phi matrix which can be used to assign probabilities to words
105
116
  # belonging to a specific topic in each document. The return value is a
@@ -1,6 +1,6 @@
1
1
  module Lda
2
2
  class Vocabulary
3
- attr_reader :words
3
+ attr_reader :words, :indexes
4
4
 
5
5
  def initialize(words = nil)
6
6
  @words = Hash.new do |hash, key|
@@ -13,10 +13,17 @@ module Lda
13
13
  end
14
14
 
15
15
  words.each { |w| @words[w] } if words
16
+ @indexes = Hash.new
17
+
18
+ @words.each_pair do |w, i|
19
+ @indexes[i] = w
20
+ end
16
21
  end
17
22
 
18
23
  def check_word(word)
19
- @words[word.dup]
24
+ w = @words[word.dup]
25
+ @indexes[w] = word.dup
26
+ w
20
27
  end
21
28
 
22
29
  def load_file(filename)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ealdent-lda-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Blei
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-07-24 00:00:00 -07:00
13
+ date: 2009-08-11 00:00:00 -07:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -25,6 +25,7 @@ extra_rdoc_files:
25
25
  - README.markdown
26
26
  files:
27
27
  - .gitignore
28
+ - CHANGELOG
28
29
  - README
29
30
  - README.markdown
30
31
  - Rakefile