lda-ruby 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/{CHANGELOG → CHANGELOG.md} +20 -0
- data/Gemfile +9 -0
- data/README.md +157 -0
- data/VERSION.yml +4 -4
- data/docs/modernization-handoff.md +190 -0
- data/docs/porting-strategy.md +127 -0
- data/docs/precompiled-platform-policy.md +68 -0
- data/docs/release-runbook.md +157 -0
- data/ext/lda-ruby/extconf.rb +10 -6
- data/ext/lda-ruby/lda-inference.c +21 -5
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +48 -0
- data/ext/lda-ruby-rust/extconf.rb +123 -0
- data/ext/lda-ruby-rust/src/lib.rs +456 -0
- data/lda-ruby.gemspec +70 -71
- data/lib/lda-ruby/backends/base.rb +129 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +613 -0
- data/lib/lda-ruby/backends/rust.rb +226 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/corpus/corpus.rb +17 -15
- data/lib/lda-ruby/corpus/data_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/directory_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/text_corpus.rb +14 -9
- data/lib/lda-ruby/document/document.rb +6 -8
- data/lib/lda-ruby/document/text_document.rb +5 -4
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby.rb +293 -48
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +49 -11
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/release_scripts_test.rb +54 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +1 -7
- data/test/test_helper.rb +5 -6
- metadata +62 -75
- data/README +0 -21
- data/README.markdown +0 -37
- data/Rakefile +0 -61
- data/ext/lda-ruby/Makefile +0 -181
- data/test/data/.gitignore +0 -2
- data/test/simple_test.rb +0 -26
data/test/lda_ruby_test.rb
CHANGED
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
require 'test/unit'
|
|
3
|
-
require 'shoulda'
|
|
4
|
-
require 'yaml'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
require 'lda-ruby'
|
|
1
|
+
require_relative "test_helper"
|
|
9
2
|
|
|
10
3
|
class LdaRubyTest < Test::Unit::TestCase
|
|
11
4
|
context "A Document instance" do
|
|
@@ -19,7 +12,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
19
12
|
end
|
|
20
13
|
|
|
21
14
|
should "not have text" do
|
|
22
|
-
assert !@document.
|
|
15
|
+
assert !@document.text?
|
|
23
16
|
end
|
|
24
17
|
|
|
25
18
|
should "be empty" do
|
|
@@ -51,7 +44,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
51
44
|
end
|
|
52
45
|
|
|
53
46
|
should "not have text" do
|
|
54
|
-
assert !@document.
|
|
47
|
+
assert !@document.text?
|
|
55
48
|
end
|
|
56
49
|
|
|
57
50
|
should "have word count equal to what was added" do
|
|
@@ -78,7 +71,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
78
71
|
end
|
|
79
72
|
|
|
80
73
|
should "have text" do
|
|
81
|
-
assert @document.
|
|
74
|
+
assert @document.text?
|
|
82
75
|
end
|
|
83
76
|
|
|
84
77
|
should "have word count equal to what was added" do
|
|
@@ -240,6 +233,33 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
240
233
|
assert !@lda.est_alpha.nil?
|
|
241
234
|
end
|
|
242
235
|
|
|
236
|
+
should "expose the selected backend name" do
|
|
237
|
+
assert(["native", "pure_ruby", "rust"].include?(@lda.backend_name))
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
should "raise when rust backend is requested but extension is unavailable" do
|
|
241
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
242
|
+
assert true
|
|
243
|
+
else
|
|
244
|
+
assert_raise(LoadError) { Lda::Lda.new(@corpus, backend: :rust) }
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
should "run with rust backend when extension is available" do
|
|
249
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
250
|
+
rust_lda = Lda::Lda.new(@corpus, backend: :rust, random_seed: 1234)
|
|
251
|
+
rust_lda.verbose = false
|
|
252
|
+
rust_lda.num_topics = 4
|
|
253
|
+
rust_lda.em("seeded")
|
|
254
|
+
|
|
255
|
+
assert_equal "rust", rust_lda.backend_name
|
|
256
|
+
assert_equal @corpus.num_docs, rust_lda.gamma.size
|
|
257
|
+
assert_equal @corpus.num_docs, rust_lda.phi.size
|
|
258
|
+
else
|
|
259
|
+
assert true
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
|
|
243
263
|
context "after running em" do
|
|
244
264
|
setup do
|
|
245
265
|
@lda.verbose = false
|
|
@@ -277,5 +297,23 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
277
297
|
end
|
|
278
298
|
end
|
|
279
299
|
end
|
|
300
|
+
|
|
301
|
+
context "using the pure-ruby backend" do
|
|
302
|
+
setup do
|
|
303
|
+
@lda = Lda::Lda.new(@corpus, backend: :pure, random_seed: 1234)
|
|
304
|
+
@lda.verbose = false
|
|
305
|
+
@lda.num_topics = 6
|
|
306
|
+
@lda.max_iter = 20
|
|
307
|
+
@lda.em_max_iter = 30
|
|
308
|
+
@lda.em('random')
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
should "run em and generate model matrices" do
|
|
312
|
+
assert_equal "pure_ruby", @lda.backend_name
|
|
313
|
+
assert_equal @lda.num_topics, @lda.beta.size
|
|
314
|
+
assert_equal @corpus.num_docs, @lda.gamma.size
|
|
315
|
+
assert_equal @corpus.num_docs, @lda.phi.size
|
|
316
|
+
end
|
|
317
|
+
end
|
|
280
318
|
end
|
|
281
319
|
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require_relative "../bin/packaged-gem-smoke"
|
|
5
|
+
|
|
6
|
+
class PackagedGemSmokeTest < Test::Unit::TestCase
|
|
7
|
+
def test_gem_path_under_prefix_handles_symlinked_prefixes
|
|
8
|
+
Dir.mktmpdir("packaged-smoke") do |tmpdir|
|
|
9
|
+
real_root = File.join(tmpdir, "real")
|
|
10
|
+
link_root = File.join(tmpdir, "link")
|
|
11
|
+
gem_dir = File.join(real_root, "gems", "lda-ruby-0.4.0")
|
|
12
|
+
|
|
13
|
+
FileUtils.mkdir_p(gem_dir)
|
|
14
|
+
File.symlink(real_root, link_root)
|
|
15
|
+
|
|
16
|
+
assert(
|
|
17
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir, link_root),
|
|
18
|
+
"expected symlinked prefix to match real gem path"
|
|
19
|
+
)
|
|
20
|
+
assert(
|
|
21
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(File.join(link_root, "gems", "lda-ruby-0.4.0"), real_root),
|
|
22
|
+
"expected real prefix to match symlinked gem path"
|
|
23
|
+
)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def test_gem_path_under_prefix_rejects_neighbor_prefixes
|
|
28
|
+
assert(
|
|
29
|
+
!Lda::PackagedGemSmoke.gem_path_under_prefix?("/tmp/gemhome-other/gems/lda-ruby-0.4.0", "/tmp/gemhome"),
|
|
30
|
+
"neighbor prefixes should not match"
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
class ReleaseScriptsTest < Test::Unit::TestCase
|
|
5
|
+
def setup
|
|
6
|
+
@repo_root = File.expand_path("..", __dir__)
|
|
7
|
+
@check_version_sync = File.join(@repo_root, "bin", "check-version-sync")
|
|
8
|
+
@release_prepare = File.join(@repo_root, "bin", "release-prepare")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_check_version_sync_passes_for_repository_versions
|
|
12
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, chdir: @repo_root)
|
|
13
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
14
|
+
assert_match(/Version sync OK:/, stdout)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_check_version_sync_fails_for_mismatched_tag
|
|
18
|
+
_stdout, stderr, status = Open3.capture3(@check_version_sync, "--tag", "v9.9.9", chdir: @repo_root)
|
|
19
|
+
assert(!status.success?, "expected check-version-sync to fail for mismatched tag")
|
|
20
|
+
assert_match(/does not match expected tag/, stderr)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def test_check_version_sync_print_tag_matches_library_version
|
|
24
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, "--print-tag", chdir: @repo_root)
|
|
25
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
26
|
+
assert_equal("v#{Lda::VERSION}", stdout.strip)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_release_prepare_dry_run_does_not_change_files
|
|
30
|
+
version_rb_path = File.join(@repo_root, "lib", "lda-ruby", "version.rb")
|
|
31
|
+
version_yml_path = File.join(@repo_root, "VERSION.yml")
|
|
32
|
+
changelog_path = File.join(@repo_root, "CHANGELOG.md")
|
|
33
|
+
|
|
34
|
+
baseline = {
|
|
35
|
+
version_rb_path => File.read(version_rb_path),
|
|
36
|
+
version_yml_path => File.read(version_yml_path),
|
|
37
|
+
changelog_path => File.read(changelog_path)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
stdout, stderr, status = Open3.capture3(
|
|
41
|
+
@release_prepare,
|
|
42
|
+
"9.9.9",
|
|
43
|
+
"--allow-dirty",
|
|
44
|
+
"--dry-run",
|
|
45
|
+
chdir: @repo_root
|
|
46
|
+
)
|
|
47
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
48
|
+
assert_match(/Dry run: would update/, stdout)
|
|
49
|
+
|
|
50
|
+
baseline.each do |path, original|
|
|
51
|
+
assert_equal(original, File.read(path), "#{path} changed during dry-run")
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "lda-ruby/rust_build_policy"
|
|
3
|
+
|
|
4
|
+
class RustBuildPolicyTest < Test::Unit::TestCase
|
|
5
|
+
def test_default_policy_is_auto
|
|
6
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(nil)
|
|
7
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("")
|
|
8
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(" ")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_resolves_valid_values_case_insensitively
|
|
12
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("always")
|
|
13
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("ALWAYS")
|
|
14
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve("never")
|
|
15
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve(" NeVeR ")
|
|
16
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("AUTO")
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_invalid_policy_falls_back_to_auto
|
|
20
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("sometimes")
|
|
21
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("true")
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class SimplePipelineTest < Test::Unit::TestCase
|
|
4
|
+
def test_end_to_end_pipeline_on_small_corpus
|
|
5
|
+
corpus = Lda::Corpus.new
|
|
6
|
+
document1 = Lda::TextDocument.new(corpus, "Dom Cobb is a skilled thief who steals secrets from dreams.")
|
|
7
|
+
document2 = Lda::TextDocument.new(corpus, "Jake Sully joins the mission on Pandora and learns from the Na'vi.")
|
|
8
|
+
|
|
9
|
+
corpus.add_document(document1)
|
|
10
|
+
corpus.add_document(document2)
|
|
11
|
+
corpus.remove_word("cobb")
|
|
12
|
+
|
|
13
|
+
lda = Lda::Lda.new(corpus)
|
|
14
|
+
lda.verbose = false
|
|
15
|
+
lda.num_topics = 2
|
|
16
|
+
lda.em("random")
|
|
17
|
+
|
|
18
|
+
topics = lda.top_words(5)
|
|
19
|
+
assert_equal 2, topics.size
|
|
20
|
+
topics.each_value { |words| assert_equal 5, words.size }
|
|
21
|
+
end
|
|
22
|
+
end
|
data/test/simple_yaml.rb
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
require 'shoulda'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
require 'lda-ruby'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
1
|
+
require_relative "test_helper"
|
|
8
2
|
|
|
9
3
|
class Test::Unit::TestCase
|
|
10
4
|
|
data/test/test_helper.rb
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
3
|
-
require
|
|
4
|
-
require 'yaml'
|
|
1
|
+
require "test/unit"
|
|
2
|
+
require "shoulda-context"
|
|
3
|
+
require "yaml"
|
|
5
4
|
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),
|
|
5
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
|
7
6
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
require
|
|
7
|
+
require "lda-ruby"
|
|
9
8
|
|
|
10
9
|
class Test::Unit::TestCase
|
|
11
10
|
end
|
metadata
CHANGED
|
@@ -1,55 +1,39 @@
|
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lda-ruby
|
|
3
|
-
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
5
|
-
prerelease:
|
|
6
|
-
segments:
|
|
7
|
-
- 0
|
|
8
|
-
- 3
|
|
9
|
-
- 8
|
|
10
|
-
version: 0.3.8
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.4.0
|
|
11
5
|
platform: ruby
|
|
12
|
-
authors:
|
|
6
|
+
authors:
|
|
13
7
|
- David Blei
|
|
14
8
|
- Jason Adams
|
|
15
9
|
- Rio Akasaka
|
|
16
|
-
autorequire:
|
|
10
|
+
autorequire:
|
|
17
11
|
bindir: bin
|
|
18
12
|
cert_chain: []
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
prerelease: false
|
|
26
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
|
27
|
-
none: false
|
|
28
|
-
requirements:
|
|
29
|
-
- - ">="
|
|
30
|
-
- !ruby/object:Gem::Version
|
|
31
|
-
hash: 3
|
|
32
|
-
segments:
|
|
33
|
-
- 0
|
|
34
|
-
version: "0"
|
|
35
|
-
type: :runtime
|
|
36
|
-
version_requirements: *id001
|
|
37
|
-
description: Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.
|
|
38
|
-
email: jasonmadams@gmail.com
|
|
13
|
+
date: 2026-02-25 00:00:00.000000000 Z
|
|
14
|
+
dependencies: []
|
|
15
|
+
description: Ruby wrapper and toolkit for Latent Dirichlet Allocation based on the
|
|
16
|
+
original lda-c implementation by David M. Blei.
|
|
17
|
+
email:
|
|
18
|
+
- jasonmadams@gmail.com
|
|
39
19
|
executables: []
|
|
40
|
-
|
|
41
|
-
extensions:
|
|
20
|
+
extensions:
|
|
42
21
|
- ext/lda-ruby/extconf.rb
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
-
|
|
48
|
-
- README
|
|
49
|
-
- README.markdown
|
|
50
|
-
- Rakefile
|
|
22
|
+
- ext/lda-ruby-rust/extconf.rb
|
|
23
|
+
extra_rdoc_files: []
|
|
24
|
+
files:
|
|
25
|
+
- CHANGELOG.md
|
|
26
|
+
- Gemfile
|
|
27
|
+
- README.md
|
|
51
28
|
- VERSION.yml
|
|
52
|
-
-
|
|
29
|
+
- docs/modernization-handoff.md
|
|
30
|
+
- docs/porting-strategy.md
|
|
31
|
+
- docs/precompiled-platform-policy.md
|
|
32
|
+
- docs/release-runbook.md
|
|
33
|
+
- ext/lda-ruby-rust/Cargo.toml
|
|
34
|
+
- ext/lda-ruby-rust/README.md
|
|
35
|
+
- ext/lda-ruby-rust/extconf.rb
|
|
36
|
+
- ext/lda-ruby-rust/src/lib.rs
|
|
53
37
|
- ext/lda-ruby/cokus.c
|
|
54
38
|
- ext/lda-ruby/cokus.h
|
|
55
39
|
- ext/lda-ruby/extconf.rb
|
|
@@ -66,6 +50,11 @@ files:
|
|
|
66
50
|
- ext/lda-ruby/utils.h
|
|
67
51
|
- lda-ruby.gemspec
|
|
68
52
|
- lib/lda-ruby.rb
|
|
53
|
+
- lib/lda-ruby/backends.rb
|
|
54
|
+
- lib/lda-ruby/backends/base.rb
|
|
55
|
+
- lib/lda-ruby/backends/native.rb
|
|
56
|
+
- lib/lda-ruby/backends/pure_ruby.rb
|
|
57
|
+
- lib/lda-ruby/backends/rust.rb
|
|
69
58
|
- lib/lda-ruby/config/stopwords.yml
|
|
70
59
|
- lib/lda-ruby/corpus/corpus.rb
|
|
71
60
|
- lib/lda-ruby/corpus/data_corpus.rb
|
|
@@ -74,50 +63,48 @@ files:
|
|
|
74
63
|
- lib/lda-ruby/document/data_document.rb
|
|
75
64
|
- lib/lda-ruby/document/document.rb
|
|
76
65
|
- lib/lda-ruby/document/text_document.rb
|
|
66
|
+
- lib/lda-ruby/rust_build_policy.rb
|
|
67
|
+
- lib/lda-ruby/version.rb
|
|
77
68
|
- lib/lda-ruby/vocabulary.rb
|
|
78
69
|
- license.txt
|
|
79
|
-
- test/
|
|
70
|
+
- test/backend_compatibility_test.rb
|
|
71
|
+
- test/backends_selection_test.rb
|
|
80
72
|
- test/data/docs.dat
|
|
81
73
|
- test/data/sample.rb
|
|
82
74
|
- test/data/wiki-test-docs.yml
|
|
75
|
+
- test/gemspec_test.rb
|
|
83
76
|
- test/lda_ruby_test.rb
|
|
84
|
-
- test/
|
|
77
|
+
- test/packaged_gem_smoke_test.rb
|
|
78
|
+
- test/release_scripts_test.rb
|
|
79
|
+
- test/rust_build_policy_test.rb
|
|
80
|
+
- test/simple_pipeline_test.rb
|
|
85
81
|
- test/simple_yaml.rb
|
|
86
82
|
- test/test_helper.rb
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
83
|
+
homepage: https://github.com/ealdent/lda-ruby
|
|
84
|
+
licenses:
|
|
85
|
+
- GPL-2.0-or-later
|
|
86
|
+
metadata:
|
|
87
|
+
homepage_uri: https://github.com/ealdent/lda-ruby
|
|
88
|
+
source_code_uri: https://github.com/ealdent/lda-ruby
|
|
89
|
+
changelog_uri: https://github.com/ealdent/lda-ruby/blob/master/CHANGELOG.md
|
|
90
|
+
lda_ruby_gem_variant: source
|
|
91
|
+
post_install_message:
|
|
92
92
|
rdoc_options: []
|
|
93
|
-
|
|
94
|
-
require_paths:
|
|
93
|
+
require_paths:
|
|
95
94
|
- lib
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
none: false
|
|
99
|
-
requirements:
|
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
96
|
+
requirements:
|
|
100
97
|
- - ">="
|
|
101
|
-
- !ruby/object:Gem::Version
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
version: "0"
|
|
106
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
|
-
none: false
|
|
108
|
-
requirements:
|
|
98
|
+
- !ruby/object:Gem::Version
|
|
99
|
+
version: '3.2'
|
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
101
|
+
requirements:
|
|
109
102
|
- - ">="
|
|
110
|
-
- !ruby/object:Gem::Version
|
|
111
|
-
|
|
112
|
-
segments:
|
|
113
|
-
- 0
|
|
114
|
-
version: "0"
|
|
103
|
+
- !ruby/object:Gem::Version
|
|
104
|
+
version: '0'
|
|
115
105
|
requirements: []
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
specification_version: 3
|
|
121
|
-
summary: Ruby port of Latent Dirichlet Allocation by David M. Blei.
|
|
106
|
+
rubygems_version: 3.5.22
|
|
107
|
+
signing_key:
|
|
108
|
+
specification_version: 4
|
|
109
|
+
summary: Ruby implementation of Latent Dirichlet Allocation (LDA).
|
|
122
110
|
test_files: []
|
|
123
|
-
|
data/README
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
Latent Dirichlet Allocation – Ruby Wrapper
|
|
2
|
-
|
|
3
|
-
This wrapper is based on C-code by David M. Blei. In a nutshell, it can be used to automatically cluster documents into topics. The number of topics are chosen beforehand and the topics found are usually fairly intuitive. Details of the implementation can be found in the paper by Blei, Ng, and Jordan.
|
|
4
|
-
|
|
5
|
-
The original C code relied on files for the input and output. We felt it was necessary to depart from that model and use Ruby objects for these steps instead. The only file necessary will be the data file (in a format similar to that used by SVMlight). Optionally you may need a vocabulary file to be able to extract the words belonging to topics.
|
|
6
|
-
|
|
7
|
-
Example usage:
|
|
8
|
-
|
|
9
|
-
require 'lda'
|
|
10
|
-
corpus = Lda::DataCorpus.new("data/data_file.dat")
|
|
11
|
-
lda = Lda::Lda.new(corpus) # create an Lda object for training
|
|
12
|
-
lda.em("random") # run EM algorithm using random starting points
|
|
13
|
-
lda.load_vocabulary("data/vocab.txt")
|
|
14
|
-
lda.print_topics(20) # print the topic 20 words per topic
|
|
15
|
-
|
|
16
|
-
You can check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor that are very knowledgeable.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
References
|
|
20
|
-
|
|
21
|
-
Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022.
|
data/README.markdown
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# Latent Dirichlet Allocation – Ruby Wrapper
|
|
2
|
-
|
|
3
|
-
## What is LDA-Ruby?
|
|
4
|
-
|
|
5
|
-
This wrapper is based on C-code by David M. Blei. In a nutshell, it can be used to automatically cluster documents into topics. The number of topics are chosen beforehand and the topics found are usually fairly intuitive. Details of the implementation can be found in the paper by Blei, Ng, and Jordan.
|
|
6
|
-
|
|
7
|
-
The original C code relied on files for the input and output. We felt it was necessary to depart from that model and use Ruby objects for these steps instead. The only file necessary will be the data file (in a format similar to that used by [SVMlight][svmlight]). Optionally you may need a vocabulary file to be able to extract the words belonging to topics.
|
|
8
|
-
|
|
9
|
-
### Example usage:
|
|
10
|
-
|
|
11
|
-
require 'lda-ruby'
|
|
12
|
-
corpus = Lda::DataCorpus.new("data/data_file.dat")
|
|
13
|
-
lda = Lda::Lda.new(corpus) # create an Lda object for training
|
|
14
|
-
lda.em("random") # run EM algorithm using random starting points
|
|
15
|
-
lda.load_vocabulary("data/vocab.txt")
|
|
16
|
-
lda.print_topics(20) # print the topic 20 words per topic
|
|
17
|
-
|
|
18
|
-
If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable. If you encounter bugs specific to lda-ruby, please post an issue on the Github project.
|
|
19
|
-
|
|
20
|
-
## Resources
|
|
21
|
-
|
|
22
|
-
+ [Blog post about LDA-Ruby][lda-ruby]
|
|
23
|
-
+ [David Blei's lda-c code][blei]
|
|
24
|
-
+ [Wikipedia article on LDA][wikipedia]
|
|
25
|
-
+ [Sample AP data][ap-data]
|
|
26
|
-
|
|
27
|
-
## References
|
|
28
|
-
|
|
29
|
-
Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
|
|
30
|
-
|
|
31
|
-
[svmlight]: http://svmlight.joachims.org
|
|
32
|
-
[lda-ruby]: http://mendicantbug.com/2008/11/17/lda-in-ruby/
|
|
33
|
-
[blei]: http://www.cs.princeton.edu/~blei/lda-c/
|
|
34
|
-
[wikipedia]: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation
|
|
35
|
-
[ap-data]: http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
|
|
36
|
-
[pdf]: http://www.cs.princeton.edu/picasso/mats/BleiNgJordan2003_blei.pdf
|
|
37
|
-
[topic-models]: https://lists.cs.princeton.edu/mailman/listinfo/topic-models
|
data/Rakefile
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
require 'rubygems'
|
|
2
|
-
require 'rake'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
|
|
5
|
-
begin
|
|
6
|
-
require 'jeweler'
|
|
7
|
-
Jeweler::Tasks.new do |gem|
|
|
8
|
-
gem.name = "lda-ruby"
|
|
9
|
-
gem.summary = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
|
|
10
|
-
gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
|
|
11
|
-
gem.email = "jasonmadams@gmail.com"
|
|
12
|
-
gem.homepage = "http://github.com/ealdent/lda-ruby"
|
|
13
|
-
gem.authors = ['David Blei', 'Jason Adams', 'Rio Akasaka']
|
|
14
|
-
gem.extensions = ['ext/lda-ruby/extconf.rb']
|
|
15
|
-
gem.files.include 'stopwords.txt'
|
|
16
|
-
gem.require_paths = ['lib', 'ext']
|
|
17
|
-
gem.add_dependency 'shoulda'
|
|
18
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
rescue LoadError
|
|
22
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
require 'rake/testtask'
|
|
26
|
-
Rake::TestTask.new(:test) do |test|
|
|
27
|
-
test.libs << 'lib' << 'test'
|
|
28
|
-
test.pattern = 'test/**/*_test.rb'
|
|
29
|
-
test.verbose = true
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
begin
|
|
33
|
-
require 'rcov/rcovtask'
|
|
34
|
-
Rcov::RcovTask.new do |test|
|
|
35
|
-
test.libs << 'test'
|
|
36
|
-
test.pattern = 'test/**/*_test.rb'
|
|
37
|
-
test.verbose = true
|
|
38
|
-
end
|
|
39
|
-
rescue LoadError
|
|
40
|
-
task :rcov do
|
|
41
|
-
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
task :default => :test
|
|
46
|
-
|
|
47
|
-
require 'rake/rdoctask'
|
|
48
|
-
Rake::RDocTask.new do |rdoc|
|
|
49
|
-
if File.exist?('VERSION.yml')
|
|
50
|
-
config = YAML.load(File.read('VERSION.yml'))
|
|
51
|
-
version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
|
|
52
|
-
else
|
|
53
|
-
version = ""
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
rdoc.rdoc_dir = 'rdoc'
|
|
57
|
-
rdoc.title = "lda-ruby #{version}"
|
|
58
|
-
rdoc.rdoc_files.include('README*')
|
|
59
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
60
|
-
end
|
|
61
|
-
|