lda-ruby 0.3.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/CHANGELOG.md +8 -0
- data/Gemfile +9 -0
- data/README.md +123 -3
- data/VERSION.yml +3 -3
- data/docs/modernization-handoff.md +190 -0
- data/docs/porting-strategy.md +127 -0
- data/docs/precompiled-platform-policy.md +68 -0
- data/docs/release-runbook.md +157 -0
- data/ext/lda-ruby/extconf.rb +10 -6
- data/ext/lda-ruby/lda-inference.c +21 -5
- data/ext/lda-ruby-rust/Cargo.toml +12 -0
- data/ext/lda-ruby-rust/README.md +48 -0
- data/ext/lda-ruby-rust/extconf.rb +123 -0
- data/ext/lda-ruby-rust/src/lib.rs +456 -0
- data/lda-ruby.gemspec +0 -0
- data/lib/lda-ruby/backends/base.rb +129 -0
- data/lib/lda-ruby/backends/native.rb +158 -0
- data/lib/lda-ruby/backends/pure_ruby.rb +613 -0
- data/lib/lda-ruby/backends/rust.rb +226 -0
- data/lib/lda-ruby/backends.rb +58 -0
- data/lib/lda-ruby/corpus/corpus.rb +17 -15
- data/lib/lda-ruby/corpus/data_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/directory_corpus.rb +2 -2
- data/lib/lda-ruby/corpus/text_corpus.rb +2 -2
- data/lib/lda-ruby/document/document.rb +6 -6
- data/lib/lda-ruby/document/text_document.rb +5 -4
- data/lib/lda-ruby/rust_build_policy.rb +21 -0
- data/lib/lda-ruby/version.rb +5 -0
- data/lib/lda-ruby.rb +293 -48
- data/test/backend_compatibility_test.rb +146 -0
- data/test/backends_selection_test.rb +100 -0
- data/test/gemspec_test.rb +27 -0
- data/test/lda_ruby_test.rb +49 -11
- data/test/packaged_gem_smoke_test.rb +33 -0
- data/test/release_scripts_test.rb +54 -0
- data/test/rust_build_policy_test.rb +23 -0
- data/test/simple_pipeline_test.rb +22 -0
- data/test/simple_yaml.rb +1 -7
- data/test/test_helper.rb +5 -6
- metadata +48 -38
- data/Rakefile +0 -61
- data/ext/lda-ruby/Makefile +0 -181
- data/test/data/.gitignore +0 -2
- data/test/simple_test.rb +0 -26
data/test/lda_ruby_test.rb
CHANGED
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
require 'test/unit'
|
|
3
|
-
require 'shoulda'
|
|
4
|
-
require 'yaml'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
require 'lda-ruby'
|
|
1
|
+
require_relative "test_helper"
|
|
9
2
|
|
|
10
3
|
class LdaRubyTest < Test::Unit::TestCase
|
|
11
4
|
context "A Document instance" do
|
|
@@ -19,7 +12,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
19
12
|
end
|
|
20
13
|
|
|
21
14
|
should "not have text" do
|
|
22
|
-
assert !@document.
|
|
15
|
+
assert !@document.text?
|
|
23
16
|
end
|
|
24
17
|
|
|
25
18
|
should "be empty" do
|
|
@@ -51,7 +44,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
51
44
|
end
|
|
52
45
|
|
|
53
46
|
should "not have text" do
|
|
54
|
-
assert !@document.
|
|
47
|
+
assert !@document.text?
|
|
55
48
|
end
|
|
56
49
|
|
|
57
50
|
should "have word count equal to what was added" do
|
|
@@ -78,7 +71,7 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
78
71
|
end
|
|
79
72
|
|
|
80
73
|
should "have text" do
|
|
81
|
-
assert @document.
|
|
74
|
+
assert @document.text?
|
|
82
75
|
end
|
|
83
76
|
|
|
84
77
|
should "have word count equal to what was added" do
|
|
@@ -240,6 +233,33 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
240
233
|
assert !@lda.est_alpha.nil?
|
|
241
234
|
end
|
|
242
235
|
|
|
236
|
+
should "expose the selected backend name" do
|
|
237
|
+
assert(["native", "pure_ruby", "rust"].include?(@lda.backend_name))
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
should "raise when rust backend is requested but extension is unavailable" do
|
|
241
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
242
|
+
assert true
|
|
243
|
+
else
|
|
244
|
+
assert_raise(LoadError) { Lda::Lda.new(@corpus, backend: :rust) }
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
should "run with rust backend when extension is available" do
|
|
249
|
+
if Lda::RUST_EXTENSION_LOADED
|
|
250
|
+
rust_lda = Lda::Lda.new(@corpus, backend: :rust, random_seed: 1234)
|
|
251
|
+
rust_lda.verbose = false
|
|
252
|
+
rust_lda.num_topics = 4
|
|
253
|
+
rust_lda.em("seeded")
|
|
254
|
+
|
|
255
|
+
assert_equal "rust", rust_lda.backend_name
|
|
256
|
+
assert_equal @corpus.num_docs, rust_lda.gamma.size
|
|
257
|
+
assert_equal @corpus.num_docs, rust_lda.phi.size
|
|
258
|
+
else
|
|
259
|
+
assert true
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
|
|
243
263
|
context "after running em" do
|
|
244
264
|
setup do
|
|
245
265
|
@lda.verbose = false
|
|
@@ -277,5 +297,23 @@ class LdaRubyTest < Test::Unit::TestCase
|
|
|
277
297
|
end
|
|
278
298
|
end
|
|
279
299
|
end
|
|
300
|
+
|
|
301
|
+
context "using the pure-ruby backend" do
|
|
302
|
+
setup do
|
|
303
|
+
@lda = Lda::Lda.new(@corpus, backend: :pure, random_seed: 1234)
|
|
304
|
+
@lda.verbose = false
|
|
305
|
+
@lda.num_topics = 6
|
|
306
|
+
@lda.max_iter = 20
|
|
307
|
+
@lda.em_max_iter = 30
|
|
308
|
+
@lda.em('random')
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
should "run em and generate model matrices" do
|
|
312
|
+
assert_equal "pure_ruby", @lda.backend_name
|
|
313
|
+
assert_equal @lda.num_topics, @lda.beta.size
|
|
314
|
+
assert_equal @corpus.num_docs, @lda.gamma.size
|
|
315
|
+
assert_equal @corpus.num_docs, @lda.phi.size
|
|
316
|
+
end
|
|
317
|
+
end
|
|
280
318
|
end
|
|
281
319
|
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require_relative "../bin/packaged-gem-smoke"
|
|
5
|
+
|
|
6
|
+
class PackagedGemSmokeTest < Test::Unit::TestCase
|
|
7
|
+
def test_gem_path_under_prefix_handles_symlinked_prefixes
|
|
8
|
+
Dir.mktmpdir("packaged-smoke") do |tmpdir|
|
|
9
|
+
real_root = File.join(tmpdir, "real")
|
|
10
|
+
link_root = File.join(tmpdir, "link")
|
|
11
|
+
gem_dir = File.join(real_root, "gems", "lda-ruby-0.4.0")
|
|
12
|
+
|
|
13
|
+
FileUtils.mkdir_p(gem_dir)
|
|
14
|
+
File.symlink(real_root, link_root)
|
|
15
|
+
|
|
16
|
+
assert(
|
|
17
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(gem_dir, link_root),
|
|
18
|
+
"expected symlinked prefix to match real gem path"
|
|
19
|
+
)
|
|
20
|
+
assert(
|
|
21
|
+
Lda::PackagedGemSmoke.gem_path_under_prefix?(File.join(link_root, "gems", "lda-ruby-0.4.0"), real_root),
|
|
22
|
+
"expected real prefix to match symlinked gem path"
|
|
23
|
+
)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def test_gem_path_under_prefix_rejects_neighbor_prefixes
|
|
28
|
+
assert(
|
|
29
|
+
!Lda::PackagedGemSmoke.gem_path_under_prefix?("/tmp/gemhome-other/gems/lda-ruby-0.4.0", "/tmp/gemhome"),
|
|
30
|
+
"neighbor prefixes should not match"
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
class ReleaseScriptsTest < Test::Unit::TestCase
|
|
5
|
+
def setup
|
|
6
|
+
@repo_root = File.expand_path("..", __dir__)
|
|
7
|
+
@check_version_sync = File.join(@repo_root, "bin", "check-version-sync")
|
|
8
|
+
@release_prepare = File.join(@repo_root, "bin", "release-prepare")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_check_version_sync_passes_for_repository_versions
|
|
12
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, chdir: @repo_root)
|
|
13
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
14
|
+
assert_match(/Version sync OK:/, stdout)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_check_version_sync_fails_for_mismatched_tag
|
|
18
|
+
_stdout, stderr, status = Open3.capture3(@check_version_sync, "--tag", "v9.9.9", chdir: @repo_root)
|
|
19
|
+
assert(!status.success?, "expected check-version-sync to fail for mismatched tag")
|
|
20
|
+
assert_match(/does not match expected tag/, stderr)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def test_check_version_sync_print_tag_matches_library_version
|
|
24
|
+
stdout, stderr, status = Open3.capture3(@check_version_sync, "--print-tag", chdir: @repo_root)
|
|
25
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
26
|
+
assert_equal("v#{Lda::VERSION}", stdout.strip)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_release_prepare_dry_run_does_not_change_files
|
|
30
|
+
version_rb_path = File.join(@repo_root, "lib", "lda-ruby", "version.rb")
|
|
31
|
+
version_yml_path = File.join(@repo_root, "VERSION.yml")
|
|
32
|
+
changelog_path = File.join(@repo_root, "CHANGELOG.md")
|
|
33
|
+
|
|
34
|
+
baseline = {
|
|
35
|
+
version_rb_path => File.read(version_rb_path),
|
|
36
|
+
version_yml_path => File.read(version_yml_path),
|
|
37
|
+
changelog_path => File.read(changelog_path)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
stdout, stderr, status = Open3.capture3(
|
|
41
|
+
@release_prepare,
|
|
42
|
+
"9.9.9",
|
|
43
|
+
"--allow-dirty",
|
|
44
|
+
"--dry-run",
|
|
45
|
+
chdir: @repo_root
|
|
46
|
+
)
|
|
47
|
+
assert(status.success?, "stdout=#{stdout}\nstderr=#{stderr}")
|
|
48
|
+
assert_match(/Dry run: would update/, stdout)
|
|
49
|
+
|
|
50
|
+
baseline.each do |path, original|
|
|
51
|
+
assert_equal(original, File.read(path), "#{path} changed during dry-run")
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
require "lda-ruby/rust_build_policy"
|
|
3
|
+
|
|
4
|
+
class RustBuildPolicyTest < Test::Unit::TestCase
|
|
5
|
+
def test_default_policy_is_auto
|
|
6
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(nil)
|
|
7
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("")
|
|
8
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve(" ")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_resolves_valid_values_case_insensitively
|
|
12
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("always")
|
|
13
|
+
assert_equal "always", Lda::RustBuildPolicy.resolve("ALWAYS")
|
|
14
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve("never")
|
|
15
|
+
assert_equal "never", Lda::RustBuildPolicy.resolve(" NeVeR ")
|
|
16
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("AUTO")
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_invalid_policy_falls_back_to_auto
|
|
20
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("sometimes")
|
|
21
|
+
assert_equal "auto", Lda::RustBuildPolicy.resolve("true")
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class SimplePipelineTest < Test::Unit::TestCase
|
|
4
|
+
def test_end_to_end_pipeline_on_small_corpus
|
|
5
|
+
corpus = Lda::Corpus.new
|
|
6
|
+
document1 = Lda::TextDocument.new(corpus, "Dom Cobb is a skilled thief who steals secrets from dreams.")
|
|
7
|
+
document2 = Lda::TextDocument.new(corpus, "Jake Sully joins the mission on Pandora and learns from the Na'vi.")
|
|
8
|
+
|
|
9
|
+
corpus.add_document(document1)
|
|
10
|
+
corpus.add_document(document2)
|
|
11
|
+
corpus.remove_word("cobb")
|
|
12
|
+
|
|
13
|
+
lda = Lda::Lda.new(corpus)
|
|
14
|
+
lda.verbose = false
|
|
15
|
+
lda.num_topics = 2
|
|
16
|
+
lda.em("random")
|
|
17
|
+
|
|
18
|
+
topics = lda.top_words(5)
|
|
19
|
+
assert_equal 2, topics.size
|
|
20
|
+
topics.each_value { |words| assert_equal 5, words.size }
|
|
21
|
+
end
|
|
22
|
+
end
|
data/test/simple_yaml.rb
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
require 'shoulda'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
require 'lda-ruby'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
1
|
+
require_relative "test_helper"
|
|
8
2
|
|
|
9
3
|
class Test::Unit::TestCase
|
|
10
4
|
|
data/test/test_helper.rb
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
3
|
-
require
|
|
4
|
-
require 'yaml'
|
|
1
|
+
require "test/unit"
|
|
2
|
+
require "shoulda-context"
|
|
3
|
+
require "yaml"
|
|
5
4
|
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),
|
|
5
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
|
7
6
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
require
|
|
7
|
+
require "lda-ruby"
|
|
9
8
|
|
|
10
9
|
class Test::Unit::TestCase
|
|
11
10
|
end
|
metadata
CHANGED
|
@@ -1,44 +1,39 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lda-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David Blei
|
|
8
8
|
- Jason Adams
|
|
9
9
|
- Rio Akasaka
|
|
10
|
-
autorequire:
|
|
10
|
+
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date:
|
|
14
|
-
dependencies:
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
- - ! '>='
|
|
20
|
-
- !ruby/object:Gem::Version
|
|
21
|
-
version: '0'
|
|
22
|
-
type: :runtime
|
|
23
|
-
prerelease: false
|
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
requirements:
|
|
26
|
-
- - ! '>='
|
|
27
|
-
- !ruby/object:Gem::Version
|
|
28
|
-
version: '0'
|
|
29
|
-
description: Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.
|
|
30
|
-
email: jasonmadams@gmail.com
|
|
13
|
+
date: 2026-02-25 00:00:00.000000000 Z
|
|
14
|
+
dependencies: []
|
|
15
|
+
description: Ruby wrapper and toolkit for Latent Dirichlet Allocation based on the
|
|
16
|
+
original lda-c implementation by David M. Blei.
|
|
17
|
+
email:
|
|
18
|
+
- jasonmadams@gmail.com
|
|
31
19
|
executables: []
|
|
32
20
|
extensions:
|
|
33
21
|
- ext/lda-ruby/extconf.rb
|
|
34
|
-
|
|
35
|
-
|
|
22
|
+
- ext/lda-ruby-rust/extconf.rb
|
|
23
|
+
extra_rdoc_files: []
|
|
36
24
|
files:
|
|
37
25
|
- CHANGELOG.md
|
|
26
|
+
- Gemfile
|
|
38
27
|
- README.md
|
|
39
|
-
- Rakefile
|
|
40
28
|
- VERSION.yml
|
|
41
|
-
-
|
|
29
|
+
- docs/modernization-handoff.md
|
|
30
|
+
- docs/porting-strategy.md
|
|
31
|
+
- docs/precompiled-platform-policy.md
|
|
32
|
+
- docs/release-runbook.md
|
|
33
|
+
- ext/lda-ruby-rust/Cargo.toml
|
|
34
|
+
- ext/lda-ruby-rust/README.md
|
|
35
|
+
- ext/lda-ruby-rust/extconf.rb
|
|
36
|
+
- ext/lda-ruby-rust/src/lib.rs
|
|
42
37
|
- ext/lda-ruby/cokus.c
|
|
43
38
|
- ext/lda-ruby/cokus.h
|
|
44
39
|
- ext/lda-ruby/extconf.rb
|
|
@@ -55,6 +50,11 @@ files:
|
|
|
55
50
|
- ext/lda-ruby/utils.h
|
|
56
51
|
- lda-ruby.gemspec
|
|
57
52
|
- lib/lda-ruby.rb
|
|
53
|
+
- lib/lda-ruby/backends.rb
|
|
54
|
+
- lib/lda-ruby/backends/base.rb
|
|
55
|
+
- lib/lda-ruby/backends/native.rb
|
|
56
|
+
- lib/lda-ruby/backends/pure_ruby.rb
|
|
57
|
+
- lib/lda-ruby/backends/rust.rb
|
|
58
58
|
- lib/lda-ruby/config/stopwords.yml
|
|
59
59
|
- lib/lda-ruby/corpus/corpus.rb
|
|
60
60
|
- lib/lda-ruby/corpus/data_corpus.rb
|
|
@@ -63,38 +63,48 @@ files:
|
|
|
63
63
|
- lib/lda-ruby/document/data_document.rb
|
|
64
64
|
- lib/lda-ruby/document/document.rb
|
|
65
65
|
- lib/lda-ruby/document/text_document.rb
|
|
66
|
+
- lib/lda-ruby/rust_build_policy.rb
|
|
67
|
+
- lib/lda-ruby/version.rb
|
|
66
68
|
- lib/lda-ruby/vocabulary.rb
|
|
67
69
|
- license.txt
|
|
68
|
-
- test/
|
|
70
|
+
- test/backend_compatibility_test.rb
|
|
71
|
+
- test/backends_selection_test.rb
|
|
69
72
|
- test/data/docs.dat
|
|
70
73
|
- test/data/sample.rb
|
|
71
74
|
- test/data/wiki-test-docs.yml
|
|
75
|
+
- test/gemspec_test.rb
|
|
72
76
|
- test/lda_ruby_test.rb
|
|
73
|
-
- test/
|
|
77
|
+
- test/packaged_gem_smoke_test.rb
|
|
78
|
+
- test/release_scripts_test.rb
|
|
79
|
+
- test/rust_build_policy_test.rb
|
|
80
|
+
- test/simple_pipeline_test.rb
|
|
74
81
|
- test/simple_yaml.rb
|
|
75
82
|
- test/test_helper.rb
|
|
76
|
-
homepage:
|
|
77
|
-
licenses:
|
|
78
|
-
|
|
79
|
-
|
|
83
|
+
homepage: https://github.com/ealdent/lda-ruby
|
|
84
|
+
licenses:
|
|
85
|
+
- GPL-2.0-or-later
|
|
86
|
+
metadata:
|
|
87
|
+
homepage_uri: https://github.com/ealdent/lda-ruby
|
|
88
|
+
source_code_uri: https://github.com/ealdent/lda-ruby
|
|
89
|
+
changelog_uri: https://github.com/ealdent/lda-ruby/blob/master/CHANGELOG.md
|
|
90
|
+
lda_ruby_gem_variant: source
|
|
91
|
+
post_install_message:
|
|
80
92
|
rdoc_options: []
|
|
81
93
|
require_paths:
|
|
82
94
|
- lib
|
|
83
|
-
- ext
|
|
84
95
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
96
|
requirements:
|
|
86
|
-
- -
|
|
97
|
+
- - ">="
|
|
87
98
|
- !ruby/object:Gem::Version
|
|
88
|
-
version: '
|
|
99
|
+
version: '3.2'
|
|
89
100
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
101
|
requirements:
|
|
91
|
-
- -
|
|
102
|
+
- - ">="
|
|
92
103
|
- !ruby/object:Gem::Version
|
|
93
104
|
version: '0'
|
|
94
105
|
requirements: []
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
signing_key:
|
|
106
|
+
rubygems_version: 3.5.22
|
|
107
|
+
signing_key:
|
|
98
108
|
specification_version: 4
|
|
99
|
-
summary: Ruby
|
|
109
|
+
summary: Ruby implementation of Latent Dirichlet Allocation (LDA).
|
|
100
110
|
test_files: []
|
data/Rakefile
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
require 'rubygems'
|
|
2
|
-
require 'rake'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
|
|
5
|
-
begin
|
|
6
|
-
require 'jeweler'
|
|
7
|
-
Jeweler::Tasks.new do |gem|
|
|
8
|
-
gem.name = "lda-ruby"
|
|
9
|
-
gem.summary = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
|
|
10
|
-
gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
|
|
11
|
-
gem.email = "jasonmadams@gmail.com"
|
|
12
|
-
gem.homepage = "http://github.com/ealdent/lda-ruby"
|
|
13
|
-
gem.authors = ['David Blei', 'Jason Adams', 'Rio Akasaka']
|
|
14
|
-
gem.extensions = ['ext/lda-ruby/extconf.rb']
|
|
15
|
-
gem.files.include 'stopwords.txt'
|
|
16
|
-
gem.require_paths = ['lib', 'ext']
|
|
17
|
-
gem.add_dependency 'shoulda'
|
|
18
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
rescue LoadError
|
|
22
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
require 'rake/testtask'
|
|
26
|
-
Rake::TestTask.new(:test) do |test|
|
|
27
|
-
test.libs << 'lib' << 'test'
|
|
28
|
-
test.pattern = 'test/**/*_test.rb'
|
|
29
|
-
test.verbose = true
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
begin
|
|
33
|
-
require 'rcov/rcovtask'
|
|
34
|
-
Rcov::RcovTask.new do |test|
|
|
35
|
-
test.libs << 'test'
|
|
36
|
-
test.pattern = 'test/**/*_test.rb'
|
|
37
|
-
test.verbose = true
|
|
38
|
-
end
|
|
39
|
-
rescue LoadError
|
|
40
|
-
task :rcov do
|
|
41
|
-
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
task :default => :test
|
|
46
|
-
|
|
47
|
-
require 'rake/rdoctask'
|
|
48
|
-
Rake::RDocTask.new do |rdoc|
|
|
49
|
-
if File.exist?('VERSION.yml')
|
|
50
|
-
config = YAML.load(File.read('VERSION.yml'))
|
|
51
|
-
version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
|
|
52
|
-
else
|
|
53
|
-
version = ""
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
rdoc.rdoc_dir = 'rdoc'
|
|
57
|
-
rdoc.title = "lda-ruby #{version}"
|
|
58
|
-
rdoc.rdoc_files.include('README*')
|
|
59
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
60
|
-
end
|
|
61
|
-
|
data/ext/lda-ruby/Makefile
DELETED
|
@@ -1,181 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
SHELL = /bin/sh
|
|
3
|
-
|
|
4
|
-
#### Start of system configuration section. ####
|
|
5
|
-
|
|
6
|
-
srcdir = .
|
|
7
|
-
topdir = /home/taf2/.local/include/ruby-1.9.1
|
|
8
|
-
hdrdir = /home/taf2/.local/include/ruby-1.9.1
|
|
9
|
-
arch_hdrdir = /home/taf2/.local/include/ruby-1.9.1/$(arch)
|
|
10
|
-
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
|
11
|
-
prefix = $(DESTDIR)/home/taf2/.local
|
|
12
|
-
exec_prefix = $(prefix)
|
|
13
|
-
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
|
14
|
-
sitehdrdir = $(rubyhdrdir)/site_ruby
|
|
15
|
-
rubyhdrdir = $(includedir)/$(RUBY_INSTALL_NAME)-$(ruby_version)
|
|
16
|
-
vendordir = $(libdir)/$(RUBY_INSTALL_NAME)/vendor_ruby
|
|
17
|
-
sitedir = $(libdir)/$(RUBY_INSTALL_NAME)/site_ruby
|
|
18
|
-
mandir = $(datarootdir)/man
|
|
19
|
-
localedir = $(datarootdir)/locale
|
|
20
|
-
libdir = $(exec_prefix)/lib
|
|
21
|
-
psdir = $(docdir)
|
|
22
|
-
pdfdir = $(docdir)
|
|
23
|
-
dvidir = $(docdir)
|
|
24
|
-
htmldir = $(docdir)
|
|
25
|
-
infodir = $(datarootdir)/info
|
|
26
|
-
docdir = $(datarootdir)/doc/$(PACKAGE)
|
|
27
|
-
oldincludedir = $(DESTDIR)/usr/include
|
|
28
|
-
includedir = $(prefix)/include
|
|
29
|
-
localstatedir = $(prefix)/var
|
|
30
|
-
sharedstatedir = $(prefix)/com
|
|
31
|
-
sysconfdir = $(prefix)/etc
|
|
32
|
-
datadir = $(datarootdir)
|
|
33
|
-
datarootdir = $(prefix)/share
|
|
34
|
-
libexecdir = $(exec_prefix)/libexec
|
|
35
|
-
sbindir = $(exec_prefix)/sbin
|
|
36
|
-
bindir = $(exec_prefix)/bin
|
|
37
|
-
rubylibdir = $(libdir)/$(ruby_install_name)/$(ruby_version)
|
|
38
|
-
archdir = $(rubylibdir)/$(arch)
|
|
39
|
-
sitelibdir = $(sitedir)/$(ruby_version)
|
|
40
|
-
sitearchdir = $(sitelibdir)/$(sitearch)
|
|
41
|
-
vendorlibdir = $(vendordir)/$(ruby_version)
|
|
42
|
-
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
|
43
|
-
|
|
44
|
-
CC = gcc
|
|
45
|
-
CXX = g++
|
|
46
|
-
LIBRUBY = $(LIBRUBY_SO)
|
|
47
|
-
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
|
48
|
-
LIBRUBYARG_SHARED = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
|
|
49
|
-
LIBRUBYARG_STATIC = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static
|
|
50
|
-
OUTFLAG = -o
|
|
51
|
-
COUTFLAG = -o
|
|
52
|
-
|
|
53
|
-
RUBY_EXTCONF_H =
|
|
54
|
-
cflags = $(optflags) $(debugflags) $(warnflags)
|
|
55
|
-
optflags = -O0
|
|
56
|
-
debugflags = -g3 -ggdb
|
|
57
|
-
warnflags = -Wall -Wno-parentheses
|
|
58
|
-
CFLAGS = -fPIC $(cflags) -fPIC -Wall -ggdb -O0
|
|
59
|
-
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
|
60
|
-
DEFS =
|
|
61
|
-
CPPFLAGS = -D USE_RUBY $(DEFS) $(cppflags)
|
|
62
|
-
CXXFLAGS = $(CFLAGS) $(cxxflags)
|
|
63
|
-
ldflags = -L. -rdynamic -Wl,-export-dynamic
|
|
64
|
-
dldflags =
|
|
65
|
-
archflag =
|
|
66
|
-
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
|
67
|
-
LDSHARED = $(CC) -shared
|
|
68
|
-
LDSHAREDXX = $(CXX) -shared
|
|
69
|
-
AR = ar
|
|
70
|
-
EXEEXT =
|
|
71
|
-
|
|
72
|
-
RUBY_INSTALL_NAME = ruby
|
|
73
|
-
RUBY_SO_NAME = ruby
|
|
74
|
-
arch = x86_64-linux
|
|
75
|
-
sitearch = x86_64-linux
|
|
76
|
-
ruby_version = 1.9.1
|
|
77
|
-
ruby = /home/taf2/.local/bin/ruby
|
|
78
|
-
RUBY = $(ruby)
|
|
79
|
-
RM = rm -f
|
|
80
|
-
RM_RF = $(RUBY) -run -e rm -- -rf
|
|
81
|
-
RMDIRS = $(RUBY) -run -e rmdir -- -p
|
|
82
|
-
MAKEDIRS = mkdir -p
|
|
83
|
-
INSTALL = /usr/bin/install -c
|
|
84
|
-
INSTALL_PROG = $(INSTALL) -m 0755
|
|
85
|
-
INSTALL_DATA = $(INSTALL) -m 644
|
|
86
|
-
COPY = cp
|
|
87
|
-
|
|
88
|
-
#### End of system configuration section. ####
|
|
89
|
-
|
|
90
|
-
preload =
|
|
91
|
-
|
|
92
|
-
libpath = . $(libdir)
|
|
93
|
-
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
|
|
94
|
-
DEFFILE =
|
|
95
|
-
|
|
96
|
-
CLEANFILES = mkmf.log
|
|
97
|
-
DISTCLEANFILES =
|
|
98
|
-
DISTCLEANDIRS =
|
|
99
|
-
|
|
100
|
-
extout =
|
|
101
|
-
extout_prefix =
|
|
102
|
-
target_prefix =
|
|
103
|
-
LOCAL_LIBS =
|
|
104
|
-
LIBS = $(LIBRUBYARG_SHARED) -lpthread -lrt -ldl -lcrypt -lm -lc
|
|
105
|
-
SRCS = lda-model.c lda-data.c utils.c lda-alpha.c cokus.c lda-inference.c
|
|
106
|
-
OBJS = lda-model.o lda-data.o utils.o lda-alpha.o cokus.o lda-inference.o
|
|
107
|
-
TARGET = lda_ext
|
|
108
|
-
DLLIB = $(TARGET).so
|
|
109
|
-
EXTSTATIC =
|
|
110
|
-
STATIC_LIB =
|
|
111
|
-
|
|
112
|
-
BINDIR = $(bindir)
|
|
113
|
-
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
|
114
|
-
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
|
115
|
-
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
|
116
|
-
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
|
117
|
-
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
|
118
|
-
|
|
119
|
-
TARGET_SO = $(DLLIB)
|
|
120
|
-
CLEANLIBS = $(TARGET).so
|
|
121
|
-
CLEANOBJS = *.o *.bak
|
|
122
|
-
|
|
123
|
-
all: $(DLLIB)
|
|
124
|
-
static: $(STATIC_LIB)
|
|
125
|
-
|
|
126
|
-
clean-rb-default::
|
|
127
|
-
clean-rb::
|
|
128
|
-
clean-so::
|
|
129
|
-
clean: clean-so clean-rb-default clean-rb
|
|
130
|
-
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
|
131
|
-
|
|
132
|
-
distclean-rb-default::
|
|
133
|
-
distclean-rb::
|
|
134
|
-
distclean-so::
|
|
135
|
-
distclean: clean distclean-so distclean-rb-default distclean-rb
|
|
136
|
-
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
|
137
|
-
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
|
138
|
-
@-$(RMDIRS) $(DISTCLEANDIRS)
|
|
139
|
-
|
|
140
|
-
realclean: distclean
|
|
141
|
-
install: install-so install-rb
|
|
142
|
-
|
|
143
|
-
install-so: $(RUBYARCHDIR)
|
|
144
|
-
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
|
145
|
-
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
|
146
|
-
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
|
147
|
-
install-rb: pre-install-rb install-rb-default
|
|
148
|
-
install-rb-default: pre-install-rb-default
|
|
149
|
-
pre-install-rb: Makefile
|
|
150
|
-
pre-install-rb-default: Makefile
|
|
151
|
-
$(RUBYARCHDIR):
|
|
152
|
-
$(MAKEDIRS) $@
|
|
153
|
-
|
|
154
|
-
site-install: site-install-so site-install-rb
|
|
155
|
-
site-install-so: install-so
|
|
156
|
-
site-install-rb: install-rb
|
|
157
|
-
|
|
158
|
-
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
|
159
|
-
|
|
160
|
-
.cc.o:
|
|
161
|
-
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
|
162
|
-
|
|
163
|
-
.cxx.o:
|
|
164
|
-
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
|
165
|
-
|
|
166
|
-
.cpp.o:
|
|
167
|
-
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
|
168
|
-
|
|
169
|
-
.C.o:
|
|
170
|
-
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
|
171
|
-
|
|
172
|
-
.c.o:
|
|
173
|
-
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
|
174
|
-
|
|
175
|
-
$(DLLIB): $(OBJS) Makefile
|
|
176
|
-
@-$(RM) $(@)
|
|
177
|
-
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
$(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
|
data/test/data/.gitignore
DELETED
data/test/simple_test.rb
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
require 'rubygems'
|
|
2
|
-
require 'shoulda'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
require 'lda-ruby'
|
|
5
|
-
|
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
7
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
|
-
|
|
9
|
-
class Test::Unit::TestCase
|
|
10
|
-
|
|
11
|
-
@corpus = Lda::Corpus.new
|
|
12
|
-
@document1 = Lda::TextDocument.new(@corpus, 'Dom Cobb is a skilled thief, the absolute best in the dangerous art of extraction, stealing valuable secrets from deep within the subconscious during the dream state, when the mind is at its most vulnerable. Cobb\'s rare ability has made him a coveted player in this treacherous new world of corporate espionage, but it has also made him an international fugitive and cost him everything he has ever loved. Now Cobb is being offered a chance at redemption. One last job could give him his life back but only if he can accomplish the impossible-inception. Instead of the perfect heist, Cobb and his team of specialists have to pull off the reverse: their task is not to steal an idea but to plant one. If they succeed, it could be the perfect crime. But no amount of careful planning or expertise can prepare the team for the dangerous enemy that seems to predict their every move. An enemy that only Cobb could have seen coming.')
|
|
13
|
-
@document2 = Lda::TextDocument.new(@corpus, 'When his brother is killed in a robbery, paraplegic Marine Jake Sully decides to take his place in a mission on the distant world of Pandora. There he learns of greedy corporate figurehead Parker Selfridge\'s intentions of driving off the native humanoid \"Na\'vi\" in order to mine for the precious material scattered throughout their rich woodland. In exchange for the spinal surgery that will fix his legs, Jake gathers intel for the cooperating military unit spearheaded by gung-ho Colonel Quaritch, while simultaneously attempting to infiltrate the Na\'vi people with the use of an \"avatar\" identity. While Jake begins to bond with the native tribe and quickly falls in love with the beautiful alien Neytiri, the restless Colonel moves forward with his ruthless extermination tactics, forcing the soldier to take a stand - and fight back in an epic battle for the fate of Pandora.')
|
|
14
|
-
|
|
15
|
-
@corpus.add_document(@document1)
|
|
16
|
-
@corpus.add_document(@document2)
|
|
17
|
-
@corpus.remove_word("cobb")
|
|
18
|
-
@lda = Lda::Lda.new(@corpus)
|
|
19
|
-
|
|
20
|
-
@lda.verbose = false
|
|
21
|
-
@lda.num_topics = 2
|
|
22
|
-
@lda.em('random')
|
|
23
|
-
topics = @lda.top_words(5)
|
|
24
|
-
puts topics
|
|
25
|
-
|
|
26
|
-
end
|