tomoto 0.3.1-x86_64-linux → 0.3.3-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/ext/tomoto/extconf.rb +11 -6
- data/ext/tomoto/llda.cpp +13 -0
- data/lib/tomoto/2.7/tomoto.so +0 -0
- data/lib/tomoto/3.0/tomoto.so +0 -0
- data/lib/tomoto/3.1/tomoto.so +0 -0
- data/lib/tomoto/3.2/tomoto.so +0 -0
- data/lib/tomoto/lda.rb +6 -1
- data/lib/tomoto/version.rb +1 -1
- data/vendor/EigenRand/EigenRand/EigenRand +2 -2
- data/vendor/EigenRand/README.md +4 -0
- data/vendor/tomotopy/README.kr.rst +7 -0
- data/vendor/tomotopy/README.rst +22 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bac6fcf87194d591166c8adaa6a5cf5e46a5c31b8c7a9b88b7924d41f3888a2
|
4
|
+
data.tar.gz: '0549edad5ab0133ea02b86b53b59e6bd76c3a4b9251ecbfb1b4525d1c721d287'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fffeaedac57ec10c59faf46ad0179394c94d05f098e0d32e0228bb744a80f3207b700a9acf1b1fe862ce6832274936f881b5566fddd4d4301583fdc22ce3a677
|
7
|
+
data.tar.gz: 974fd982373f3b7a90eea1eeb48f9e889656a1f10e5c2886ad38901d79f3e67983fd56fc696a44112c2d6f02c406a0e4b8de2a040ea60454baa9bdb10b22a1bb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.3.3 (2023-02-01)
|
2
|
+
|
3
|
+
- Added `topic_label_dict` method to `LLDA`
|
4
|
+
- Fixed error with `infer` with loaded model
|
5
|
+
|
6
|
+
## 0.3.2 (2023-01-22)
|
7
|
+
|
8
|
+
- Added precompiled gem for Mac ARM
|
9
|
+
- Updated tomoto to 0.12.4
|
10
|
+
|
1
11
|
## 0.3.1 (2023-01-12)
|
2
12
|
|
3
13
|
- Added support for Ruby 3.2
|
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) 2019, bab2min
|
4
|
-
Copyright (c) 2020-
|
4
|
+
Copyright (c) 2020-2023 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem "tomoto"
|
13
13
|
```
|
14
14
|
|
15
|
-
ARM is not currently supported
|
16
|
-
|
17
15
|
## Getting Started
|
18
16
|
|
19
17
|
Train a model
|
20
18
|
|
21
19
|
```ruby
|
22
20
|
model = Tomoto::LDA.new(k: 2)
|
23
|
-
model.add_doc("
|
24
|
-
model.add_doc("
|
25
|
-
model.add_doc("
|
21
|
+
model.add_doc(["tokens", "from", "document", "one"])
|
22
|
+
model.add_doc(["tokens", "from", "document", "two"])
|
23
|
+
model.add_doc(["tokens", "from", "document", "three"])
|
26
24
|
model.train(100) # iterations
|
27
25
|
```
|
28
26
|
|
@@ -78,7 +76,7 @@ model.ll_per_word
|
|
78
76
|
Perform inference for unseen documents
|
79
77
|
|
80
78
|
```ruby
|
81
|
-
doc = model.make_doc("unseen doc")
|
79
|
+
doc = model.make_doc(["unseen", "doc"])
|
82
80
|
topic_dist, ll = model.infer(doc)
|
83
81
|
```
|
84
82
|
|
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
|
|
114
112
|
- [LDA](examples/lda_basic.rb)
|
115
113
|
- [HDP](examples/hdp_basic.rb)
|
116
114
|
|
117
|
-
## Tokenization
|
118
|
-
|
119
|
-
Documents are tokenized by whitespace by default, or you can perform your own tokenization.
|
120
|
-
|
121
|
-
```ruby
|
122
|
-
model.add_doc(["tokens", "from", "document", "one"])
|
123
|
-
```
|
124
|
-
|
125
115
|
## Performance
|
126
116
|
|
127
117
|
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
|
data/ext/tomoto/extconf.rb
CHANGED
@@ -3,12 +3,17 @@ require "mkmf-rice"
|
|
3
3
|
$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
|
4
4
|
|
5
5
|
unless ENV["RUBY_CC_VERSION"]
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
default_optflags =
|
7
|
+
if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
8
|
+
# -march=native not supported with Mac ARM
|
9
|
+
""
|
10
|
+
else
|
11
|
+
# AVX-512F not supported yet
|
12
|
+
# https://github.com/bab2min/tomotopy/issues/188
|
13
|
+
"-march=native -mno-avx512f"
|
14
|
+
end
|
15
|
+
|
16
|
+
$CXXFLAGS << " " << with_config("optflags", default_optflags)
|
12
17
|
end
|
13
18
|
|
14
19
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
data/ext/tomoto/llda.cpp
CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
|
|
29
29
|
"topics_per_label",
|
30
30
|
[](tomoto::ILLDAModel& self) {
|
31
31
|
return self.getNumTopicsPerLabel();
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"topic_label_dict",
|
35
|
+
[](tomoto::ILLDAModel& self) {
|
36
|
+
auto dict = self.getTopicLabelDict();
|
37
|
+
Array res;
|
38
|
+
auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
|
39
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
40
|
+
VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
|
41
|
+
Object obj(value);
|
42
|
+
res.push(obj.call("force_encoding", utf8));
|
43
|
+
}
|
44
|
+
return res;
|
32
45
|
});
|
33
46
|
}
|
data/lib/tomoto/2.7/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/3.0/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/3.1/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/3.2/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/lda.rb
CHANGED
@@ -24,7 +24,7 @@ module Tomoto
|
|
24
24
|
|
25
25
|
# TODO support multiple docs
|
26
26
|
def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
|
27
|
-
raise "cannot infer with untrained model" unless
|
27
|
+
raise "cannot infer with untrained model" unless trained?
|
28
28
|
_infer(doc, iter, tolerance, workers, to_ps(parallel), together)
|
29
29
|
end
|
30
30
|
|
@@ -86,6 +86,7 @@ module Tomoto
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
+
# TODO raise error if iterations < 1
|
89
90
|
def train(iterations = 10, workers: 0, parallel: :default)
|
90
91
|
prepare
|
91
92
|
_train(iterations, workers, to_ps(parallel))
|
@@ -97,6 +98,10 @@ module Tomoto
|
|
97
98
|
|
98
99
|
private
|
99
100
|
|
101
|
+
def trained?
|
102
|
+
global_step.positive?
|
103
|
+
end
|
104
|
+
|
100
105
|
def prepare
|
101
106
|
unless defined?(@prepared)
|
102
107
|
_prepare(@min_cf, @min_df, @rm_top)
|
data/lib/tomoto/version.rb
CHANGED
data/vendor/EigenRand/README.md
CHANGED
@@ -385,6 +385,10 @@ MIT License
|
|
385
385
|
|
386
386
|
## History
|
387
387
|
|
388
|
+
### 0.4.1 (2022-08-13)
|
389
|
+
* Fixed a bug where double-type generation with std::mt19937 fails compilation.
|
390
|
+
* Fixed a bug where `UniformIntGen` in scalar mode generates numbers in the wrong range.
|
391
|
+
|
388
392
|
### 0.4.0 alpha (2021-09-28)
|
389
393
|
* Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
|
390
394
|
* Now EigenRand has compatibility to `Eigen 3.4.0`.
|
data/vendor/tomotopy/README.kr.rst
CHANGED
@@ -305,6 +305,13 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
|
|
305
305
|
|
306
306
|
역사
|
307
307
|
-------
|
308
|
+
* 0.12.4 (2023-01-22)
|
309
|
+
* New features
|
310
|
+
* macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
|
311
|
+
* Bug fixes
|
312
|
+
* `tomotopy.Document.get_sub_topic_dist()`가 bad argument 예외를 발생시키는 문제를 해결했습니다.
|
313
|
+
* 예외 발생이 종종 크래시를 발생시키는 문제를 해결했습니다.
|
314
|
+
|
308
315
|
* 0.12.3 (2022-07-19)
|
309
316
|
* 기능 개선
|
310
317
|
* 이제 `tomotopy.LDAModel.add_doc()`로 빈 문서를 삽입할 경우 예외를 발생시키는 대신 그냥 무시합니다. 새로 추가된 인자인 `ignore_empty_words`를 False로 설정할 경우 이전처럼 예외를 발생시킵니다.
|
data/vendor/tomotopy/README.rst
CHANGED
@@ -309,6 +309,13 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
|
|
309
309
|
|
310
310
|
History
|
311
311
|
-------
|
312
|
+
* 0.12.4 (2023-01-22)
|
313
|
+
* New features
|
314
|
+
* Added support for macOS ARM64 architecture.
|
315
|
+
* Bug fixes
|
316
|
+
* Fixed an issue where `tomotopy.Document.get_sub_topic_dist()` raises a bad argument exception.
|
317
|
+
* Fixed an issue where exception raising sometimes causes crashes.
|
318
|
+
|
312
319
|
* 0.12.3 (2022-07-19)
|
313
320
|
* New features
|
314
321
|
* Now, inserting an empty document using `tomotopy.LDAModel.add_doc()` just ignores it instead of raising an exception. If the newly added argument `ignore_empty_words` is set to False, an exception is raised as before.
|
@@ -514,3 +521,18 @@ Bundled Libraries and Their License
|
|
514
521
|
|
515
522
|
* Mapbox Variant: `BSD License
|
516
523
|
<licenses_bundled/MapboxVariant>`_
|
524
|
+
|
525
|
+
Citation
|
526
|
+
---------
|
527
|
+
::
|
528
|
+
|
529
|
+
@software{minchul_lee_2022_6868418,
|
530
|
+
author = {Minchul Lee},
|
531
|
+
title = {bab2min/tomotopy: 0.12.3},
|
532
|
+
month = jul,
|
533
|
+
year = 2022,
|
534
|
+
publisher = {Zenodo},
|
535
|
+
version = {v0.12.3},
|
536
|
+
doi = {10.5281/zenodo.6868418},
|
537
|
+
url = {https://doi.org/10.5281/zenodo.6868418}
|
538
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomoto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|