tomoto 0.3.1-x86_64-darwin → 0.3.3-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/ext/tomoto/extconf.rb +11 -6
- data/ext/tomoto/llda.cpp +13 -0
- data/lib/tomoto/2.7/tomoto.bundle +0 -0
- data/lib/tomoto/3.0/tomoto.bundle +0 -0
- data/lib/tomoto/3.1/tomoto.bundle +0 -0
- data/lib/tomoto/3.2/tomoto.bundle +0 -0
- data/lib/tomoto/lda.rb +6 -1
- data/lib/tomoto/version.rb +1 -1
- data/vendor/EigenRand/EigenRand/EigenRand +2 -2
- data/vendor/EigenRand/README.md +4 -0
- data/vendor/tomotopy/README.kr.rst +7 -0
- data/vendor/tomotopy/README.rst +22 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b79fb57f7e14e6b483109a2ee2b9905b3ad30c5dc026477494d42238f6c3719d
|
4
|
+
data.tar.gz: 22f74746b73ad822f1fdd1cf8cabdcc28b995d1d3f18097c90ca2894dadb38f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acbd74efa07f328b5326944bd836bbb55c310a9d4877f645785072bb08aaabf52453b18c2371f70573acb5806f491659460fdb7881d5733bb576b2694727275f
|
7
|
+
data.tar.gz: cd255e7ce35ef651c1cada54406631b9ddf71d7ee29af66a223bb12053b237e1619e294f2c6ff8a3481eac205a37e1d1e60c0b9d30cd2280c39b6f2c6c32f2ee
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.3.3 (2023-02-01)
|
2
|
+
|
3
|
+
- Added `topic_label_dict` method to `LLDA`
|
4
|
+
- Fixed error with `infer` with loaded model
|
5
|
+
|
6
|
+
## 0.3.2 (2023-01-22)
|
7
|
+
|
8
|
+
- Added precompiled gem for Mac ARM
|
9
|
+
- Updated tomoto to 0.12.4
|
10
|
+
|
1
11
|
## 0.3.1 (2023-01-12)
|
2
12
|
|
3
13
|
- Added support for Ruby 3.2
|
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) 2019, bab2min
|
4
|
-
Copyright (c) 2020-
|
4
|
+
Copyright (c) 2020-2023 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem "tomoto"
|
13
13
|
```
|
14
14
|
|
15
|
-
ARM is not currently supported
|
16
|
-
|
17
15
|
## Getting Started
|
18
16
|
|
19
17
|
Train a model
|
20
18
|
|
21
19
|
```ruby
|
22
20
|
model = Tomoto::LDA.new(k: 2)
|
23
|
-
model.add_doc("
|
24
|
-
model.add_doc("
|
25
|
-
model.add_doc("
|
21
|
+
model.add_doc(["tokens", "from", "document", "one"])
|
22
|
+
model.add_doc(["tokens", "from", "document", "two"])
|
23
|
+
model.add_doc(["tokens", "from", "document", "three"])
|
26
24
|
model.train(100) # iterations
|
27
25
|
```
|
28
26
|
|
@@ -78,7 +76,7 @@ model.ll_per_word
|
|
78
76
|
Perform inference for unseen documents
|
79
77
|
|
80
78
|
```ruby
|
81
|
-
doc = model.make_doc("unseen doc")
|
79
|
+
doc = model.make_doc(["unseen", "doc"])
|
82
80
|
topic_dist, ll = model.infer(doc)
|
83
81
|
```
|
84
82
|
|
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
|
|
114
112
|
- [LDA](examples/lda_basic.rb)
|
115
113
|
- [HDP](examples/hdp_basic.rb)
|
116
114
|
|
117
|
-
## Tokenization
|
118
|
-
|
119
|
-
Documents are tokenized by whitespace by default, or you can perform your own tokenization.
|
120
|
-
|
121
|
-
```ruby
|
122
|
-
model.add_doc(["tokens", "from", "document", "one"])
|
123
|
-
```
|
124
|
-
|
125
115
|
## Performance
|
126
116
|
|
127
117
|
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
|
data/ext/tomoto/extconf.rb
CHANGED
@@ -3,12 +3,17 @@ require "mkmf-rice"
|
|
3
3
|
$CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
|
4
4
|
|
5
5
|
unless ENV["RUBY_CC_VERSION"]
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
default_optflags =
|
7
|
+
if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
8
|
+
# -march=native not supported with Mac ARM
|
9
|
+
""
|
10
|
+
else
|
11
|
+
# AVX-512F not supported yet
|
12
|
+
# https://github.com/bab2min/tomotopy/issues/188
|
13
|
+
"-march=native -mno-avx512f"
|
14
|
+
end
|
15
|
+
|
16
|
+
$CXXFLAGS << " " << with_config("optflags", default_optflags)
|
12
17
|
end
|
13
18
|
|
14
19
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
data/ext/tomoto/llda.cpp
CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
|
|
29
29
|
"topics_per_label",
|
30
30
|
[](tomoto::ILLDAModel& self) {
|
31
31
|
return self.getNumTopicsPerLabel();
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"topic_label_dict",
|
35
|
+
[](tomoto::ILLDAModel& self) {
|
36
|
+
auto dict = self.getTopicLabelDict();
|
37
|
+
Array res;
|
38
|
+
auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
|
39
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
40
|
+
VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
|
41
|
+
Object obj(value);
|
42
|
+
res.push(obj.call("force_encoding", utf8));
|
43
|
+
}
|
44
|
+
return res;
|
32
45
|
});
|
33
46
|
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tomoto/lda.rb
CHANGED
@@ -24,7 +24,7 @@ module Tomoto
|
|
24
24
|
|
25
25
|
# TODO support multiple docs
|
26
26
|
def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
|
27
|
-
raise "cannot infer with untrained model" unless
|
27
|
+
raise "cannot infer with untrained model" unless trained?
|
28
28
|
_infer(doc, iter, tolerance, workers, to_ps(parallel), together)
|
29
29
|
end
|
30
30
|
|
@@ -86,6 +86,7 @@ module Tomoto
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
+
# TODO raise error if iterations < 1
|
89
90
|
def train(iterations = 10, workers: 0, parallel: :default)
|
90
91
|
prepare
|
91
92
|
_train(iterations, workers, to_ps(parallel))
|
@@ -97,6 +98,10 @@ module Tomoto
|
|
97
98
|
|
98
99
|
private
|
99
100
|
|
101
|
+
def trained?
|
102
|
+
global_step.positive?
|
103
|
+
end
|
104
|
+
|
100
105
|
def prepare
|
101
106
|
unless defined?(@prepared)
|
102
107
|
_prepare(@min_cf, @min_df, @rm_top)
|
data/lib/tomoto/version.rb
CHANGED
data/vendor/EigenRand/README.md
CHANGED
@@ -385,6 +385,10 @@ MIT License
|
|
385
385
|
|
386
386
|
## History
|
387
387
|
|
388
|
+
### 0.4.1 (2022-08-13)
|
389
|
+
* Fixed a bug where double-type generation with std::mt19937 fails compilation.
|
390
|
+
* Fixed a bug where `UniformIntGen` in scalar mode generates numbers in the wrong range.
|
391
|
+
|
388
392
|
### 0.4.0 alpha (2021-09-28)
|
389
393
|
* Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
|
390
394
|
* Now EigenRand has compatibility to `Eigen 3.4.0`.
|
data/vendor/tomotopy/README.kr.rst
CHANGED
@@ -305,6 +305,13 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
|
|
305
305
|
|
306
306
|
역사
|
307
307
|
-------
|
308
|
+
* 0.12.4 (2023-01-22)
|
309
|
+
* New features
|
310
|
+
* macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
|
311
|
+
* Bug fixes
|
312
|
+
* `tomotopy.Document.get_sub_topic_dist()`가 bad argument 예외를 발생시키는 문제를 해결했습니다.
|
313
|
+
* 예외 발생이 종종 크래시를 발생시키는 문제를 해결했습니다.
|
314
|
+
|
308
315
|
* 0.12.3 (2022-07-19)
|
309
316
|
* 기능 개선
|
310
317
|
* 이제 `tomotopy.LDAModel.add_doc()`로 빈 문서를 삽입할 경우 예외를 발생시키는 대신 그냥 무시합니다. 새로 추가된 인자인 `ignore_empty_words`를 False로 설정할 경우 이전처럼 예외를 발생시킵니다.
|
data/vendor/tomotopy/README.rst
CHANGED
@@ -309,6 +309,13 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
|
|
309
309
|
|
310
310
|
History
|
311
311
|
-------
|
312
|
+
* 0.12.4 (2023-01-22)
|
313
|
+
* New features
|
314
|
+
* Added support for macOS ARM64 architecture.
|
315
|
+
* Bug fixes
|
316
|
+
* Fixed an issue where `tomotopy.Document.get_sub_topic_dist()` raises a bad argument exception.
|
317
|
+
* Fixed an issue where exception raising sometimes causes crashes.
|
318
|
+
|
312
319
|
* 0.12.3 (2022-07-19)
|
313
320
|
* New features
|
314
321
|
* Now, inserting an empty document using `tomotopy.LDAModel.add_doc()` just ignores it instead of raising an exception. If the newly added argument `ignore_empty_words` is set to False, an exception is raised as before.
|
@@ -514,3 +521,18 @@ Bundled Libraries and Their License
|
|
514
521
|
|
515
522
|
* Mapbox Variant: `BSD License
|
516
523
|
<licenses_bundled/MapboxVariant>`_
|
524
|
+
|
525
|
+
Citation
|
526
|
+
---------
|
527
|
+
::
|
528
|
+
|
529
|
+
@software{minchul_lee_2022_6868418,
|
530
|
+
author = {Minchul Lee},
|
531
|
+
title = {bab2min/tomotopy: 0.12.3},
|
532
|
+
month = jul,
|
533
|
+
year = 2022,
|
534
|
+
publisher = {Zenodo},
|
535
|
+
version = {v0.12.3},
|
536
|
+
doi = {10.5281/zenodo.6868418},
|
537
|
+
url = {https://doi.org/10.5281/zenodo.6868418}
|
538
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomoto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|