tomoto 0.3.2-x86_64-linux → 0.4.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/ext/tomoto/llda.cpp +13 -0
- data/lib/tomoto/3.0/tomoto.so +0 -0
- data/lib/tomoto/3.1/tomoto.so +0 -0
- data/lib/tomoto/3.2/tomoto.so +0 -0
- data/lib/tomoto/{2.7 → 3.3}/tomoto.so +0 -0
- data/lib/tomoto/lda.rb +6 -1
- data/lib/tomoto/version.rb +1 -1
- data/lib/tomoto.rb +14 -14
- data/vendor/tomotopy/README.kr.rst +17 -0
- data/vendor/tomotopy/README.rst +17 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a88c0212592b6c7d2f98c1d0656f23c5fa71f18741f90f811f3adb931bdbb25
|
4
|
+
data.tar.gz: 1be2f8b2d06af53e8d4fa453c00e88a1197b6a9beb9fb882594b820d34836967
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bcb844982b7b8398b5dbe1c6d52411c03aadfdc44b2b2f8ff17d7faf33ff314d4a61bc31b34d50deea0d4385b90700fc56188c8ed99234672eb14a0817bb93cd
|
7
|
+
data.tar.gz: cb5aa7bead319ee64727f4a1b261b7d84a3047b0c4863c38a39268abfa449068d84d2c10517efd06dad9744ad62fe0d96f177654309698aa0bb37f568cd3ba94
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.4.0 (2023-12-28)
|
2
|
+
|
3
|
+
- Added precompiled gem for Linux ARM
|
4
|
+
- Updated tomoto to 0.12.7
|
5
|
+
- Dropped support for Ruby < 3
|
6
|
+
|
7
|
+
## 0.3.3 (2023-02-01)
|
8
|
+
|
9
|
+
- Added `topic_label_dict` method to `LLDA`
|
10
|
+
- Fixed error with `infer` with loaded model
|
11
|
+
|
1
12
|
## 0.3.2 (2023-01-22)
|
2
13
|
|
3
14
|
- Added precompiled gem for Mac ARM
|
data/LICENSE.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
MIT License
|
2
2
|
|
3
3
|
Copyright (c) 2019, bab2min
|
4
|
-
Copyright (c) 2020-
|
4
|
+
Copyright (c) 2020-2023 Andrew Kane
|
5
5
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem "tomoto"
|
13
13
|
```
|
14
14
|
|
15
|
-
ARM is not currently supported
|
16
|
-
|
17
15
|
## Getting Started
|
18
16
|
|
19
17
|
Train a model
|
20
18
|
|
21
19
|
```ruby
|
22
20
|
model = Tomoto::LDA.new(k: 2)
|
23
|
-
model.add_doc("
|
24
|
-
model.add_doc("
|
25
|
-
model.add_doc("
|
21
|
+
model.add_doc(["tokens", "from", "document", "one"])
|
22
|
+
model.add_doc(["tokens", "from", "document", "two"])
|
23
|
+
model.add_doc(["tokens", "from", "document", "three"])
|
26
24
|
model.train(100) # iterations
|
27
25
|
```
|
28
26
|
|
@@ -78,7 +76,7 @@ model.ll_per_word
|
|
78
76
|
Perform inference for unseen documents
|
79
77
|
|
80
78
|
```ruby
|
81
|
-
doc = model.make_doc("unseen doc")
|
79
|
+
doc = model.make_doc(["unseen", "doc"])
|
82
80
|
topic_dist, ll = model.infer(doc)
|
83
81
|
```
|
84
82
|
|
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
|
|
114
112
|
- [LDA](examples/lda_basic.rb)
|
115
113
|
- [HDP](examples/hdp_basic.rb)
|
116
114
|
|
117
|
-
## Tokenization
|
118
|
-
|
119
|
-
Documents are tokenized by whitespace by default, or you can perform your own tokenization.
|
120
|
-
|
121
|
-
```ruby
|
122
|
-
model.add_doc(["tokens", "from", "document", "one"])
|
123
|
-
```
|
124
|
-
|
125
115
|
## Performance
|
126
116
|
|
127
117
|
tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
|
data/ext/tomoto/llda.cpp
CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
|
|
29
29
|
"topics_per_label",
|
30
30
|
[](tomoto::ILLDAModel& self) {
|
31
31
|
return self.getNumTopicsPerLabel();
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"topic_label_dict",
|
35
|
+
[](tomoto::ILLDAModel& self) {
|
36
|
+
auto dict = self.getTopicLabelDict();
|
37
|
+
Array res;
|
38
|
+
auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
|
39
|
+
for (size_t i = 0; i < dict.size(); i++) {
|
40
|
+
VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
|
41
|
+
Object obj(value);
|
42
|
+
res.push(obj.call("force_encoding", utf8));
|
43
|
+
}
|
44
|
+
return res;
|
32
45
|
});
|
33
46
|
}
|
data/lib/tomoto/3.0/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/3.1/tomoto.so
CHANGED
Binary file
|
data/lib/tomoto/3.2/tomoto.so
CHANGED
Binary file
|
Binary file
|
data/lib/tomoto/lda.rb
CHANGED
@@ -24,7 +24,7 @@ module Tomoto
|
|
24
24
|
|
25
25
|
# TODO support multiple docs
|
26
26
|
def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
|
27
|
-
raise "cannot infer with untrained model" unless
|
27
|
+
raise "cannot infer with untrained model" unless trained?
|
28
28
|
_infer(doc, iter, tolerance, workers, to_ps(parallel), together)
|
29
29
|
end
|
30
30
|
|
@@ -86,6 +86,7 @@ module Tomoto
|
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
89
|
+
# TODO raise error if iterations < 1
|
89
90
|
def train(iterations = 10, workers: 0, parallel: :default)
|
90
91
|
prepare
|
91
92
|
_train(iterations, workers, to_ps(parallel))
|
@@ -97,6 +98,10 @@ module Tomoto
|
|
97
98
|
|
98
99
|
private
|
99
100
|
|
101
|
+
def trained?
|
102
|
+
global_step.positive?
|
103
|
+
end
|
104
|
+
|
100
105
|
def prepare
|
101
106
|
unless defined?(@prepared)
|
102
107
|
_prepare(@min_cf, @min_df, @rm_top)
|
data/lib/tomoto/version.rb
CHANGED
data/lib/tomoto.rb
CHANGED
@@ -6,20 +6,20 @@ rescue LoadError
|
|
6
6
|
end
|
7
7
|
|
8
8
|
# modules
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
9
|
+
require_relative "tomoto/ct"
|
10
|
+
require_relative "tomoto/dmr"
|
11
|
+
require_relative "tomoto/dt"
|
12
|
+
require_relative "tomoto/gdmr"
|
13
|
+
require_relative "tomoto/hdp"
|
14
|
+
require_relative "tomoto/hlda"
|
15
|
+
require_relative "tomoto/hpa"
|
16
|
+
require_relative "tomoto/lda"
|
17
|
+
require_relative "tomoto/llda"
|
18
|
+
require_relative "tomoto/mglda"
|
19
|
+
require_relative "tomoto/pa"
|
20
|
+
require_relative "tomoto/plda"
|
21
|
+
require_relative "tomoto/slda"
|
22
|
+
require_relative "tomoto/version"
|
23
23
|
|
24
24
|
module Tomoto
|
25
25
|
PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
|
@@ -305,6 +305,23 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
|
|
305
305
|
|
306
306
|
역사
|
307
307
|
-------
|
308
|
+
* 0.12.7 (2023-12-19)
|
309
|
+
* 신규 기능
|
310
|
+
* 토픽 모델 뷰어인 `tomotopy.viewer.open_viewer()`가 추가되었습니다.
|
311
|
+
* `tomotopy.utils.Corpus.process()`의 속도를 개선했습니다.
|
312
|
+
* Bug fixes
|
313
|
+
* `Document.span`이 이제 바이트 단위가 아니라 문자 단위로 범위를 제대로 반환합니다.
|
314
|
+
|
315
|
+
* 0.12.6 (2023-12-11)
|
316
|
+
* 신규 기능
|
317
|
+
* `tomotopy.LDAModel.train`과 `tomotopy.LDAModel.set_word_prior`에 몇가지 편의 기능을 추가했습니다.
|
318
|
+
* `LDAModel.train`가 이제 학습 진행상황을 모니터링할 수 있는 `callback`, `callback_interval`, `show_progress` 인자를 지원합니다.
|
319
|
+
* `LDAModel.set_word_prior`가 이제 `prior` 인자로 `Dict[int, float]` 타입도 받을 수 있게 되었습니다.
|
320
|
+
|
321
|
+
* 0.12.5 (2023-08-03)
|
322
|
+
* 신규 기능
|
323
|
+
* Linux ARM64 아키텍처에 대한 지원을 추가했습니다.
|
324
|
+
|
308
325
|
* 0.12.4 (2023-01-22)
|
309
326
|
* New features
|
310
327
|
* macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
|
data/vendor/tomotopy/README.rst
CHANGED
@@ -309,6 +309,23 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
|
|
309
309
|
|
310
310
|
History
|
311
311
|
-------
|
312
|
+
* 0.12.7 (2023-12-19)
|
313
|
+
* New features
|
314
|
+
* Added Topic Model Viewer `tomotopy.viewer.open_viewer()`
|
315
|
+
* Optimized the performance of `tomotopy.utils.Corpus.process()`
|
316
|
+
* Bug fixes
|
317
|
+
* `Document.span` now returns ranges in character units, not byte units.
|
318
|
+
|
319
|
+
* 0.12.6 (2023-12-11)
|
320
|
+
* New features
|
321
|
+
* Added some convenience features to `tomotopy.LDAModel.train` and `tomotopy.LDAModel.set_word_prior`.
|
322
|
+
* `LDAModel.train` now has new arguments `callback`, `callback_interval` and `show_progress` to monitor the training progress.
|
323
|
+
* `LDAModel.set_word_prior` can now accept a `Dict[int, float]` for its `prior` argument.
|
324
|
+
|
325
|
+
* 0.12.5 (2023-08-03)
|
326
|
+
* New features
|
327
|
+
* Added support for Linux ARM64 architecture.
|
328
|
+
|
312
329
|
* 0.12.4 (2023-01-22)
|
313
330
|
* New features
|
314
331
|
* Added support for macOS ARM64 architecture.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomoto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -36,10 +36,10 @@ files:
|
|
36
36
|
- ext/tomoto/tomoto.cpp
|
37
37
|
- ext/tomoto/utils.h
|
38
38
|
- lib/tomoto.rb
|
39
|
-
- lib/tomoto/2.7/tomoto.so
|
40
39
|
- lib/tomoto/3.0/tomoto.so
|
41
40
|
- lib/tomoto/3.1/tomoto.so
|
42
41
|
- lib/tomoto/3.2/tomoto.so
|
42
|
+
- lib/tomoto/3.3/tomoto.so
|
43
43
|
- lib/tomoto/ct.rb
|
44
44
|
- lib/tomoto/dmr.rb
|
45
45
|
- lib/tomoto/dt.rb
|
@@ -124,10 +124,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
124
124
|
requirements:
|
125
125
|
- - ">="
|
126
126
|
- !ruby/object:Gem::Version
|
127
|
-
version: '
|
127
|
+
version: '3.0'
|
128
128
|
- - "<"
|
129
129
|
- !ruby/object:Gem::Version
|
130
|
-
version: 3.
|
130
|
+
version: 3.4.dev
|
131
131
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
132
|
requirements:
|
133
133
|
- - ">="
|