tomoto 0.3.2-x86_64-linux → 0.4.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee297a6bc0b0924af7d5eb880daf0922e57635fa24dbced92261c99a5a321330
4
- data.tar.gz: '0163269051abc276e4748337c1ad5d9e92cf563fce54a5b541cc88db5958acef'
3
+ metadata.gz: 8a88c0212592b6c7d2f98c1d0656f23c5fa71f18741f90f811f3adb931bdbb25
4
+ data.tar.gz: 1be2f8b2d06af53e8d4fa453c00e88a1197b6a9beb9fb882594b820d34836967
5
5
  SHA512:
6
- metadata.gz: 73459d05fa990e8a5b44c845b20bc599659db1d8d22dc15dcdd5a28aa99acb40e8d0b338601126b1d39afeeb297e8ec235029dbc6d175df9b4811d9afbf5b997
7
- data.tar.gz: fe0f88f2c4fbbae79674827ef69592fba7a6ea5d08e2d19f357f94a58bbdfae7c9c3c847027fb17710581332624ac67f29088ab3c83ab34139d85d40fcee41f2
6
+ metadata.gz: bcb844982b7b8398b5dbe1c6d52411c03aadfdc44b2b2f8ff17d7faf33ff314d4a61bc31b34d50deea0d4385b90700fc56188c8ed99234672eb14a0817bb93cd
7
+ data.tar.gz: cb5aa7bead319ee64727f4a1b261b7d84a3047b0c4863c38a39268abfa449068d84d2c10517efd06dad9744ad62fe0d96f177654309698aa0bb37f568cd3ba94
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.4.0 (2023-12-28)
2
+
3
+ - Added precompiled gem for Linux ARM
4
+ - Updated tomoto to 0.12.7
5
+ - Dropped support for Ruby < 3
6
+
7
+ ## 0.3.3 (2023-02-01)
8
+
9
+ - Added `topic_label_dict` method to `LLDA`
10
+ - Fixed error with `infer` with loaded model
11
+
1
12
  ## 0.3.2 (2023-01-22)
2
13
 
3
14
  - Added precompiled gem for Mac ARM
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2019, bab2min
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2023 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
12
12
  gem "tomoto"
13
13
  ```
14
14
 
15
- ARM is not currently supported
16
-
17
15
  ## Getting Started
18
16
 
19
17
  Train a model
20
18
 
21
19
  ```ruby
22
20
  model = Tomoto::LDA.new(k: 2)
23
- model.add_doc("text from document one")
24
- model.add_doc("text from document two")
25
- model.add_doc("text from document three")
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
26
24
  model.train(100) # iterations
27
25
  ```
28
26
 
@@ -78,7 +76,7 @@ model.ll_per_word
78
76
  Perform inference for unseen documents
79
77
 
80
78
  ```ruby
81
- doc = model.make_doc("unseen doc")
79
+ doc = model.make_doc(["unseen", "doc"])
82
80
  topic_dist, ll = model.infer(doc)
83
81
  ```
84
82
 
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
114
112
  - [LDA](examples/lda_basic.rb)
115
113
  - [HDP](examples/hdp_basic.rb)
116
114
 
117
- ## Tokenization
118
-
119
- Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
-
121
- ```ruby
122
- model.add_doc(["tokens", "from", "document", "one"])
123
- ```
124
-
125
115
  ## Performance
126
116
 
127
117
  tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
data/ext/tomoto/llda.cpp CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
29
29
  "topics_per_label",
30
30
  [](tomoto::ILLDAModel& self) {
31
31
  return self.getNumTopicsPerLabel();
32
+ })
33
+ .define_method(
34
+ "topic_label_dict",
35
+ [](tomoto::ILLDAModel& self) {
36
+ auto dict = self.getTopicLabelDict();
37
+ Array res;
38
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
39
+ for (size_t i = 0; i < dict.size(); i++) {
40
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
41
+ Object obj(value);
42
+ res.push(obj.call("force_encoding", utf8));
43
+ }
44
+ return res;
32
45
  });
33
46
  }
Binary file
Binary file
Binary file
Binary file
data/lib/tomoto/lda.rb CHANGED
@@ -24,7 +24,7 @@ module Tomoto
24
24
 
25
25
  # TODO support multiple docs
26
26
  def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
27
- raise "cannot infer with untrained model" unless defined?(@prepared)
27
+ raise "cannot infer with untrained model" unless trained?
28
28
  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
29
29
  end
30
30
 
@@ -86,6 +86,7 @@ module Tomoto
86
86
  end
87
87
  end
88
88
 
89
+ # TODO raise error if iterations < 1
89
90
  def train(iterations = 10, workers: 0, parallel: :default)
90
91
  prepare
91
92
  _train(iterations, workers, to_ps(parallel))
@@ -97,6 +98,10 @@ module Tomoto
97
98
 
98
99
  private
99
100
 
101
+ def trained?
102
+ global_step.positive?
103
+ end
104
+
100
105
  def prepare
101
106
  unless defined?(@prepared)
102
107
  _prepare(@min_cf, @min_df, @rm_top)
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/tomoto.rb CHANGED
@@ -6,20 +6,20 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  # modules
9
- require "tomoto/ct"
10
- require "tomoto/dmr"
11
- require "tomoto/dt"
12
- require "tomoto/gdmr"
13
- require "tomoto/hdp"
14
- require "tomoto/hlda"
15
- require "tomoto/hpa"
16
- require "tomoto/lda"
17
- require "tomoto/llda"
18
- require "tomoto/mglda"
19
- require "tomoto/pa"
20
- require "tomoto/plda"
21
- require "tomoto/slda"
22
- require "tomoto/version"
9
+ require_relative "tomoto/ct"
10
+ require_relative "tomoto/dmr"
11
+ require_relative "tomoto/dt"
12
+ require_relative "tomoto/gdmr"
13
+ require_relative "tomoto/hdp"
14
+ require_relative "tomoto/hlda"
15
+ require_relative "tomoto/hpa"
16
+ require_relative "tomoto/lda"
17
+ require_relative "tomoto/llda"
18
+ require_relative "tomoto/mglda"
19
+ require_relative "tomoto/pa"
20
+ require_relative "tomoto/plda"
21
+ require_relative "tomoto/slda"
22
+ require_relative "tomoto/version"
23
23
 
24
24
  module Tomoto
25
25
  PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
@@ -305,6 +305,23 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
305
305
 
306
306
  역사
307
307
  -------
308
+ * 0.12.7 (2023-12-19)
309
+ * 신규 기능
310
+ * 토픽 모델 뷰어인 `tomotopy.viewer.open_viewer()`가 추가되었습니다.
311
+ * `tomotopy.utils.Corpus.process()`의 속도를 개선했습니다.
312
+ * Bug fixes
313
+ * `Document.span`이 이제 바이트 단위가 아니라 문자 단위로 범위를 제대로 반환합니다.
314
+
315
+ * 0.12.6 (2023-12-11)
316
+ * 신규 기능
317
+ * `tomotopy.LDAModel.train`과 `tomotopy.LDAModel.set_word_prior`에 몇가지 편의 기능을 추가했습니다.
318
+ * `LDAModel.train`가 이제 학습 진행상황을 모니터링할 수 있는 `callback`, `callback_interval`, `show_progress` 인자를 지원합니다.
319
+ * `LDAModel.set_word_prior`가 이제 `prior` 인자로 `Dict[int, float]` 타입도 받을 수 있게 되었습니다.
320
+
321
+ * 0.12.5 (2023-08-03)
322
+ * 신규 기능
323
+ * Linux ARM64 아키텍처에 대한 지원을 추가했습니다.
324
+
308
325
  * 0.12.4 (2023-01-22)
309
326
  * New features
310
327
  * macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
@@ -309,6 +309,23 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
309
309
 
310
310
  History
311
311
  -------
312
+ * 0.12.7 (2023-12-19)
313
+ * New features
314
+ * Added Topic Model Viewer `tomotopy.viewer.open_viewer()`
315
+ * Optimized the performance of `tomotopy.utils.Corpus.process()`
316
+ * Bug fixes
317
+ * `Document.span` now returns the ranges in character unit, not in byte unit.
318
+
319
+ * 0.12.6 (2023-12-11)
320
+ * New features
321
+ * Added some convenience features to `tomotopy.LDAModel.train` and `tomotopy.LDAModel.set_word_prior`.
322
+ * `LDAModel.train` now has new arguments `callback`, `callback_interval` and `show_progress` to monitor the training progress.
323
+ * `LDAModel.set_word_prior` now can accept `Dict[int, float]` type as its argument `prior`.
324
+
325
+ * 0.12.5 (2023-08-03)
326
+ * New features
327
+ * Added support for Linux ARM64 architecture.
328
+
312
329
  * 0.12.4 (2023-01-22)
313
330
  * New features
314
331
  * Added support for macOS ARM64 architecture.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-23 00:00:00.000000000 Z
11
+ date: 2023-12-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -36,10 +36,10 @@ files:
36
36
  - ext/tomoto/tomoto.cpp
37
37
  - ext/tomoto/utils.h
38
38
  - lib/tomoto.rb
39
- - lib/tomoto/2.7/tomoto.so
40
39
  - lib/tomoto/3.0/tomoto.so
41
40
  - lib/tomoto/3.1/tomoto.so
42
41
  - lib/tomoto/3.2/tomoto.so
42
+ - lib/tomoto/3.3/tomoto.so
43
43
  - lib/tomoto/ct.rb
44
44
  - lib/tomoto/dmr.rb
45
45
  - lib/tomoto/dt.rb
@@ -124,10 +124,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
124
124
  requirements:
125
125
  - - ">="
126
126
  - !ruby/object:Gem::Version
127
- version: '2.7'
127
+ version: '3.0'
128
128
  - - "<"
129
129
  - !ruby/object:Gem::Version
130
- version: 3.3.dev
130
+ version: 3.4.dev
131
131
  required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  requirements:
133
133
  - - ">="