tomoto 0.3.2-x86_64-darwin → 0.4.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d119a9cebe3238d7adec0b7599d44be4d0236b8141f926f7d38fbb7cac55b4c
4
- data.tar.gz: 821d52e3399b0d380012c8c9e4baf1d7681d1879363921f1342fa14c427e239e
3
+ metadata.gz: aeb4e2b99fb8418ef17fce29fa3616c880d29717aef2945f74726d6bb2672690
4
+ data.tar.gz: b232190c5d7a9e8179294427f1dc31f32f7ccf817d4a98d6340da4aaa956d155
5
5
  SHA512:
6
- metadata.gz: 4213d5f13f26e2fd41a1f6569fc54b4813315c9e99d03ece9639e6fe7311bb3918fe81d297954589f0ae5e499509816a9b97461b6e1d20c3a8553873799a8e1b
7
- data.tar.gz: aa404ce2e31f8311245916ce022b8f3e6776d4db8cfcda25b9e9314b6bb83e7d85c559985911b2005bdf29e7ab1f82d5bd4379adb5414575767f31e039c8e762
6
+ metadata.gz: 3565cff86b310ca393384eddea2269e1b795a0634d2565126fc53ac1d87189bcce289dd7f107f3d52d8f1873c5f948e1bbeeb13d41877ffe8992c65accb6f196
7
+ data.tar.gz: 8672a0c82706582c494b70eef801234bb3d7b3e08368c5e874eb2112588dec5dbdf300b450333ea6a84c0ebe6fa934512f15d3e5365c44a5eba4b0a101bbb8e3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.4.0 (2023-12-28)
2
+
3
+ - Added precompiled gem for Linux ARM
4
+ - Updated tomoto to 0.12.7
5
+ - Dropped support for Ruby < 3
6
+
7
+ ## 0.3.3 (2023-02-01)
8
+
9
+ - Added `topic_label_dict` method to `LLDA`
10
+ - Fixed error with `infer` with loaded model
11
+
1
12
  ## 0.3.2 (2023-01-22)
2
13
 
3
14
  - Added precompiled gem for Mac ARM
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2019, bab2min
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2023 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
12
12
  gem "tomoto"
13
13
  ```
14
14
 
15
- ARM is not currently supported
16
-
17
15
  ## Getting Started
18
16
 
19
17
  Train a model
20
18
 
21
19
  ```ruby
22
20
  model = Tomoto::LDA.new(k: 2)
23
- model.add_doc("text from document one")
24
- model.add_doc("text from document two")
25
- model.add_doc("text from document three")
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
26
24
  model.train(100) # iterations
27
25
  ```
28
26
 
@@ -78,7 +76,7 @@ model.ll_per_word
78
76
  Perform inference for unseen documents
79
77
 
80
78
  ```ruby
81
- doc = model.make_doc("unseen doc")
79
+ doc = model.make_doc(["unseen", "doc"])
82
80
  topic_dist, ll = model.infer(doc)
83
81
  ```
84
82
 
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
114
112
  - [LDA](examples/lda_basic.rb)
115
113
  - [HDP](examples/hdp_basic.rb)
116
114
 
117
- ## Tokenization
118
-
119
- Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
-
121
- ```ruby
122
- model.add_doc(["tokens", "from", "document", "one"])
123
- ```
124
-
125
115
  ## Performance
126
116
 
127
117
  tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
data/ext/tomoto/llda.cpp CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
29
29
  "topics_per_label",
30
30
  [](tomoto::ILLDAModel& self) {
31
31
  return self.getNumTopicsPerLabel();
32
+ })
33
+ .define_method(
34
+ "topic_label_dict",
35
+ [](tomoto::ILLDAModel& self) {
36
+ auto dict = self.getTopicLabelDict();
37
+ Array res;
38
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
39
+ for (size_t i = 0; i < dict.size(); i++) {
40
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
41
+ Object obj(value);
42
+ res.push(obj.call("force_encoding", utf8));
43
+ }
44
+ return res;
32
45
  });
33
46
  }
Binary file
Binary file
Binary file
Binary file
data/lib/tomoto/lda.rb CHANGED
@@ -24,7 +24,7 @@ module Tomoto
24
24
 
25
25
  # TODO support multiple docs
26
26
  def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
27
- raise "cannot infer with untrained model" unless defined?(@prepared)
27
+ raise "cannot infer with untrained model" unless trained?
28
28
  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
29
29
  end
30
30
 
@@ -86,6 +86,7 @@ module Tomoto
86
86
  end
87
87
  end
88
88
 
89
+ # TODO raise error if iterations < 1
89
90
  def train(iterations = 10, workers: 0, parallel: :default)
90
91
  prepare
91
92
  _train(iterations, workers, to_ps(parallel))
@@ -97,6 +98,10 @@ module Tomoto
97
98
 
98
99
  private
99
100
 
101
+ def trained?
102
+ global_step.positive?
103
+ end
104
+
100
105
  def prepare
101
106
  unless defined?(@prepared)
102
107
  _prepare(@min_cf, @min_df, @rm_top)
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/tomoto.rb CHANGED
@@ -6,20 +6,20 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  # modules
9
- require "tomoto/ct"
10
- require "tomoto/dmr"
11
- require "tomoto/dt"
12
- require "tomoto/gdmr"
13
- require "tomoto/hdp"
14
- require "tomoto/hlda"
15
- require "tomoto/hpa"
16
- require "tomoto/lda"
17
- require "tomoto/llda"
18
- require "tomoto/mglda"
19
- require "tomoto/pa"
20
- require "tomoto/plda"
21
- require "tomoto/slda"
22
- require "tomoto/version"
9
+ require_relative "tomoto/ct"
10
+ require_relative "tomoto/dmr"
11
+ require_relative "tomoto/dt"
12
+ require_relative "tomoto/gdmr"
13
+ require_relative "tomoto/hdp"
14
+ require_relative "tomoto/hlda"
15
+ require_relative "tomoto/hpa"
16
+ require_relative "tomoto/lda"
17
+ require_relative "tomoto/llda"
18
+ require_relative "tomoto/mglda"
19
+ require_relative "tomoto/pa"
20
+ require_relative "tomoto/plda"
21
+ require_relative "tomoto/slda"
22
+ require_relative "tomoto/version"
23
23
 
24
24
  module Tomoto
25
25
  PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
@@ -305,6 +305,23 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
305
305
 
306
306
  역사
307
307
  -------
308
+ * 0.12.7 (2023-12-19)
309
+ * 신규 기능
310
+ * 토픽 모델 뷰어인 `tomotopy.viewer.open_viewer()`가 추가되었습니다.
311
+ * `tomotopy.utils.Corpus.process()`의 속도를 개선했습니다.
312
+ * Bug fixes
313
+ * `Document.span`이 이제 바이트 단위가 아니라 문자 단위로 범위를 제대로 반환합니다.
314
+
315
+ * 0.12.6 (2023-12-11)
316
+ * 신규 기능
317
+ * `tomotopy.LDAModel.train`과 `tomotopy.LDAModel.set_word_prior`에 몇가지 편의 기능을 추가했습니다.
318
+ * `LDAModel.train`가 이제 학습 진행상황을 모니터링할 수 있는 `callback`, `callback_interval`, `show_progress` 인자를 지원합니다.
319
+ * `LDAModel.set_word_prior`가 이제 `prior` 인자로 `Dict[int, float]` 타입도 받을 수 있게 되었습니다.
320
+
321
+ * 0.12.5 (2023-08-03)
322
+ * 신규 기능
323
+ * Linux ARM64 아키텍처에 대한 지원을 추가했습니다.
324
+
308
325
  * 0.12.4 (2023-01-22)
309
326
  * New features
310
327
  * macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
@@ -309,6 +309,23 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
309
309
 
310
310
  History
311
311
  -------
312
+ * 0.12.7 (2023-12-19)
313
+ * New features
314
+ * Added Topic Model Viewer `tomotopy.viewer.open_viewer()`
315
+ * Optimized the performance of `tomotopy.utils.Corpus.process()`
316
+ * Bug fixes
317
+ * `Document.span` now returns the ranges in character unit, not in byte unit.
318
+
319
+ * 0.12.6 (2023-12-11)
320
+ * New features
321
+ * Added some convenience features to `tomotopy.LDAModel.train` and `tomotopy.LDAModel.set_word_prior`.
322
+ * `LDAModel.train` now has new arguments `callback`, `callback_interval` and `show_progress` to monitor the training progress.
323
+ * `LDAModel.set_word_prior` now can accept `Dict[int, float]` type as its argument `prior`.
324
+
325
+ * 0.12.5 (2023-08-03)
326
+ * New features
327
+ * Added support for Linux ARM64 architecture.
328
+
312
329
  * 0.12.4 (2023-01-22)
313
330
  * New features
314
331
  * Added support for macOS ARM64 architecture.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-23 00:00:00.000000000 Z
11
+ date: 2023-12-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -36,10 +36,10 @@ files:
36
36
  - ext/tomoto/tomoto.cpp
37
37
  - ext/tomoto/utils.h
38
38
  - lib/tomoto.rb
39
- - lib/tomoto/2.7/tomoto.bundle
40
39
  - lib/tomoto/3.0/tomoto.bundle
41
40
  - lib/tomoto/3.1/tomoto.bundle
42
41
  - lib/tomoto/3.2/tomoto.bundle
42
+ - lib/tomoto/3.3/tomoto.bundle
43
43
  - lib/tomoto/ct.rb
44
44
  - lib/tomoto/dmr.rb
45
45
  - lib/tomoto/dt.rb
@@ -124,10 +124,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
124
124
  requirements:
125
125
  - - ">="
126
126
  - !ruby/object:Gem::Version
127
- version: '2.7'
127
+ version: '3.0'
128
128
  - - "<"
129
129
  - !ruby/object:Gem::Version
130
- version: 3.3.dev
130
+ version: 3.4.dev
131
131
  required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  requirements:
133
133
  - - ">="