tomoto 0.3.1-x86_64-darwin → 0.3.3-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3491de664af833b4524812895f15b9939976eb5014dd4812e70f2a497a666d67
4
- data.tar.gz: 65b39aa5e4ff68d68a6f0d11dc3caf62b4d709d29f1f1fb24a4bb4219f3151b5
3
+ metadata.gz: b79fb57f7e14e6b483109a2ee2b9905b3ad30c5dc026477494d42238f6c3719d
4
+ data.tar.gz: 22f74746b73ad822f1fdd1cf8cabdcc28b995d1d3f18097c90ca2894dadb38f2
5
5
  SHA512:
6
- metadata.gz: 2cf24cb5bca2263a28044e8fb6298cd39192f28ff7fdfe085470a45b17ebebbf69c75a12bd1d9c61766998a407bf128838419a73c7646ba4218bd6626ce192bf
7
- data.tar.gz: 6cae693808f346e69f2a7817803b1fe2c2c94af936f8b8929e86f2518976949b401dec159d3ab035c4d6aedec44734521a8763a9dcb7acd83dba696888389612
6
+ metadata.gz: acbd74efa07f328b5326944bd836bbb55c310a9d4877f645785072bb08aaabf52453b18c2371f70573acb5806f491659460fdb7881d5733bb576b2694727275f
7
+ data.tar.gz: cd255e7ce35ef651c1cada54406631b9ddf71d7ee29af66a223bb12053b237e1619e294f2c6ff8a3481eac205a37e1d1e60c0b9d30cd2280c39b6f2c6c32f2ee
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.3.3 (2023-02-01)
2
+
3
+ - Added `topic_label_dict` method to `LLDA`
4
+ - Fixed error with `infer` with loaded model
5
+
6
+ ## 0.3.2 (2023-01-22)
7
+
8
+ - Added precompiled gem for Mac ARM
9
+ - Updated tomoto to 0.12.4
10
+
1
11
  ## 0.3.1 (2023-01-12)
2
12
 
3
13
  - Added support for Ruby 3.2
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2019, bab2min
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2023 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
12
12
  gem "tomoto"
13
13
  ```
14
14
 
15
- ARM is not currently supported
16
-
17
15
  ## Getting Started
18
16
 
19
17
  Train a model
20
18
 
21
19
  ```ruby
22
20
  model = Tomoto::LDA.new(k: 2)
23
- model.add_doc("text from document one")
24
- model.add_doc("text from document two")
25
- model.add_doc("text from document three")
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
26
24
  model.train(100) # iterations
27
25
  ```
28
26
 
@@ -78,7 +76,7 @@ model.ll_per_word
78
76
  Perform inference for unseen documents
79
77
 
80
78
  ```ruby
81
- doc = model.make_doc("unseen doc")
79
+ doc = model.make_doc(["unseen", "doc"])
82
80
  topic_dist, ll = model.infer(doc)
83
81
  ```
84
82
 
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
114
112
  - [LDA](examples/lda_basic.rb)
115
113
  - [HDP](examples/hdp_basic.rb)
116
114
 
117
- ## Tokenization
118
-
119
- Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
-
121
- ```ruby
122
- model.add_doc(["tokens", "from", "document", "one"])
123
- ```
124
-
125
115
  ## Performance
126
116
 
127
117
  tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
@@ -3,12 +3,17 @@ require "mkmf-rice"
3
3
  $CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
4
4
 
5
5
  unless ENV["RUBY_CC_VERSION"]
6
- # ARM not supported yet
7
- # https://github.com/bab2min/tomotopy/issues/170
8
-
9
- # AVX-512F not supported yet
10
- # https://github.com/bab2min/tomotopy/issues/188
11
- $CXXFLAGS << " " << with_config("optflags", "-march=native -mno-avx512f")
6
+ default_optflags =
7
+ if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
8
+ # -march=native not supported with Mac ARM
9
+ ""
10
+ else
11
+ # AVX-512F not supported yet
12
+ # https://github.com/bab2min/tomotopy/issues/188
13
+ "-march=native -mno-avx512f"
14
+ end
15
+
16
+ $CXXFLAGS << " " << with_config("optflags", default_optflags)
12
17
  end
13
18
 
14
19
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
data/ext/tomoto/llda.cpp CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
29
29
  "topics_per_label",
30
30
  [](tomoto::ILLDAModel& self) {
31
31
  return self.getNumTopicsPerLabel();
32
+ })
33
+ .define_method(
34
+ "topic_label_dict",
35
+ [](tomoto::ILLDAModel& self) {
36
+ auto dict = self.getTopicLabelDict();
37
+ Array res;
38
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
39
+ for (size_t i = 0; i < dict.size(); i++) {
40
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
41
+ Object obj(value);
42
+ res.push(obj.call("force_encoding", utf8));
43
+ }
44
+ return res;
32
45
  });
33
46
  }
Binary file
Binary file
Binary file
Binary file
data/lib/tomoto/lda.rb CHANGED
@@ -24,7 +24,7 @@ module Tomoto
24
24
 
25
25
  # TODO support multiple docs
26
26
  def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
27
- raise "cannot infer with untrained model" unless defined?(@prepared)
27
+ raise "cannot infer with untrained model" unless trained?
28
28
  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
29
29
  end
30
30
 
@@ -86,6 +86,7 @@ module Tomoto
86
86
  end
87
87
  end
88
88
 
89
+ # TODO raise error if iterations < 1
89
90
  def train(iterations = 10, workers: 0, parallel: :default)
90
91
  prepare
91
92
  _train(iterations, workers, to_ps(parallel))
@@ -97,6 +98,10 @@ module Tomoto
97
98
 
98
99
  private
99
100
 
101
+ def trained?
102
+ global_step.positive?
103
+ end
104
+
100
105
  def prepare
101
106
  unless defined?(@prepared)
102
107
  _prepare(@min_cf, @min_df, @rm_top)
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -2,8 +2,8 @@
2
2
  * @file EigenRand
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.0
6
- * @date 2021-09-17
5
+ * @version 0.4.1
6
+ * @date 2022-08-13
7
7
  *
8
8
  * @copyright Copyright (c) 2020-2021
9
9
  *
@@ -385,6 +385,10 @@ MIT License
385
385
 
386
386
  ## History
387
387
 
388
+ ### 0.4.1 (2022-08-13)
389
+ * Fixed a bug where double-type generation with std::mt19937 fails compilation.
390
+ * Fixed a bug where `UniformIntGen` in scalar mode generates numbers in the wrong range.
391
+
388
392
  ### 0.4.0 alpha (2021-09-28)
389
393
  * Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
390
394
  * Now EigenRand has compatibility to `Eigen 3.4.0`.
@@ -305,6 +305,13 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
305
305
 
306
306
  역사
307
307
  -------
308
+ * 0.12.4 (2023-01-22)
309
+ * New features
310
+ * macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
311
+ * Bug fixes
312
+ * `tomotopy.Document.get_sub_topic_dist()`가 bad argument 예외를 발생시키는 문제를 해결했습니다.
313
+ * 예외 발생이 종종 크래시를 발생시키는 문제를 해결했습니다.
314
+
308
315
  * 0.12.3 (2022-07-19)
309
316
  * 기능 개선
310
317
  * 이제 `tomotopy.LDAModel.add_doc()`로 빈 문서를 삽입할 경우 예외를 발생시키는 대신 그냥 무시합니다. 새로 추가된 인자인 `ignore_empty_words`를 False로 설정할 경우 이전처럼 예외를 발생시킵니다.
@@ -309,6 +309,13 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
309
309
 
310
310
  History
311
311
  -------
312
+ * 0.12.4 (2023-01-22)
313
+ * New features
314
+ * Added support for macOS ARM64 architecture.
315
+ * Bug fixes
316
+ * Fixed an issue where `tomotopy.Document.get_sub_topic_dist()` raises a bad argument exception.
317
+ * Fixed an issue where exception raising sometimes causes crashes.
318
+
312
319
  * 0.12.3 (2022-07-19)
313
320
  * New features
314
321
  * Now, inserting an empty document using `tomotopy.LDAModel.add_doc()` just ignores it instead of raising an exception. If the newly added argument `ignore_empty_words` is set to False, an exception is raised as before.
@@ -514,3 +521,18 @@ Bundled Libraries and Their License
514
521
 
515
522
  * Mapbox Variant: `BSD License
516
523
  <licenses_bundled/MapboxVariant>`_
524
+
525
+ Citation
526
+ ---------
527
+ ::
528
+
529
+ @software{minchul_lee_2022_6868418,
530
+ author = {Minchul Lee},
531
+ title = {bab2min/tomotopy: 0.12.3},
532
+ month = jul,
533
+ year = 2022,
534
+ publisher = {Zenodo},
535
+ version = {v0.12.3},
536
+ doi = {10.5281/zenodo.6868418},
537
+ url = {https://doi.org/10.5281/zenodo.6868418}
538
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-13 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org