tomoto 0.3.1-x86_64-darwin → 0.3.3-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3491de664af833b4524812895f15b9939976eb5014dd4812e70f2a497a666d67
4
- data.tar.gz: 65b39aa5e4ff68d68a6f0d11dc3caf62b4d709d29f1f1fb24a4bb4219f3151b5
3
+ metadata.gz: b79fb57f7e14e6b483109a2ee2b9905b3ad30c5dc026477494d42238f6c3719d
4
+ data.tar.gz: 22f74746b73ad822f1fdd1cf8cabdcc28b995d1d3f18097c90ca2894dadb38f2
5
5
  SHA512:
6
- metadata.gz: 2cf24cb5bca2263a28044e8fb6298cd39192f28ff7fdfe085470a45b17ebebbf69c75a12bd1d9c61766998a407bf128838419a73c7646ba4218bd6626ce192bf
7
- data.tar.gz: 6cae693808f346e69f2a7817803b1fe2c2c94af936f8b8929e86f2518976949b401dec159d3ab035c4d6aedec44734521a8763a9dcb7acd83dba696888389612
6
+ metadata.gz: acbd74efa07f328b5326944bd836bbb55c310a9d4877f645785072bb08aaabf52453b18c2371f70573acb5806f491659460fdb7881d5733bb576b2694727275f
7
+ data.tar.gz: cd255e7ce35ef651c1cada54406631b9ddf71d7ee29af66a223bb12053b237e1619e294f2c6ff8a3481eac205a37e1d1e60c0b9d30cd2280c39b6f2c6c32f2ee
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.3.3 (2023-02-01)
2
+
3
+ - Added `topic_label_dict` method to `LLDA`
4
+ - Fixed error with `infer` with loaded model
5
+
6
+ ## 0.3.2 (2023-01-22)
7
+
8
+ - Added precompiled gem for Mac ARM
9
+ - Updated tomoto to 0.12.4
10
+
1
11
  ## 0.3.1 (2023-01-12)
2
12
 
3
13
  - Added support for Ruby 3.2
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2019, bab2min
4
- Copyright (c) 2020-2021 Andrew Kane
4
+ Copyright (c) 2020-2023 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -12,17 +12,15 @@ Add this line to your application’s Gemfile:
12
12
  gem "tomoto"
13
13
  ```
14
14
 
15
- ARM is not currently supported
16
-
17
15
  ## Getting Started
18
16
 
19
17
  Train a model
20
18
 
21
19
  ```ruby
22
20
  model = Tomoto::LDA.new(k: 2)
23
- model.add_doc("text from document one")
24
- model.add_doc("text from document two")
25
- model.add_doc("text from document three")
21
+ model.add_doc(["tokens", "from", "document", "one"])
22
+ model.add_doc(["tokens", "from", "document", "two"])
23
+ model.add_doc(["tokens", "from", "document", "three"])
26
24
  model.train(100) # iterations
27
25
  ```
28
26
 
@@ -78,7 +76,7 @@ model.ll_per_word
78
76
  Perform inference for unseen documents
79
77
 
80
78
  ```ruby
81
- doc = model.make_doc("unseen doc")
79
+ doc = model.make_doc(["unseen", "doc"])
82
80
  topic_dist, ll = model.infer(doc)
83
81
  ```
84
82
 
@@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue.
114
112
  - [LDA](examples/lda_basic.rb)
115
113
  - [HDP](examples/hdp_basic.rb)
116
114
 
117
- ## Tokenization
118
-
119
- Documents are tokenized by whitespace by default, or you can perform your own tokenization.
120
-
121
- ```ruby
122
- model.add_doc(["tokens", "from", "document", "one"])
123
- ```
124
-
125
115
  ## Performance
126
116
 
127
117
  tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
@@ -3,12 +3,17 @@ require "mkmf-rice"
3
3
  $CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
4
4
 
5
5
  unless ENV["RUBY_CC_VERSION"]
6
- # ARM not supported yet
7
- # https://github.com/bab2min/tomotopy/issues/170
8
-
9
- # AVX-512F not supported yet
10
- # https://github.com/bab2min/tomotopy/issues/188
11
- $CXXFLAGS << " " << with_config("optflags", "-march=native -mno-avx512f")
6
+ default_optflags =
7
+ if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
8
+ # -march=native not supported with Mac ARM
9
+ ""
10
+ else
11
+ # AVX-512F not supported yet
12
+ # https://github.com/bab2min/tomotopy/issues/188
13
+ "-march=native -mno-avx512f"
14
+ end
15
+
16
+ $CXXFLAGS << " " << with_config("optflags", default_optflags)
12
17
  end
13
18
 
14
19
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
data/ext/tomoto/llda.cpp CHANGED
@@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) {
29
29
  "topics_per_label",
30
30
  [](tomoto::ILLDAModel& self) {
31
31
  return self.getNumTopicsPerLabel();
32
+ })
33
+ .define_method(
34
+ "topic_label_dict",
35
+ [](tomoto::ILLDAModel& self) {
36
+ auto dict = self.getTopicLabelDict();
37
+ Array res;
38
+ auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
39
+ for (size_t i = 0; i < dict.size(); i++) {
40
+ VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
41
+ Object obj(value);
42
+ res.push(obj.call("force_encoding", utf8));
43
+ }
44
+ return res;
32
45
  });
33
46
  }
Binary file
Binary file
Binary file
Binary file
data/lib/tomoto/lda.rb CHANGED
@@ -24,7 +24,7 @@ module Tomoto
24
24
 
25
25
  # TODO support multiple docs
26
26
  def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
27
- raise "cannot infer with untrained model" unless defined?(@prepared)
27
+ raise "cannot infer with untrained model" unless trained?
28
28
  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
29
29
  end
30
30
 
@@ -86,6 +86,7 @@ module Tomoto
86
86
  end
87
87
  end
88
88
 
89
+ # TODO raise error if iterations < 1
89
90
  def train(iterations = 10, workers: 0, parallel: :default)
90
91
  prepare
91
92
  _train(iterations, workers, to_ps(parallel))
@@ -97,6 +98,10 @@ module Tomoto
97
98
 
98
99
  private
99
100
 
101
+ def trained?
102
+ global_step.positive?
103
+ end
104
+
100
105
  def prepare
101
106
  unless defined?(@prepared)
102
107
  _prepare(@min_cf, @min_df, @rm_top)
@@ -1,3 +1,3 @@
1
1
  module Tomoto
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -2,8 +2,8 @@
2
2
  * @file EigenRand
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.0
6
- * @date 2021-09-17
5
+ * @version 0.4.1
6
+ * @date 2022-08-13
7
7
  *
8
8
  * @copyright Copyright (c) 2020-2021
9
9
  *
@@ -385,6 +385,10 @@ MIT License
385
385
 
386
386
  ## History
387
387
 
388
+ ### 0.4.1 (2022-08-13)
389
+ * Fixed a bug where double-type generation with std::mt19937 fails compilation.
390
+ * Fixed a bug where `UniformIntGen` in scalar mode generates numbers in the wrong range.
391
+
388
392
  ### 0.4.0 alpha (2021-09-28)
389
393
  * Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
390
394
  * Now EigenRand has compatibility to `Eigen 3.4.0`.
@@ -305,6 +305,13 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma
305
305
 
306
306
  역사
307
307
  -------
308
+ * 0.12.4 (2023-01-22)
309
+ * New features
310
+ * macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
311
+ * Bug fixes
312
+ * `tomotopy.Document.get_sub_topic_dist()`가 bad argument 예외를 발생시키는 문제를 해결했습니다.
313
+ * 예외 발생이 종종 크래시를 발생시키는 문제를 해결했습니다.
314
+
308
315
  * 0.12.3 (2022-07-19)
309
316
  * 기능 개선
310
317
  * 이제 `tomotopy.LDAModel.add_doc()`로 빈 문서를 삽입할 경우 예외를 발생시키는 대신 그냥 무시합니다. 새로 추가된 인자인 `ignore_empty_words`를 False로 설정할 경우 이전처럼 예외를 발생시킵니다.
@@ -309,6 +309,13 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh
309
309
 
310
310
  History
311
311
  -------
312
+ * 0.12.4 (2023-01-22)
313
+ * New features
314
+ * Added support for macOS ARM64 architecture.
315
+ * Bug fixes
316
+ * Fixed an issue where `tomotopy.Document.get_sub_topic_dist()` raises a bad argument exception.
317
+ * Fixed an issue where exception raising sometimes causes crashes.
318
+
312
319
  * 0.12.3 (2022-07-19)
313
320
  * New features
314
321
  * Now, inserting an empty document using `tomotopy.LDAModel.add_doc()` just ignores it instead of raising an exception. If the newly added argument `ignore_empty_words` is set to False, an exception is raised as before.
@@ -514,3 +521,18 @@ Bundled Libraries and Their License
514
521
 
515
522
  * Mapbox Variant: `BSD License
516
523
  <licenses_bundled/MapboxVariant>`_
524
+
525
+ Citation
526
+ ---------
527
+ ::
528
+
529
+ @software{minchul_lee_2022_6868418,
530
+ author = {Minchul Lee},
531
+ title = {bab2min/tomotopy: 0.12.3},
532
+ month = jul,
533
+ year = 2022,
534
+ publisher = {Zenodo},
535
+ version = {v0.12.3},
536
+ doi = {10.5281/zenodo.6868418},
537
+ url = {https://doi.org/10.5281/zenodo.6868418}
538
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tomoto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-13 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org