tomoto 0.3.1-x86_64-darwin → 0.3.3-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/ext/tomoto/extconf.rb +11 -6
- data/ext/tomoto/llda.cpp +13 -0
- data/lib/tomoto/2.7/tomoto.bundle +0 -0
- data/lib/tomoto/3.0/tomoto.bundle +0 -0
- data/lib/tomoto/3.1/tomoto.bundle +0 -0
- data/lib/tomoto/3.2/tomoto.bundle +0 -0
- data/lib/tomoto/lda.rb +6 -1
- data/lib/tomoto/version.rb +1 -1
- data/vendor/EigenRand/EigenRand/EigenRand +2 -2
- data/vendor/EigenRand/README.md +4 -0
- data/vendor/tomotopy/README.kr.rst +7 -0
- data/vendor/tomotopy/README.rst +22 -0
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b79fb57f7e14e6b483109a2ee2b9905b3ad30c5dc026477494d42238f6c3719d
         | 
| 4 | 
            +
              data.tar.gz: 22f74746b73ad822f1fdd1cf8cabdcc28b995d1d3f18097c90ca2894dadb38f2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: acbd74efa07f328b5326944bd836bbb55c310a9d4877f645785072bb08aaabf52453b18c2371f70573acb5806f491659460fdb7881d5733bb576b2694727275f
         | 
| 7 | 
            +
              data.tar.gz: cd255e7ce35ef651c1cada54406631b9ddf71d7ee29af66a223bb12053b237e1619e294f2c6ff8a3481eac205a37e1d1e60c0b9d30cd2280c39b6f2c6c32f2ee
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,3 +1,13 @@ | |
| 1 | 
            +
            ## 0.3.3 (2023-02-01)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            - Added `topic_label_dict` method to `LLDA`
         | 
| 4 | 
            +
            - Fixed error with `infer` with loaded model
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            ## 0.3.2 (2023-01-22)
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            - Added precompiled gem for Mac ARM
         | 
| 9 | 
            +
            - Updated tomoto to 0.12.4
         | 
| 10 | 
            +
             | 
| 1 11 | 
             
            ## 0.3.1 (2023-01-12)
         | 
| 2 12 |  | 
| 3 13 | 
             
            - Added support for Ruby 3.2
         | 
    
        data/LICENSE.txt
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            MIT License
         | 
| 2 2 |  | 
| 3 3 | 
             
            Copyright (c) 2019, bab2min
         | 
| 4 | 
            -
            Copyright (c) 2020- | 
| 4 | 
            +
            Copyright (c) 2020-2023 Andrew Kane
         | 
| 5 5 |  | 
| 6 6 | 
             
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 7 7 | 
             
            of this software and associated documentation files (the "Software"), to deal
         | 
    
        data/README.md
    CHANGED
    
    | @@ -12,17 +12,15 @@ Add this line to your application’s Gemfile: | |
| 12 12 | 
             
            gem "tomoto"
         | 
| 13 13 | 
             
            ```
         | 
| 14 14 |  | 
| 15 | 
            -
            ARM is not currently supported
         | 
| 16 | 
            -
             | 
| 17 15 | 
             
            ## Getting Started
         | 
| 18 16 |  | 
| 19 17 | 
             
            Train a model
         | 
| 20 18 |  | 
| 21 19 | 
             
            ```ruby
         | 
| 22 20 | 
             
            model = Tomoto::LDA.new(k: 2)
         | 
| 23 | 
            -
            model.add_doc(" | 
| 24 | 
            -
            model.add_doc(" | 
| 25 | 
            -
            model.add_doc(" | 
| 21 | 
            +
            model.add_doc(["tokens", "from", "document", "one"])
         | 
| 22 | 
            +
            model.add_doc(["tokens", "from", "document", "two"])
         | 
| 23 | 
            +
            model.add_doc(["tokens", "from", "document", "three"])
         | 
| 26 24 | 
             
            model.train(100) # iterations
         | 
| 27 25 | 
             
            ```
         | 
| 28 26 |  | 
| @@ -78,7 +76,7 @@ model.ll_per_word | |
| 78 76 | 
             
            Perform inference for unseen documents
         | 
| 79 77 |  | 
| 80 78 | 
             
            ```ruby
         | 
| 81 | 
            -
            doc = model.make_doc("unseen doc")
         | 
| 79 | 
            +
            doc = model.make_doc(["unseen", "doc"])
         | 
| 82 80 | 
             
            topic_dist, ll = model.infer(doc)
         | 
| 83 81 | 
             
            ```
         | 
| 84 82 |  | 
| @@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue. | |
| 114 112 | 
             
            - [LDA](examples/lda_basic.rb)
         | 
| 115 113 | 
             
            - [HDP](examples/hdp_basic.rb)
         | 
| 116 114 |  | 
| 117 | 
            -
            ## Tokenization
         | 
| 118 | 
            -
             | 
| 119 | 
            -
            Documents are tokenized by whitespace by default, or you can perform your own tokenization.
         | 
| 120 | 
            -
             | 
| 121 | 
            -
            ```ruby
         | 
| 122 | 
            -
            model.add_doc(["tokens", "from", "document", "one"])
         | 
| 123 | 
            -
            ```
         | 
| 124 | 
            -
             | 
| 125 115 | 
             
            ## Performance
         | 
| 126 116 |  | 
| 127 117 | 
             
            tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
         | 
    
        data/ext/tomoto/extconf.rb
    CHANGED
    
    | @@ -3,12 +3,17 @@ require "mkmf-rice" | |
| 3 3 | 
             
            $CXXFLAGS += " -std=c++17 $(optflags) -DEIGEN_MPL2_ONLY"
         | 
| 4 4 |  | 
| 5 5 | 
             
            unless ENV["RUBY_CC_VERSION"]
         | 
| 6 | 
            -
               | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 6 | 
            +
              default_optflags =
         | 
| 7 | 
            +
                if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
         | 
| 8 | 
            +
                  # -march=native not supported with Mac ARM
         | 
| 9 | 
            +
                  ""
         | 
| 10 | 
            +
                else
         | 
| 11 | 
            +
                  # AVX-512F not supported yet
         | 
| 12 | 
            +
                  # https://github.com/bab2min/tomotopy/issues/188
         | 
| 13 | 
            +
                  "-march=native -mno-avx512f"
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              $CXXFLAGS << " " << with_config("optflags", default_optflags)
         | 
| 12 17 | 
             
            end
         | 
| 13 18 |  | 
| 14 19 | 
             
            apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
         | 
    
        data/ext/tomoto/llda.cpp
    CHANGED
    
    | @@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) { | |
| 29 29 | 
             
                  "topics_per_label",
         | 
| 30 30 | 
             
                  [](tomoto::ILLDAModel& self) {
         | 
| 31 31 | 
             
                    return self.getNumTopicsPerLabel();
         | 
| 32 | 
            +
                  })
         | 
| 33 | 
            +
                .define_method(
         | 
| 34 | 
            +
                  "topic_label_dict",
         | 
| 35 | 
            +
                  [](tomoto::ILLDAModel& self) {
         | 
| 36 | 
            +
                    auto dict = self.getTopicLabelDict();
         | 
| 37 | 
            +
                    Array res;
         | 
| 38 | 
            +
                    auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
         | 
| 39 | 
            +
                    for (size_t i = 0; i < dict.size(); i++) {
         | 
| 40 | 
            +
                      VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
         | 
| 41 | 
            +
                      Object obj(value);
         | 
| 42 | 
            +
                      res.push(obj.call("force_encoding", utf8));
         | 
| 43 | 
            +
                    }
         | 
| 44 | 
            +
                    return res;
         | 
| 32 45 | 
             
                  });
         | 
| 33 46 | 
             
            }
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/lib/tomoto/lda.rb
    CHANGED
    
    | @@ -24,7 +24,7 @@ module Tomoto | |
| 24 24 |  | 
| 25 25 | 
             
                # TODO support multiple docs
         | 
| 26 26 | 
             
                def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
         | 
| 27 | 
            -
                  raise "cannot infer with untrained model" unless  | 
| 27 | 
            +
                  raise "cannot infer with untrained model" unless trained?
         | 
| 28 28 | 
             
                  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
         | 
| 29 29 | 
             
                end
         | 
| 30 30 |  | 
| @@ -86,6 +86,7 @@ module Tomoto | |
| 86 86 | 
             
                  end
         | 
| 87 87 | 
             
                end
         | 
| 88 88 |  | 
| 89 | 
            +
                # TODO raise error if iterations < 1
         | 
| 89 90 | 
             
                def train(iterations = 10, workers: 0, parallel: :default)
         | 
| 90 91 | 
             
                  prepare
         | 
| 91 92 | 
             
                  _train(iterations, workers, to_ps(parallel))
         | 
| @@ -97,6 +98,10 @@ module Tomoto | |
| 97 98 |  | 
| 98 99 | 
             
                private
         | 
| 99 100 |  | 
| 101 | 
            +
                def trained?
         | 
| 102 | 
            +
                  global_step.positive?
         | 
| 103 | 
            +
                end
         | 
| 104 | 
            +
             | 
| 100 105 | 
             
                def prepare
         | 
| 101 106 | 
             
                  unless defined?(@prepared)
         | 
| 102 107 | 
             
                    _prepare(@min_cf, @min_df, @rm_top)
         | 
    
        data/lib/tomoto/version.rb
    CHANGED
    
    
    
        data/vendor/EigenRand/README.md
    CHANGED
    
    | @@ -385,6 +385,10 @@ MIT License | |
| 385 385 |  | 
| 386 386 | 
             
            ## History
         | 
| 387 387 |  | 
| 388 | 
            +
            ### 0.4.1 (2022-08-13)
         | 
| 389 | 
            +
            * Fixed a bug where double-type generation with std::mt19937 fails compilation.
         | 
| 390 | 
            +
            * Fixed a bug where `UniformIntGen` in scalar mode generates numbers in the wrong range.
         | 
| 391 | 
            +
             | 
| 388 392 | 
             
            ### 0.4.0 alpha (2021-09-28)
         | 
| 389 393 | 
             
            * Now EigenRand supports ARM & ARM64 NEON architecture experimentally. Please report issues about ARM & ARM64 NEON.
         | 
| 390 394 | 
             
            * Now EigenRand has compatibility to `Eigen 3.4.0`.
         | 
    
        data/vendor/tomotopy/README.kr.rst
    CHANGED
    
    | @@ -305,6 +305,13 @@ tomotopy의 Python3 예제 코드는 https://github.com/bab2min/tomotopy/blob/ma | |
| 305 305 |  | 
| 306 306 | 
             
            역사
         | 
| 307 307 | 
             
            -------
         | 
| 308 | 
            +
            * 0.12.4 (2023-01-22)
         | 
| 309 | 
            +
                * New features
         | 
| 310 | 
            +
                    * macOS ARM64 아키텍처에 대한 지원을 추가했습니다.
         | 
| 311 | 
            +
                * Bug fixes
         | 
| 312 | 
            +
                    * `tomotopy.Document.get_sub_topic_dist()`가 bad argument 예외를 발생시키는 문제를 해결했습니다.
         | 
| 313 | 
            +
                    * 예외 발생이 종종 크래시를 발생시키는 문제를 해결했습니다.
         | 
| 314 | 
            +
             | 
| 308 315 | 
             
            * 0.12.3 (2022-07-19)
         | 
| 309 316 | 
             
                * 기능 개선
         | 
| 310 317 | 
             
                    * 이제 `tomotopy.LDAModel.add_doc()`로 빈 문서를 삽입할 경우 예외를 발생시키는 대신 그냥 무시합니다. 새로 추가된 인자인 `ignore_empty_words`를 False로 설정할 경우 이전처럼 예외를 발생시킵니다.
         | 
    
        data/vendor/tomotopy/README.rst
    CHANGED
    
    | @@ -309,6 +309,13 @@ meaning you can use it for any reasonable purpose and remain in complete ownersh | |
| 309 309 |  | 
| 310 310 | 
             
            History
         | 
| 311 311 | 
             
            -------
         | 
| 312 | 
            +
            * 0.12.4 (2023-01-22)
         | 
| 313 | 
            +
                * New features
         | 
| 314 | 
            +
                    * Added support for macOS ARM64 architecture.
         | 
| 315 | 
            +
                * Bug fixes
         | 
| 316 | 
            +
                    * Fixed an issue where `tomotopy.Document.get_sub_topic_dist()` raises a bad argument exception.
         | 
| 317 | 
            +
                    * Fixed an issue where exception raising sometimes causes crashes.
         | 
| 318 | 
            +
             | 
| 312 319 | 
             
            * 0.12.3 (2022-07-19)
         | 
| 313 320 | 
             
                * New features
         | 
| 314 321 | 
             
                    * Now, inserting an empty document using `tomotopy.LDAModel.add_doc()` just ignores it instead of raising an exception. If the newly added argument `ignore_empty_words` is set to False, an exception is raised as before.
         | 
| @@ -514,3 +521,18 @@ Bundled Libraries and Their License | |
| 514 521 |  | 
| 515 522 | 
             
            * Mapbox Variant: `BSD License
         | 
| 516 523 | 
             
              <licenses_bundled/MapboxVariant>`_
         | 
| 524 | 
            +
             | 
| 525 | 
            +
            Citation
         | 
| 526 | 
            +
            ---------
         | 
| 527 | 
            +
            ::
         | 
| 528 | 
            +
             | 
| 529 | 
            +
                @software{minchul_lee_2022_6868418,
         | 
| 530 | 
            +
                  author       = {Minchul Lee},
         | 
| 531 | 
            +
                  title        = {bab2min/tomotopy: 0.12.3},
         | 
| 532 | 
            +
                  month        = jul,
         | 
| 533 | 
            +
                  year         = 2022,
         | 
| 534 | 
            +
                  publisher    = {Zenodo},
         | 
| 535 | 
            +
                  version      = {v0.12.3},
         | 
| 536 | 
            +
                  doi          = {10.5281/zenodo.6868418},
         | 
| 537 | 
            +
                  url          = {https://doi.org/10.5281/zenodo.6868418}
         | 
| 538 | 
            +
                }
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: tomoto
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3.1 | 
| 4 | 
            +
              version: 0.3.3
         | 
| 5 5 | 
             
            platform: x86_64-darwin
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023- | 
| 11 | 
            +
            date: 2023-02-02 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: 
         | 
| 14 14 | 
             
            email: andrew@ankane.org
         |