tomoto 0.3.2-x86_64-darwin → 0.3.3-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/ext/tomoto/llda.cpp +13 -0
- data/lib/tomoto/2.7/tomoto.bundle +0 -0
- data/lib/tomoto/3.0/tomoto.bundle +0 -0
- data/lib/tomoto/3.1/tomoto.bundle +0 -0
- data/lib/tomoto/3.2/tomoto.bundle +0 -0
- data/lib/tomoto/lda.rb +6 -1
- data/lib/tomoto/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b79fb57f7e14e6b483109a2ee2b9905b3ad30c5dc026477494d42238f6c3719d
         | 
| 4 | 
            +
              data.tar.gz: 22f74746b73ad822f1fdd1cf8cabdcc28b995d1d3f18097c90ca2894dadb38f2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: acbd74efa07f328b5326944bd836bbb55c310a9d4877f645785072bb08aaabf52453b18c2371f70573acb5806f491659460fdb7881d5733bb576b2694727275f
         | 
| 7 | 
            +
              data.tar.gz: cd255e7ce35ef651c1cada54406631b9ddf71d7ee29af66a223bb12053b237e1619e294f2c6ff8a3481eac205a37e1d1e60c0b9d30cd2280c39b6f2c6c32f2ee
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/LICENSE.txt
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            MIT License
         | 
| 2 2 |  | 
| 3 3 | 
             
            Copyright (c) 2019, bab2min
         | 
| 4 | 
            -
            Copyright (c) 2020- | 
| 4 | 
            +
            Copyright (c) 2020-2023 Andrew Kane
         | 
| 5 5 |  | 
| 6 6 | 
             
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 7 7 | 
             
            of this software and associated documentation files (the "Software"), to deal
         | 
    
        data/README.md
    CHANGED
    
    | @@ -12,17 +12,15 @@ Add this line to your application’s Gemfile: | |
| 12 12 | 
             
            gem "tomoto"
         | 
| 13 13 | 
             
            ```
         | 
| 14 14 |  | 
| 15 | 
            -
            ARM is not currently supported
         | 
| 16 | 
            -
             | 
| 17 15 | 
             
            ## Getting Started
         | 
| 18 16 |  | 
| 19 17 | 
             
            Train a model
         | 
| 20 18 |  | 
| 21 19 | 
             
            ```ruby
         | 
| 22 20 | 
             
            model = Tomoto::LDA.new(k: 2)
         | 
| 23 | 
            -
            model.add_doc(" | 
| 24 | 
            -
            model.add_doc(" | 
| 25 | 
            -
            model.add_doc(" | 
| 21 | 
            +
            model.add_doc(["tokens", "from", "document", "one"])
         | 
| 22 | 
            +
            model.add_doc(["tokens", "from", "document", "two"])
         | 
| 23 | 
            +
            model.add_doc(["tokens", "from", "document", "three"])
         | 
| 26 24 | 
             
            model.train(100) # iterations
         | 
| 27 25 | 
             
            ```
         | 
| 28 26 |  | 
| @@ -78,7 +76,7 @@ model.ll_per_word | |
| 78 76 | 
             
            Perform inference for unseen documents
         | 
| 79 77 |  | 
| 80 78 | 
             
            ```ruby
         | 
| 81 | 
            -
            doc = model.make_doc("unseen doc")
         | 
| 79 | 
            +
            doc = model.make_doc(["unseen", "doc"])
         | 
| 82 80 | 
             
            topic_dist, ll = model.infer(doc)
         | 
| 83 81 | 
             
            ```
         | 
| 84 82 |  | 
| @@ -114,14 +112,6 @@ If a method or option you need isn’t supported, feel free to open an issue. | |
| 114 112 | 
             
            - [LDA](examples/lda_basic.rb)
         | 
| 115 113 | 
             
            - [HDP](examples/hdp_basic.rb)
         | 
| 116 114 |  | 
| 117 | 
            -
            ## Tokenization
         | 
| 118 | 
            -
             | 
| 119 | 
            -
            Documents are tokenized by whitespace by default, or you can perform your own tokenization.
         | 
| 120 | 
            -
             | 
| 121 | 
            -
            ```ruby
         | 
| 122 | 
            -
            model.add_doc(["tokens", "from", "document", "one"])
         | 
| 123 | 
            -
            ```
         | 
| 124 | 
            -
             | 
| 125 115 | 
             
            ## Performance
         | 
| 126 116 |  | 
| 127 117 | 
             
            tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check which instruction set architecture it’s using with:
         | 
    
        data/ext/tomoto/llda.cpp
    CHANGED
    
    | @@ -29,5 +29,18 @@ void init_llda(Rice::Module& m) { | |
| 29 29 | 
             
                  "topics_per_label",
         | 
| 30 30 | 
             
                  [](tomoto::ILLDAModel& self) {
         | 
| 31 31 | 
             
                    return self.getNumTopicsPerLabel();
         | 
| 32 | 
            +
                  })
         | 
| 33 | 
            +
                .define_method(
         | 
| 34 | 
            +
                  "topic_label_dict",
         | 
| 35 | 
            +
                  [](tomoto::ILLDAModel& self) {
         | 
| 36 | 
            +
                    auto dict = self.getTopicLabelDict();
         | 
| 37 | 
            +
                    Array res;
         | 
| 38 | 
            +
                    auto utf8 = Rice::Class(rb_cEncoding).call("const_get", "UTF_8");
         | 
| 39 | 
            +
                    for (size_t i = 0; i < dict.size(); i++) {
         | 
| 40 | 
            +
                      VALUE value = Rice::detail::To_Ruby<std::string>().convert(dict.toWord(i));
         | 
| 41 | 
            +
                      Object obj(value);
         | 
| 42 | 
            +
                      res.push(obj.call("force_encoding", utf8));
         | 
| 43 | 
            +
                    }
         | 
| 44 | 
            +
                    return res;
         | 
| 32 45 | 
             
                  });
         | 
| 33 46 | 
             
            }
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/lib/tomoto/lda.rb
    CHANGED
    
    | @@ -24,7 +24,7 @@ module Tomoto | |
| 24 24 |  | 
| 25 25 | 
             
                # TODO support multiple docs
         | 
| 26 26 | 
             
                def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
         | 
| 27 | 
            -
                  raise "cannot infer with untrained model" unless  | 
| 27 | 
            +
                  raise "cannot infer with untrained model" unless trained?
         | 
| 28 28 | 
             
                  _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
         | 
| 29 29 | 
             
                end
         | 
| 30 30 |  | 
| @@ -86,6 +86,7 @@ module Tomoto | |
| 86 86 | 
             
                  end
         | 
| 87 87 | 
             
                end
         | 
| 88 88 |  | 
| 89 | 
            +
                # TODO raise error if iterations < 1
         | 
| 89 90 | 
             
                def train(iterations = 10, workers: 0, parallel: :default)
         | 
| 90 91 | 
             
                  prepare
         | 
| 91 92 | 
             
                  _train(iterations, workers, to_ps(parallel))
         | 
| @@ -97,6 +98,10 @@ module Tomoto | |
| 97 98 |  | 
| 98 99 | 
             
                private
         | 
| 99 100 |  | 
| 101 | 
            +
                def trained?
         | 
| 102 | 
            +
                  global_step.positive?
         | 
| 103 | 
            +
                end
         | 
| 104 | 
            +
             | 
| 100 105 | 
             
                def prepare
         | 
| 101 106 | 
             
                  unless defined?(@prepared)
         | 
| 102 107 | 
             
                    _prepare(@min_cf, @min_df, @rm_top)
         | 
    
        data/lib/tomoto/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: tomoto
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3.2 | 
| 4 | 
            +
              version: 0.3.3
         | 
| 5 5 | 
             
            platform: x86_64-darwin
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023- | 
| 11 | 
            +
            date: 2023-02-02 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: 
         | 
| 14 14 | 
             
            email: andrew@ankane.org
         |