hybridforest 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/Gemfile.lock +1 -1
- data/lib/hybridforest/trees/feature_selectors/random_feature_subspace.rb +1 -0
- data/lib/hybridforest/utilities/utils.rb +38 -26
- data/lib/hybridforest/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b32e830c0e55cd23122b879bae7d42407085026f573d5565522ba64fe71a255e
         | 
| 4 | 
            +
              data.tar.gz: 55c965cbe81e2f2a2e18ebecd21b1027577249f2aa6de4bb0e57b14ab6409f30
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7fd27d1de1bcc9f1776607475bc3e03731c8053dbd309171265d1eeb1abc1498965b62d251f1f731ea708ea3914372cba1843b73be1acedcb90d417e79cc54bf
         | 
| 7 | 
            +
              data.tar.gz: 75a6a949ccce8e0369966814fdd9a40944a9163dacb82de7a30fad31d54844e5fdc396ea546dbfcad19258e249a60b452973ee3df4e9fe8f07da71d4e888c274
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    
| @@ -12,17 +12,16 @@ module HybridForest | |
| 12 12 | 
             
                # of independent features and an array of labels. Returns [+training_set+, +testing_set+, +testing_set_labels+]
         | 
| 13 13 | 
             
                #
         | 
| 14 14 | 
             
                def self.train_test_split(dataset, test_set_size = 0.20)
         | 
| 15 | 
            -
                  # TODO:  | 
| 15 | 
            +
                  # TODO: Offer stratify param
         | 
| 16 16 | 
             
                  dataset = to_dataframe(dataset)
         | 
| 17 | 
            +
                  all_rows = (0...dataset.count).to_a
         | 
| 17 18 |  | 
| 18 19 | 
             
                  test_set_count = (dataset.count * test_set_size).floor
         | 
| 19 | 
            -
                   | 
| 20 | 
            -
                  test_set = dataset[ | 
| 21 | 
            -
                  test_set_labels = test_set. | 
| 22 | 
            -
                  test_set.except!(test_set.label)
         | 
| 20 | 
            +
                  test_set_rows = rand_uniq_nums(test_set_count, 0...dataset.count)
         | 
| 21 | 
            +
                  test_set = dataset[test_set_rows]
         | 
| 22 | 
            +
                  test_set, test_set_labels = test_set.disconnect_labels
         | 
| 23 23 |  | 
| 24 | 
            -
                   | 
| 25 | 
            -
                  train_set = dataset[train_set_indices]
         | 
| 24 | 
            +
                  train_set = dataset[all_rows - test_set_rows]
         | 
| 26 25 |  | 
| 27 26 | 
             
                  [train_set, test_set, test_set_labels]
         | 
| 28 27 | 
             
                end
         | 
| @@ -37,20 +36,13 @@ module HybridForest | |
| 37 36 | 
             
                  dataset = to_dataframe(dataset)
         | 
| 38 37 | 
             
                  all_rows = (0...dataset.count).to_a
         | 
| 39 38 |  | 
| 40 | 
            -
                   | 
| 41 | 
            -
                   | 
| 42 | 
            -
                  dataset.count.times do
         | 
| 43 | 
            -
                    row = all_rows.sample
         | 
| 44 | 
            -
                    train_set_rows << row
         | 
| 45 | 
            -
                    train_set.concat(dataset[row])
         | 
| 46 | 
            -
                  end
         | 
| 39 | 
            +
                  train_set_rows = rand_nums(dataset.count, 0...dataset.count)
         | 
| 40 | 
            +
                  train_set = dataset[train_set_rows]
         | 
| 47 41 |  | 
| 48 42 | 
             
                  return train_test_split(dataset) if train_set_rows.sort == all_rows
         | 
| 49 43 |  | 
| 50 | 
            -
                   | 
| 51 | 
            -
                  test_set =  | 
| 52 | 
            -
                  test_set_labels = test_set.class_labels
         | 
| 53 | 
            -
                  test_set.except!(test_set.label)
         | 
| 44 | 
            +
                  test_set = dataset[all_rows - train_set_rows]
         | 
| 45 | 
            +
                  test_set, test_set_labels = test_set.disconnect_labels
         | 
| 54 46 |  | 
| 55 47 | 
             
                  [train_set, test_set, test_set_labels]
         | 
| 56 48 | 
             
                end
         | 
| @@ -88,16 +80,12 @@ module HybridForest | |
| 88 80 | 
             
                def self.random_sample(data:, size:, with_replacement: true)
         | 
| 89 81 | 
             
                  raise ArgumentError, "Invalid sample size" if size < 1 || size > data.count
         | 
| 90 82 |  | 
| 91 | 
            -
                  if with_replacement
         | 
| 92 | 
            -
                     | 
| 93 | 
            -
                    data[rows]
         | 
| 83 | 
            +
                  rows = if with_replacement
         | 
| 84 | 
            +
                    rand_nums(size, 0...data.count)
         | 
| 94 85 | 
             
                  else
         | 
| 95 | 
            -
                     | 
| 96 | 
            -
                    until rows.size == size
         | 
| 97 | 
            -
                      rows << rand(0...data.count)
         | 
| 98 | 
            -
                    end
         | 
| 99 | 
            -
                    data[rows.to_a]
         | 
| 86 | 
            +
                    rand_uniq_nums(size, 0...data.count)
         | 
| 100 87 | 
             
                  end
         | 
| 88 | 
            +
                  data[rows]
         | 
| 101 89 | 
             
                end
         | 
| 102 90 |  | 
| 103 91 | 
             
                # Outputs a report of common prediction metrics.
         | 
| @@ -168,6 +156,12 @@ module HybridForest | |
| 168 156 | 
             
                    def class_labels
         | 
| 169 157 | 
             
                      self[label].to_a
         | 
| 170 158 | 
             
                    end
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                    def disconnect_labels
         | 
| 161 | 
            +
                      labels = class_labels
         | 
| 162 | 
            +
                      except!(label)
         | 
| 163 | 
            +
                      [self, labels]
         | 
| 164 | 
            +
                    end
         | 
| 171 165 | 
             
                  end
         | 
| 172 166 | 
             
                end
         | 
| 173 167 |  | 
| @@ -202,5 +196,23 @@ module HybridForest | |
| 202 196 | 
             
                def false_label?(label)
         | 
| 203 197 | 
             
                  [false, 0].include? label
         | 
| 204 198 | 
             
                end
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                ##
         | 
| 201 | 
            +
                # Returns an array of +n+ random numbers in the exclusive +range+.
         | 
| 202 | 
            +
                def rand_nums(n, range)
         | 
| 203 | 
            +
                  n.times.collect { rand(range) }
         | 
| 204 | 
            +
                end
         | 
| 205 | 
            +
             | 
| 206 | 
            +
                ##
         | 
| 207 | 
            +
                # Returns an array of +n+ _unique_ random numbers in the exclusive +range+.
         | 
| 208 | 
            +
                def rand_uniq_nums(n, range)
         | 
| 209 | 
            +
                  raise ArgumentError if n > range.size
         | 
| 210 | 
            +
             | 
| 211 | 
            +
                  nums = Set.new
         | 
| 212 | 
            +
                  until nums.size == n
         | 
| 213 | 
            +
                    nums << rand(range)
         | 
| 214 | 
            +
                  end
         | 
| 215 | 
            +
                  nums.to_a
         | 
| 216 | 
            +
                end
         | 
| 205 217 | 
             
              end
         | 
| 206 218 | 
             
            end
         | 
    
        data/lib/hybridforest/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: hybridforest
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.11.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - hi-tech-jazz
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2021-12- | 
| 11 | 
            +
            date: 2021-12-30 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rake
         |