ix-cli 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ix-string-similarity +161 -108
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 0c5aadde615cdfa81843168a63e691e29be91832d247cb01c769ac6f0be4cf5f
         | 
| 4 | 
            +
              data.tar.gz: 178a6b48491c5e5d627390595c60bef6b17c37d150ce02f8ccf59070666b8e2f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 34a177748f513b36e85a425f02865b2c7803b1d2c59abce5cd79aae31bf7ff992adffdf3e7d4277c9312c6aa90f209fb662dde35cc2752366de0c6c7c14c90ea
         | 
| 7 | 
            +
              data.tar.gz: 6b79ddb512d93d28444592dd107076eae1e9c24b0b4da4c9f8c4d8fda5a0be46a69a48d2de0b3afd55bf8f741cc15437137908f736d13c2348581cf43d016c21
         | 
    
        data/bin/ix-string-similarity
    CHANGED
    
    | @@ -1,152 +1,205 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 2 |  | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 3 | 
            +
            class Chunk
         | 
| 4 | 
            +
              attr_accessor :char_1
         | 
| 5 | 
            +
              attr_accessor :char_2
         | 
| 6 | 
            +
              attr_accessor :index_1
         | 
| 7 | 
            +
              attr_accessor :index_2
         | 
| 8 | 
            +
             | 
| 9 | 
            +
              def initialize(c1, c2, i1, i2)
         | 
| 10 | 
            +
                @char_1 = c1
         | 
| 11 | 
            +
                @char_2 = c2
         | 
| 12 | 
            +
                @index_1 = i1
         | 
| 13 | 
            +
                @index_2 = i2
         | 
| 14 | 
            +
              end
         | 
| 5 15 |  | 
| 6 | 
            -
             | 
| 16 | 
            +
              def to_s
         | 
| 17 | 
            +
                "#{char_1} #{char_2} #{index_1} #{index_2}"
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
            end
         | 
| 7 20 |  | 
| 8 | 
            -
             | 
| 21 | 
            +
            class Similarity
         | 
| 22 | 
            +
              CHAR_REGEX = /./
         | 
| 9 23 |  | 
| 10 | 
            -
               | 
| 11 | 
            -
               | 
| 12 | 
            -
              opts.separator "Usage: #{File.basename($0)} [OPTIONS]"
         | 
| 13 | 
            -
              opts.separator ''
         | 
| 24 | 
            +
              attr_accessor :string_1
         | 
| 25 | 
            +
              attr_accessor :string_2
         | 
| 14 26 |  | 
| 15 | 
            -
               | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 27 | 
            +
              def initialize(string_1, string_2)
         | 
| 28 | 
            +
                @string_1 = string_1
         | 
| 29 | 
            +
                @string_2 = string_2
         | 
| 30 | 
            +
              end
         | 
| 18 31 |  | 
| 19 | 
            -
               | 
| 20 | 
            -
             | 
| 21 | 
            -
                 | 
| 32 | 
            +
              def tokens
         | 
| 33 | 
            +
                chunks = []
         | 
| 34 | 
            +
                string_1.scan(CHAR_REGEX).each_with_index do |char_1, index_1|
         | 
| 35 | 
            +
                  string_2.scan(CHAR_REGEX).each_with_index do |char_2, index_2|
         | 
| 36 | 
            +
                    next if char_1 != char_2
         | 
| 37 | 
            +
                    chunks.push(Chunk.new(char_1, char_2, index_1, index_2))
         | 
| 38 | 
            +
                  end
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
                chunks
         | 
| 22 41 | 
             
              end
         | 
| 23 42 |  | 
| 24 | 
            -
               | 
| 25 | 
            -
             | 
| 26 | 
            -
                 | 
| 43 | 
            +
              def count
         | 
| 44 | 
            +
                counter = 0
         | 
| 45 | 
            +
                prev = false
         | 
| 46 | 
            +
                tokens.each_with_index do |chunk, index|
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                  unless prev
         | 
| 49 | 
            +
                    prev = chunk.index_1
         | 
| 50 | 
            +
                    next
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  if prev == (chunk.index_1 - 1)
         | 
| 54 | 
            +
                    counter += 1
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                  prev = chunk.index_1
         | 
| 58 | 
            +
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                counter
         | 
| 27 61 | 
             
              end
         | 
| 28 62 |  | 
| 29 | 
            -
               | 
| 30 | 
            -
             | 
| 31 | 
            -
                 | 
| 63 | 
            +
              def score
         | 
| 64 | 
            +
                desired = (string_1.size + string_2.size) / 2
         | 
| 65 | 
            +
                size_thresh = ([string_1.size, string_2.size].sort.first.to_f / desired)
         | 
| 66 | 
            +
                compatibility_thresh = (count.to_f + 1) / string_1.size
         | 
| 67 | 
            +
                (size_thresh + compatibility_thresh).to_f / 2
         | 
| 32 68 | 
             
              end
         | 
| 69 | 
            +
            end
         | 
| 33 70 |  | 
| 71 | 
            +
            require 'optparse'
         | 
| 34 72 |  | 
| 35 | 
            -
             | 
| 73 | 
            +
            options = {}
         | 
| 74 | 
            +
            options[:threshold] = 0.8
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            OptionParser.new do |opts|
         | 
| 77 | 
            +
             | 
| 78 | 
            +
              opts.banner = "Usage: #{$0} [OPTIONS]"
         | 
| 79 | 
            +
             | 
| 80 | 
            +
              opts.on('-t', '--threshold [NUMBER]', 'Threshold default value is 0.8.') do |value|
         | 
| 81 | 
            +
                options[:threshold] = value.to_f
         | 
| 82 | 
            +
              end
         | 
| 36 83 |  | 
| 37 84 | 
             
            end.parse!
         | 
| 38 85 |  | 
| 39 | 
            -
             | 
| 86 | 
            +
            required_options = [:threshold]
         | 
| 87 | 
            +
            required_options.each do |option|
         | 
| 88 | 
            +
              unless options[option]
         | 
| 89 | 
            +
                $stderr.puts "Can not run #{option.to_s} was not given."
         | 
| 90 | 
            +
                exit 1
         | 
| 91 | 
            +
              end
         | 
| 92 | 
            +
            end
         | 
| 93 | 
            +
             | 
| 94 | 
            +
            # hash = {
         | 
| 95 | 
            +
            #   'line' => [
         | 
| 96 | 
            +
            #     { :line => 'line', :score => 1 },
         | 
| 97 | 
            +
            #   ]
         | 
| 98 | 
            +
            # }
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            hash = {}
         | 
| 101 | 
            +
            lines = 0
         | 
| 40 102 |  | 
| 41 | 
            -
             | 
| 42 | 
            -
               | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 103 | 
            +
            STDIN.each_line do |line|
         | 
| 104 | 
            +
              line.chomp!
         | 
| 105 | 
            +
              next if line == ''
         | 
| 106 | 
            +
              lines += 1
         | 
| 107 | 
            +
              resolved = false
         | 
| 108 | 
            +
              hash.keys.each do |registered_line|
         | 
| 109 | 
            +
                score = Similarity.new(line, registered_line).score
         | 
| 110 | 
            +
                if score > options[:threshold]
         | 
| 111 | 
            +
                  hash[registered_line].push({
         | 
| 112 | 
            +
                    :line => line,
         | 
| 113 | 
            +
                    :score => score
         | 
| 114 | 
            +
                  })
         | 
| 115 | 
            +
                  resolved = true
         | 
| 45 116 | 
             
                end
         | 
| 46 117 | 
             
              end
         | 
| 118 | 
            +
              next if resolved
         | 
| 119 | 
            +
              hash[line] ||= []
         | 
| 47 120 | 
             
            end
         | 
| 48 121 |  | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
                 | 
| 53 | 
            -
                   | 
| 122 | 
            +
            module Template
         | 
| 123 | 
            +
             | 
| 124 | 
            +
              class Banner < Struct.new(:lines, :groups, :threshold, :datetime)
         | 
| 125 | 
            +
                def to_s
         | 
| 126 | 
            +
                  format(template, to_h)
         | 
| 127 | 
            +
                end
         | 
| 128 | 
            +
                def template
         | 
| 129 | 
            +
                  '
         | 
| 130 | 
            +
            Total Lines Parsed: %<lines>s
         | 
| 131 | 
            +
            Total Groups Generated: %<groups>s
         | 
| 132 | 
            +
            Similarity Theshold at: %<threshold>s
         | 
| 133 | 
            +
            Generated on: %<datetime>s
         | 
| 134 | 
            +
                  '
         | 
| 54 135 | 
             
                end
         | 
| 55 | 
            -
                array
         | 
| 56 136 | 
             
              end
         | 
| 57 137 |  | 
| 58 | 
            -
               | 
| 59 | 
            -
                 | 
| 60 | 
            -
             | 
| 61 | 
            -
                  longest_string = other_string
         | 
| 62 | 
            -
                  shortest_string = self
         | 
| 63 | 
            -
                else
         | 
| 64 | 
            -
                  longest_string = self
         | 
| 65 | 
            -
                  shortest_string = other_string
         | 
| 66 | 
            -
                end
         | 
| 67 | 
            -
                scores = longest_string.to_a.map do |char|
         | 
| 68 | 
            -
                  0
         | 
| 138 | 
            +
              class Group < Struct.new(:number, :percent, :items, :line)
         | 
| 139 | 
            +
                def to_s
         | 
| 140 | 
            +
                  format(template, to_h)
         | 
| 69 141 | 
             
                end
         | 
| 70 | 
            -
                 | 
| 71 | 
            -
                   | 
| 72 | 
            -
                    scores[index] = 1
         | 
| 73 | 
            -
                  end
         | 
| 142 | 
            +
                def template
         | 
| 143 | 
            +
                  'Group %<number>s represents %<percent>s and has %<items>s items similar to: %<line>s'
         | 
| 74 144 | 
             
                end
         | 
| 75 | 
            -
                scores
         | 
| 76 145 | 
             
              end
         | 
| 77 146 |  | 
| 78 | 
            -
               | 
| 79 | 
            -
                 | 
| 147 | 
            +
              class Item < Struct.new(:count, :total, :score, :line)
         | 
| 148 | 
            +
                def to_s
         | 
| 149 | 
            +
                  format(template, to_h)
         | 
| 150 | 
            +
                end
         | 
| 151 | 
            +
                def template
         | 
| 152 | 
            +
                  '  %<count>s/%<total>s %<score>s  %<line>s'
         | 
| 153 | 
            +
                end
         | 
| 80 154 | 
             
              end
         | 
| 81 | 
            -
            end
         | 
| 82 155 |  | 
| 83 | 
            -
            class TargetString
         | 
| 84 | 
            -
              attr_accessor :evaluated
         | 
| 85 | 
            -
              attr_accessor :data
         | 
| 86 | 
            -
              def to_s
         | 
| 87 | 
            -
                data
         | 
| 88 | 
            -
              end
         | 
| 89 156 | 
             
            end
         | 
| 90 157 |  | 
| 91 | 
            -
             | 
| 158 | 
            +
            require 'isna'
         | 
| 92 159 |  | 
| 93 | 
            -
             | 
| 160 | 
            +
            banner = Template::Banner.new
         | 
| 161 | 
            +
            banner.lines = lines.to_s.to_ansi.yellow.to_s
         | 
| 162 | 
            +
            banner.groups = hash.keys.size.to_s.to_ansi.yellow.to_s
         | 
| 163 | 
            +
            banner.threshold = options[:threshold].to_s.to_ansi.yellow.to_s
         | 
| 164 | 
            +
            banner.datetime = Time.now.to_s.to_ansi.yellow.to_s
         | 
| 165 | 
            +
            puts banner.to_s
         | 
| 94 166 |  | 
| 95 | 
            -
             | 
| 96 | 
            -
              next if line.chomp == ''
         | 
| 97 | 
            -
              strings << line.chomp
         | 
| 98 | 
            -
            end
         | 
| 167 | 
            +
            groups = []
         | 
| 99 168 |  | 
| 100 | 
            -
             | 
| 101 | 
            -
               | 
| 169 | 
            +
            hash.each do |category_name, records|
         | 
| 170 | 
            +
              groups.push([category_name, records.size])
         | 
| 102 171 | 
             
            end
         | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
               | 
| 108 | 
            -
              target_string.evaluated = false
         | 
| 109 | 
            -
              target_string.data = string
         | 
| 110 | 
            -
              target_string
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            sorted_groups_by_n_records_asc = groups.sort do |array_a, array_b|
         | 
| 174 | 
            +
              number_of_records_in_a = array_a[1]
         | 
| 175 | 
            +
              number_of_records_in_b = array_b[1]
         | 
| 176 | 
            +
              number_of_records_in_a <=> number_of_records_in_b
         | 
| 111 177 | 
             
            end
         | 
| 112 178 |  | 
| 113 | 
            -
             | 
| 114 | 
            -
               | 
| 115 | 
            -
             | 
| 179 | 
            +
            sorted_groups_by_n_records_asc.reverse.each_with_index do |key, index|
         | 
| 180 | 
            +
              line, records = key[0], hash[key[0]]
         | 
| 181 | 
            +
             | 
| 182 | 
            +
              puts ''
         | 
| 183 | 
            +
             | 
| 184 | 
            +
              group = Template::Group.new
         | 
| 185 | 
            +
              group.percent = ('%2.2f%%' % ((records.size.to_f / lines) * 100)).to_s.to_ansi.red.to_s
         | 
| 186 | 
            +
              group.number = (index + 1).to_s.to_ansi.red.to_s
         | 
| 187 | 
            +
              group.items = records.size.to_s.to_ansi.cyan.to_s
         | 
| 188 | 
            +
              group.line = line.chomp.to_ansi.green.to_s
         | 
| 189 | 
            +
              puts group.to_s
         | 
| 190 | 
            +
             | 
| 191 | 
            +
              sorted_items_in_group = records.sort do |a, b|
         | 
| 192 | 
            +
                a[:score] <=> b[:score]
         | 
| 116 193 | 
             
              end
         | 
| 117 | 
            -
            else
         | 
| 118 | 
            -
              groups = { 0 => strings }
         | 
| 119 | 
            -
            end
         | 
| 120 194 |  | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
                 | 
| 125 | 
            -
                 | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
                  else
         | 
| 129 | 
            -
                    puts "****>>" + string_1.to_s
         | 
| 130 | 
            -
                  end
         | 
| 131 | 
            -
                end
         | 
| 132 | 
            -
                string_1.evaluated = true
         | 
| 133 | 
            -
                group.each do |string_2|
         | 
| 134 | 
            -
                  next if string_2.evaluated
         | 
| 135 | 
            -
                  similarity = string_1.to_s.similarity(string_2.to_s)
         | 
| 136 | 
            -
                  scores = string_1.to_s.scores(string_2.to_s).inspect
         | 
| 137 | 
            -
                  template = "%5.f %s"
         | 
| 138 | 
            -
                  bindings = [similarity, string_2, scores]
         | 
| 139 | 
            -
                  if similarity >= configuration.threshold
         | 
| 140 | 
            -
                    string_2.evaluated = true
         | 
| 141 | 
            -
                    counter += 1
         | 
| 142 | 
            -
                    unless configuration.summary
         | 
| 143 | 
            -
                      puts template % bindings
         | 
| 144 | 
            -
                    end
         | 
| 145 | 
            -
                  end
         | 
| 146 | 
            -
                end
         | 
| 147 | 
            -
                if counter > 0
         | 
| 148 | 
            -
                  puts "#{counter} #{summary_string}"
         | 
| 149 | 
            -
                end
         | 
| 195 | 
            +
              sorted_items_in_group.reverse.each_with_index do |record, index|
         | 
| 196 | 
            +
                item = Template::Item.new
         | 
| 197 | 
            +
                item.count = (index + 1).to_s.rjust(4, ' ').to_ansi.cyan.to_s
         | 
| 198 | 
            +
                item.total = records.size.to_s.ljust(4, ' ').to_ansi.cyan.to_s
         | 
| 199 | 
            +
                item.score = ('%4.2f%%' % (record[:score] * 100)).rjust(7, ' ').to_ansi.green.to_s
         | 
| 200 | 
            +
                item.line = record[:line]
         | 
| 201 | 
            +
                puts item.to_s
         | 
| 150 202 | 
             
              end
         | 
| 203 | 
            +
             | 
| 151 204 | 
             
            end
         | 
| 152 205 |  |