ix-cli 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/ix-string-similarity +161 -108
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9356248fcc4f5fe5c83fedeb01e13166a9764bed70c39fe06132963141f394d9
4
- data.tar.gz: c7676df3c6b0e8427888882bc02fa609e6aa987816f76ef90e656ca6ce196ed7
3
+ metadata.gz: 0c5aadde615cdfa81843168a63e691e29be91832d247cb01c769ac6f0be4cf5f
4
+ data.tar.gz: 178a6b48491c5e5d627390595c60bef6b17c37d150ce02f8ccf59070666b8e2f
5
5
  SHA512:
6
- metadata.gz: 21233826e4b716857231aa89c0abc3263702cf0347b71a2bfc40e3502b79946b98a7b3ae5496bf1975121ef1f3c44952bedd0415d817ac7eee34246ec91007b6
7
- data.tar.gz: fbee1010c9fda5caf7144b8855c6ff8f502413ba3dbee2b81c630340d6c429299878b9b20cc34bc6d9fe2912e3b5e4cee44159f0b31fc947d0467957f7cf6802
6
+ metadata.gz: 34a177748f513b36e85a425f02865b2c7803b1d2c59abce5cd79aae31bf7ff992adffdf3e7d4277c9312c6aa90f209fb662dde35cc2752366de0c6c7c14c90ea
7
+ data.tar.gz: 6b79ddb512d93d28444592dd107076eae1e9c24b0b4da4c9f8c4d8fda5a0be46a69a48d2de0b3afd55bf8f741cc15437137908f736d13c2348581cf43d016c21
@@ -1,152 +1,205 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'optparse'
4
- require 'ostruct'
3
+ class Chunk
4
+ attr_accessor :char_1
5
+ attr_accessor :char_2
6
+ attr_accessor :index_1
7
+ attr_accessor :index_2
8
+
9
+ def initialize(c1, c2, i1, i2)
10
+ @char_1 = c1
11
+ @char_2 = c2
12
+ @index_1 = i1
13
+ @index_2 = i2
14
+ end
5
15
 
6
- configuration = OpenStruct.new
16
+ def to_s
17
+ "#{char_1} #{char_2} #{index_1} #{index_2}"
18
+ end
19
+ end
7
20
 
8
- OptionParser.new do |opts|
21
+ class Similarity
22
+ CHAR_REGEX = /./
9
23
 
10
- opts.banner = "Find similarity in a set of strings."
11
- opts.separator ''
12
- opts.separator "Usage: #{File.basename($0)} [OPTIONS]"
13
- opts.separator ''
24
+ attr_accessor :string_1
25
+ attr_accessor :string_2
14
26
 
15
- configuration.group = false
16
- configuration.threshold = 50
17
- configuration.summary = false
27
+ def initialize(string_1, string_2)
28
+ @string_1 = string_1
29
+ @string_2 = string_2
30
+ end
18
31
 
19
- description = "Group input in batches and process each individually (faster)"
20
- opts.on("-g", "--group", description) do |v|
21
- configuration.group = v
32
+ def tokens
33
+ chunks = []
34
+ string_1.scan(CHAR_REGEX).each_with_index do |char_1, index_1|
35
+ string_2.scan(CHAR_REGEX).each_with_index do |char_2, index_2|
36
+ next if char_1 != char_2
37
+ chunks.push(Chunk.new(char_1, char_2, index_1, index_2))
38
+ end
39
+ end
40
+ chunks
22
41
  end
23
42
 
24
- description = "Limit the number of results by threshold, default is 50"
25
- opts.on("-t", "--threshhold [NUMBER]", OptionParser::DecimalNumeric, description) do |v|
26
- configuration.threshold = v
43
+ def count
44
+ counter = 0
45
+ prev = false
46
+ tokens.each_with_index do |chunk, index|
47
+
48
+ unless prev
49
+ prev = chunk.index_1
50
+ next
51
+ end
52
+
53
+ if prev == (chunk.index_1 - 1)
54
+ counter += 1
55
+ end
56
+
57
+ prev = chunk.index_1
58
+ end
59
+
60
+ counter
27
61
  end
28
62
 
29
- description = "Print a Summary of the groups found"
30
- opts.on("-s", "--summary", description) do |v|
31
- configuration.summary = v
63
+ def score
64
+ desired = (string_1.size + string_2.size) / 2
65
+ size_thresh = ([string_1.size, string_2.size].sort.first.to_f / desired)
66
+ compatibility_thresh = (count.to_f + 1) / string_1.size
67
+ (size_thresh + compatibility_thresh).to_f / 2
32
68
  end
69
+ end
33
70
 
71
+ require 'optparse'
34
72
 
35
- opts.separator ''
73
+ options = {}
74
+ options[:threshold] = 0.8
75
+
76
+ OptionParser.new do |opts|
77
+
78
+ opts.banner = "Usage: #{$0} [OPTIONS]"
79
+
80
+ opts.on('-t', '--threshold [NUMBER]', 'Threshold default value is 0.8.') do |value|
81
+ options[:threshold] = value.to_f
82
+ end
36
83
 
37
84
  end.parse!
38
85
 
39
- # puts configuration.inspect
86
+ required_options = [:threshold]
87
+ required_options.each do |option|
88
+ unless options[option]
89
+ $stderr.puts "Can not run #{option.to_s} was not given."
90
+ exit 1
91
+ end
92
+ end
93
+
94
+ # hash = {
95
+ # 'line' => [
96
+ # { :line => 'line', :score => 1 },
97
+ # ]
98
+ # }
99
+
100
+ hash = {}
101
+ lines = 0
40
102
 
41
- class Array
42
- def product
43
- inject do |cumulative, value|
44
- cumulative += value
103
+ STDIN.each_line do |line|
104
+ line.chomp!
105
+ next if line == ''
106
+ lines += 1
107
+ resolved = false
108
+ hash.keys.each do |registered_line|
109
+ score = Similarity.new(line, registered_line).score
110
+ if score > options[:threshold]
111
+ hash[registered_line].push({
112
+ :line => line,
113
+ :score => score
114
+ })
115
+ resolved = true
45
116
  end
46
117
  end
118
+ next if resolved
119
+ hash[line] ||= []
47
120
  end
48
121
 
49
- class String
50
- def to_a
51
- array = []
52
- size.times do |n|
53
- array << self[n]
122
+ module Template
123
+
124
+ class Banner < Struct.new(:lines, :groups, :threshold, :datetime)
125
+ def to_s
126
+ format(template, to_h)
127
+ end
128
+ def template
129
+ '
130
+ Total Lines Parsed: %<lines>s
131
+ Total Groups Generated: %<groups>s
132
+ Similarity Theshold at: %<threshold>s
133
+ Generated on: %<datetime>s
134
+ '
54
135
  end
55
- array
56
136
  end
57
137
 
58
- def scores(other_string)
59
- longest_string = nil
60
- if other_string.size > self.size
61
- longest_string = other_string
62
- shortest_string = self
63
- else
64
- longest_string = self
65
- shortest_string = other_string
66
- end
67
- scores = longest_string.to_a.map do |char|
68
- 0
138
+ class Group < Struct.new(:number, :percent, :items, :line)
139
+ def to_s
140
+ format(template, to_h)
69
141
  end
70
- shortest_string.size.times do |index|
71
- if shortest_string[index] == longest_string[index]
72
- scores[index] = 1
73
- end
142
+ def template
143
+ 'Group %<number>s represents %<percent>s and has %<items>s items similar to: %<line>s'
74
144
  end
75
- scores
76
145
  end
77
146
 
78
- def similarity(other_string)
79
- scores(other_string).product * 100.0 / size
147
+ class Item < Struct.new(:count, :total, :score, :line)
148
+ def to_s
149
+ format(template, to_h)
150
+ end
151
+ def template
152
+ ' %<count>s/%<total>s %<score>s %<line>s'
153
+ end
80
154
  end
81
- end
82
155
 
83
- class TargetString
84
- attr_accessor :evaluated
85
- attr_accessor :data
86
- def to_s
87
- data
88
- end
89
156
  end
90
157
 
91
- # client
158
+ require 'isna'
92
159
 
93
- strings = []
160
+ banner = Template::Banner.new
161
+ banner.lines = lines.to_s.to_ansi.yellow.to_s
162
+ banner.groups = hash.keys.size.to_s.to_ansi.yellow.to_s
163
+ banner.threshold = options[:threshold].to_s.to_ansi.yellow.to_s
164
+ banner.datetime = Time.now.to_s.to_ansi.yellow.to_s
165
+ puts banner.to_s
94
166
 
95
- STDIN.each_line do |line|
96
- next if line.chomp == ''
97
- strings << line.chomp
98
- end
167
+ groups = []
99
168
 
100
- strings.sort! do |n1, n2|
101
- n1.size <=> n2.size
169
+ hash.each do |category_name, records|
170
+ groups.push([category_name, records.size])
102
171
  end
103
-
104
- strings.reverse!
105
-
106
- strings.map! do |string|
107
- target_string = TargetString.new
108
- target_string.evaluated = false
109
- target_string.data = string
110
- target_string
172
+
173
+ sorted_groups_by_n_records_asc = groups.sort do |array_a, array_b|
174
+ number_of_records_in_a = array_a[1]
175
+ number_of_records_in_b = array_b[1]
176
+ number_of_records_in_a <=> number_of_records_in_b
111
177
  end
112
178
 
113
- if configuration.group
114
- groups = strings.group_by do |string|
115
- string.data.size
179
+ sorted_groups_by_n_records_asc.reverse.each_with_index do |key, index|
180
+ line, records = key[0], hash[key[0]]
181
+
182
+ puts ''
183
+
184
+ group = Template::Group.new
185
+ group.percent = ('%2.2f%%' % ((records.size.to_f / lines) * 100)).to_s.to_ansi.red.to_s
186
+ group.number = (index + 1).to_s.to_ansi.red.to_s
187
+ group.items = records.size.to_s.to_ansi.cyan.to_s
188
+ group.line = line.chomp.to_ansi.green.to_s
189
+ puts group.to_s
190
+
191
+ sorted_items_in_group = records.sort do |a, b|
192
+ a[:score] <=> b[:score]
116
193
  end
117
- else
118
- groups = { 0 => strings }
119
- end
120
194
 
121
- counter = 0
122
- groups.each do |key, group|
123
- group.each do |string_1|
124
- counter = 0
125
- unless string_1.evaluated
126
- if configuration.summary
127
- summary_string = string_1.to_s
128
- else
129
- puts "****>>" + string_1.to_s
130
- end
131
- end
132
- string_1.evaluated = true
133
- group.each do |string_2|
134
- next if string_2.evaluated
135
- similarity = string_1.to_s.similarity(string_2.to_s)
136
- scores = string_1.to_s.scores(string_2.to_s).inspect
137
- template = "%5.f %s"
138
- bindings = [similarity, string_2, scores]
139
- if similarity >= configuration.threshold
140
- string_2.evaluated = true
141
- counter += 1
142
- unless configuration.summary
143
- puts template % bindings
144
- end
145
- end
146
- end
147
- if counter > 0
148
- puts "#{counter} #{summary_string}"
149
- end
195
+ sorted_items_in_group.reverse.each_with_index do |record, index|
196
+ item = Template::Item.new
197
+ item.count = (index + 1).to_s.rjust(4, ' ').to_ansi.cyan.to_s
198
+ item.total = records.size.to_s.ljust(4, ' ').to_ansi.cyan.to_s
199
+ item.score = ('%4.2f%%' % (record[:score] * 100)).rjust(7, ' ').to_ansi.green.to_s
200
+ item.line = record[:line]
201
+ puts item.to_s
150
202
  end
203
+
151
204
  end
152
205
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ix-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuyoshi Tlacaelel