liqrrdmetal 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/liqrrdmetal.rb +2 -0
- data/lib/liqrrdmetal/liqrrdmetal.rb +230 -0
- metadata +52 -0
data/lib/liqrrdmetal.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'strscan'
|
3
|
+
|
4
|
+
# Derived from the LiquidMetal[https://github.com/rmm5t/liquidmetal]
|
5
|
+
# JavaScript library, LiqrrdMetal brings substring scoring to Ruby.
|
6
|
+
# Similar to Quicksilver[http://qsapp.com/], LiqrrdMetal gives users the ability
|
7
|
+
# to quickly find the most relevant items by typing in portions of the string while
|
8
|
+
# seeing the portions of the substring that are being matched.
|
9
|
+
#
|
10
|
+
# To facilitate common sorting, lower scores are _better_;
|
11
|
+
# a score of 0.0 indicates a perfect match, while a score of 1.0 indicates no match.
|
12
|
+
#
|
13
|
+
# == Usage
|
14
|
+
#
|
15
|
+
# Starting with the basics, here is how to find the score for a possible match:
|
16
|
+
#
|
17
|
+
# score = LiqrrdMetal.score( "re", "regards.txt" )
|
18
|
+
# #=> 0.082
|
19
|
+
#
|
20
|
+
# score = LiqrrdMetal.score( "re", "preview.jpg" )
|
21
|
+
# #=> 0.236
|
22
|
+
#
|
23
|
+
# score = LiqrrdMetal.score( "re", "no" )
|
24
|
+
# #=> 1.0
|
25
|
+
#
|
26
|
+
# Want to know which letters were matched?
|
27
|
+
#
|
28
|
+
# score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
|
29
|
+
# puts "%.02f" % score
|
30
|
+
# #=> 0.24
|
31
|
+
#
|
32
|
+
# p parts
|
33
|
+
# #=> [#<struct LiqrrdMetal::MatchPart text="P", match=false>,
|
34
|
+
# #=> #<struct LiqrrdMetal::MatchPart text="re", match=true>,
|
35
|
+
# #=> #<struct LiqrrdMetal::MatchPart text="view.jpg", match=false>]
|
36
|
+
#
|
37
|
+
# puts parts.join
|
38
|
+
# #=> Preview.jpg
|
39
|
+
#
|
40
|
+
# puts parts.map(&:to_html).join
|
41
|
+
# #=> P<span class='match'>re</span>view.jpg
|
42
|
+
#
|
43
|
+
# require 'json'
|
44
|
+
# puts parts.to_json
|
45
|
+
# #=> [{"t":"P","m":false},{"t":"re","m":true},{"t":"view.jpg","m":false}]
|
46
|
+
#
|
47
|
+
# Sort an array of possible matches by score, removing low-scoring items:
|
48
|
+
#
|
49
|
+
# def best_matches( search, strings )
|
50
|
+
# strings.map{ |s|
|
51
|
+
# [LiqrrdMetal.score(search,s),s]
|
52
|
+
# }.select{ |score,string|
|
53
|
+
# score < 0.3
|
54
|
+
# }.sort.map{ |score,string|
|
55
|
+
# string
|
56
|
+
# }
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
# p best_matches( "re", various_filenames )
|
60
|
+
# #=> ["resizing-text.svg", "PreviewIcon.psd" ]
|
61
|
+
#
|
62
|
+
# Given an array of possible matches, return the matching parts sorted by score:
|
63
|
+
#
|
64
|
+
# hits = LiqrrdMetal.parts_by_score( "re", various_filenames )
|
65
|
+
#
|
66
|
+
# p hits.map(&:join)
|
67
|
+
# #=> ["resizing-text.svg", "PreviewIcon.psd", "prime-finder.rb" ]
|
68
|
+
#
|
69
|
+
# p hits.map{ |parts| parts.map(&:to_ascii).join }
|
70
|
+
# #=> ["_re_sizing-text.svg", "P_re_viewIcon.psd", "p_r_im_e_-finder.rb" ]
|
71
|
+
#
|
72
|
+
# You can also specify the threshold for the parts_by_score method:
|
73
|
+
#
|
74
|
+
# good_hits = LiqrrdMetal.parts_by_score( "re", various_filenames, 0.3 )
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# == License & Contact
|
78
|
+
#
|
79
|
+
# LiqrrdMetal is released under the {MIT License}[http://www.opensource.org/licenses/mit-license.php].
|
80
|
+
#
|
81
|
+
# Copyright (c) 2011, Gavin Kistner (!@phrogz.net)
|
82
|
+
module LiqrrdMetal
  VERSION = 0.5

  # Per-character scores; lower is better.
  # If you want score_with_parts to be accurate, the MATCH score must be unique:
  # a character is reported as "matched" only when its score equals MATCH, which
  # is why NEW_WORD is close to (but not exactly) a perfect score.
  # The array-valued constants are read through their first element.
  MATCH                = 0.00   #:nodoc:
  NEW_WORD             = 0.01   #:nodoc:
  TRAILING_BUT_STARTED = [0.10] #:nodoc:
  BUFFER               = [0.15] #:nodoc:
  TRAILING             = [0.20] #:nodoc:
  NO_MATCH             = [1.00] #:nodoc:

  # Used to identify substrings and whether or not they were matched directly by the search string.
  class MatchPart
    # The substring text
    attr_reader :text

    # Whether the substring was part of the match
    attr_reader :match

    # text::  the substring this part represents
    # match:: truthy if the substring was matched by the search string
    def initialize( text, match=false )
      @text  = text
      @match = match
    end

    # Does this part indicate a matched substring?
    def match?; @match; end

    # Returns the substring (regardless of whether it was matched or not)
    def to_s; @text; end

    # The text wrapped by the HTML <code>\<span class='match'\>...\</span\></code> (only wrapped if it was a match)
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.map(&:to_html).join
    #   #=> P<span class='match'>re</span>view.jpg
    #
    # NOTE: the text is inserted verbatim and is *not* HTML-escaped; escape
    # untrusted text yourself before rendering the result in a page.
    def to_html; @match ? "<span class='match'>#{@text}</span>" : @text; end

    # The text wrapped with underscores (only wrapped if it was a match)
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.map(&:to_ascii).join
    #   #=> P_re_view.jpg
    def to_ascii; @match ? "_#{@text}_" : @text; end

    # Get this part as a terse JSON[http://json.org] payload suitable
    # for transmitting over the wire. Requires that a JSON library providing
    # Hash#to_json has been loaded by the caller.
    #   require 'json'
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.to_json
    #   #=> [{"t":"P","m":false},{"t":"re","m":true},{"t":"view.jpg","m":false}]
    def to_json(*a); { t:@text, m:!!@match }.to_json(*a); end
  end

  module_function

  # Match a single search term against an array of possible results,
  # receiving an array of the matched text parts sorted by score, best
  # (lowest) score first; ties are broken by the text itself.
  # Only entries scoring strictly below +score_threshold+ are returned, so by
  # default non-matching entries (score 1.0) are not included in the results;
  # pass a +score_threshold+ above 1.0 to include them.
  #
  #   items = ["FooBar","Foo Bar","For the Love of Big Cars"]
  #   hits = LiqrrdMetal.parts_by_score( "foobar", items )
  #   hits.each{ |parts| puts parts.map(&:to_ascii).join }
  #   #=> _FooBar_
  #   #=> _Foo_ _Bar_
  #   #=> _Fo_r the L_o_ve of _B_ig C_ar_s
  def parts_by_score( search, actuals, score_threshold=1.0 )
    scored = actuals.map{ |actual| [ actual, *score_with_parts(search,actual) ] }
    kept   = scored.select{ |_actual,score,_parts| score < score_threshold }
    kept.sort_by{ |actual,score,_parts| [ score, actual ] }.map{ |_actual,_score,parts| parts }
  end

  # Returns an array with the score of the match,
  # followed by an array of MatchPart instances.
  #
  #   score, parts = LiqrrdMetal.score_with_parts( "foov", "A Fool in Love" )
  #   puts "%0.2f" % score
  #   #=> 0.53
  #   p parts.map{ |p| p.match? ? "_#{p}_" : p.text }.join
  #   #=> "A _Foo_l in Lo_v_e"
  #   p parts.map(&:to_html).join
  #   #=> "A <span class='match'>Foo</span>l in Lo<span class='match'>v</span>e"
  def score_with_parts( search, actual )
    return [ TRAILING[0], [MatchPart.new(actual)] ] if search.length == 0
    return [ NO_MATCH[0], [MatchPart.new(actual)] ] if search.length > actual.length

    values = scores( search, actual )
    score  = values.inject{ |sum,value| sum + value } / values.length

    # Split the text into runs of consecutive matched / unmatched characters.
    parts        = []
    start        = nil
    was_matching = nil
    values.each_with_index do |value,i|
      is_match = (value == MATCH)
      next if is_match == was_matching
      parts << MatchPart.new( actual[start...i], was_matching ) if start
      was_matching = is_match
      start        = i
    end
    # The final run extends to the end of the text (this also covers the
    # no-match case, where the single score stands for the whole string).
    parts << MatchPart.new( actual[start..-1], was_matching ) if start

    [ score, parts ]
  end

  # Return a score for matching the search term against the actual text.
  # A score of <code>1.0</code> indicates no match. A score of <code>0.0</code> is a perfect match.
  # An empty search term scores the same as purely trailing text (<code>0.2</code>).
  def score( search, actual )
    return TRAILING[0] if search.length == 0
    return NO_MATCH[0] if search.length > actual.length

    values = scores( search, actual )
    values.inject{ |sum,value| sum + value } / values.length
  end

  # Return an array of scores for each character in the actual text.
  # Returns a single-value array of <code>[1.0]</code> if no match exists
  # (a fresh array each time, so callers may modify it safely).
  #
  # Each character of +search+ is matched literally and case-insensitively,
  # in order, against the remainder of +actual+.
  def scores( search, actual )
    result  = Array.new( actual.length )
    last    = -1     # character index of the previous match
    started = false  # did the very first character of actual match?

    search.each_char do |c|
      # Escape the character so regex metacharacters are matched literally.
      found = Regexp.new( Regexp.escape(c), Regexp::IGNORECASE ).match( actual, last + 1 )
      return NO_MATCH.dup unless found

      # MatchData#end is a character (not byte) offset, so this stays
      # aligned with the indices of +result+ for multibyte text.
      pos = found.end(0) - 1
      started = true if pos == 0

      if pos > 0 && /\s/ =~ actual[pos-1]
        # Start of a new word: skipped characters get the buffer score and the
        # separating whitespace gets the near-perfect NEW_WORD score (unless
        # that whitespace was itself the previous match).
        if pos - 1 > last
          result.fill( BUFFER[0], (last+1)...(pos-1) )
          result[pos-1] = NEW_WORD
        end
      elsif /[A-Z]/ =~ actual[pos]
        # Matched an uppercase letter (e.g. camelCase hump): mild penalty for the gap.
        result.fill( BUFFER[0], (last+1)...pos )
      else
        # Matched mid-word: skipped characters count as not matching at all.
        result.fill( NO_MATCH[0], (last+1)...pos )
      end

      result[pos] = MATCH
      last = pos
    end

    # Unmatched text after the final match is penalized less if the match
    # began at the very start of the string.
    trailing = ( started ? TRAILING_BUT_STARTED : TRAILING )[0]
    result.fill( trailing, (last+1)...result.length )
    result
  end
end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: liqrrdmetal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.5'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Gavin Kistner
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-04-19 00:00:00.000000000 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
description: Derived from the LiquidMetal JavaScript library, LiqrrdMetal brings substring
|
16
|
+
scoring to Ruby. Similar to Quicksilver, LiqrrdMetal gives users the ability to
|
17
|
+
quickly find the most relevant items by typing in portions of the string, while
|
18
|
+
seeing the portions of the substring that are being matched.
|
19
|
+
email: gavin@phrogz.net
|
20
|
+
executables: []
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- lib/liqrrdmetal/liqrrdmetal.rb
|
25
|
+
- lib/liqrrdmetal.rb
|
26
|
+
has_rdoc: true
|
27
|
+
homepage: http://github.com/Phrogz/liqrrdmetal
|
28
|
+
licenses: []
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements:
|
46
|
+
- StringScanner (part of the Ruby Standard Library)
|
47
|
+
rubyforge_project:
|
48
|
+
rubygems_version: 1.5.2
|
49
|
+
signing_key:
|
50
|
+
specification_version: 3
|
51
|
+
summary: Calculate scoring of autocomplete-style substring matches.
|
52
|
+
test_files: []
|