liqrrdmetal 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/liqrrdmetal.rb +2 -0
- data/lib/liqrrdmetal/liqrrdmetal.rb +230 -0
- metadata +52 -0
data/lib/liqrrdmetal.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'strscan'
|
3
|
+
|
4
|
+
# Derived from the LiquidMetal[https://github.com/rmm5t/liquidmetal]
|
5
|
+
# JavaScript library, LiqrrdMetal brings substring scoring to Ruby.
|
6
|
+
# Similar to Quicksilver[http://qsapp.com/], LiqrrdMetal gives users the ability
|
7
|
+
# to quickly find the most relevant items by typing in portions of the string while
|
8
|
+
# seeing the portions of the substring that are being matched.
|
9
|
+
#
|
10
|
+
# To facilitate common sorting, lower scores are _better_;
|
11
|
+
# a score of 0.0 indicates a perfect match, while a score of 1.0 indicates no match.
|
12
|
+
#
|
13
|
+
# == Usage
|
14
|
+
#
|
15
|
+
# Starting with the basics, here is how to find the score for a possible match:
|
16
|
+
#
|
17
|
+
# score = LiqrrdMetal.score( "re", "regards.txt" )
|
18
|
+
# #=> 0.082
|
19
|
+
#
|
20
|
+
# score = LiqrrdMetal.score( "re", "preview.jpg" )
|
21
|
+
# #=> 0.236
|
22
|
+
#
|
23
|
+
# score = LiqrrdMetal.score( "re", "no" )
|
24
|
+
# #=> 1.0
|
25
|
+
#
|
26
|
+
# Want to know which letters were matched?
|
27
|
+
#
|
28
|
+
# score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
|
29
|
+
# puts "%.02f" % score
|
30
|
+
# #=> 0.24
|
31
|
+
#
|
32
|
+
# p parts
|
33
|
+
# #=> [#<struct LiqrrdMetal::MatchPart text="P", match=false>,
|
34
|
+
# #=> #<struct LiqrrdMetal::MatchPart text="re", match=true>,
|
35
|
+
# #=> #<struct LiqrrdMetal::MatchPart text="view.jpg", match=false>]
|
36
|
+
#
|
37
|
+
# puts parts.join
|
38
|
+
# #=> Preview.jpg
|
39
|
+
#
|
40
|
+
# puts parts.map(&:to_html).join
|
41
|
+
# #=> P<span class='match'>re</span>view.jpg
|
42
|
+
#
|
43
|
+
# require 'json'
|
44
|
+
# puts parts.to_json
|
45
|
+
# #=> [{"t":"P","m":false},{"t":"re","m":true},{"t":"view.jpg","m":false}]
|
46
|
+
#
|
47
|
+
# Sort an array of possible matches by score, removing low-scoring items:
|
48
|
+
#
|
49
|
+
# def best_matches( search, strings )
|
50
|
+
# strings.map{ |s|
|
51
|
+
# [LiqrrdMetal.score(search,s),s]
|
52
|
+
# }.select{ |score,string|
|
53
|
+
# score < 0.3
|
54
|
+
# }.sort.map{ |score,string|
|
55
|
+
# string
|
56
|
+
# }
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
# p best_matches( "re", various_filenames )
|
60
|
+
# #=> ["resizing-text.svg", "PreviewIcon.psd" ]
|
61
|
+
#
|
62
|
+
# Given an array of possible matches, return the matching parts sorted by score:
|
63
|
+
#
|
64
|
+
# hits = LiqrrdMetal.parts_by_score( "re", various_filenames )
|
65
|
+
#
|
66
|
+
# p hits.map(&:join)
|
67
|
+
# #=> ["resizing-text.svg", "PreviewIcon.psd", "prime-finder.rb" ]
|
68
|
+
#
|
69
|
+
# p hits.map{ |parts| parts.map(&:to_ascii).join }
|
70
|
+
# #=> ["_re_sizing-text.svg", "P_re_viewIcon.psd", "p_r_im_e_-finder.rb" ]
|
71
|
+
#
|
72
|
+
# You can also specify the threshold for the parts_by_score method:
|
73
|
+
#
|
74
|
+
# good_hits = LiqrrdMetal.parts_by_score( "re", various_filenames, 0.3 )
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# == License & Contact
|
78
|
+
#
|
79
|
+
# LiqrrdMetal is released under the {MIT License}[http://www.opensource.org/licenses/mit-license.php].
|
80
|
+
#
|
81
|
+
# Copyright (c) 2011, Gavin Kistner (!@phrogz.net)
|
82
|
+
module LiqrrdMetal
  VERSION = 0.5

  # Per-character scores; lower is better. The array-valued constants are
  # one-element arrays so they can be repeated with <code>Array#*</code>.
  #
  # If you want score_with_parts to be accurate, the MATCH score must be unique
  # (matched parts are found by comparing each character's score with MATCH).
  MATCH                = 0.00   #:nodoc:
  NEW_WORD             = 0.01   #:nodoc:
  TRAILING_BUT_STARTED = [0.10] #:nodoc:
  BUFFER               = [0.15] #:nodoc:
  TRAILING             = [0.20] #:nodoc:
  NO_MATCH             = [1.00] #:nodoc:

  # Used to identify substrings and whether or not they were matched directly by the search string.
  class MatchPart
    # The substring text
    attr_reader :text

    # Whether the substring was part of the match
    attr_reader :match

    def initialize( text, match=false )
      @text  = text
      @match = match
    end

    # Does this part indicate a matched substring?
    def match?; @match; end

    # Returns the substring (regardless of whether it was matched or not)
    def to_s; @text; end

    # The text wrapped by the HTML <code>\<span class='match'\>...\</span\></code> (only wrapped if it was a match).
    # The text is inserted verbatim; no HTML escaping is applied.
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.map(&:to_html).join
    #   #=> P<span class='match'>re</span>view.jpg
    def to_html; @match ? "<span class='match'>#{@text}</span>" : @text; end

    # The text wrapped with underscores (only wrapped if it was a match)
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.map(&:to_ascii).join
    #   #=> P_re_view.jpg
    def to_ascii; @match ? "_#{@text}_" : @text; end

    # Get this part as a terse JSON[http://json.org] payload suitable
    # for transmitting over the wire. Relies on <code>Hash#to_json</code>,
    # so the caller must have loaded a JSON library first.
    #   require 'json'
    #   score,parts = LiqrrdMetal.score_with_parts( "re", "Preview.jpg" )
    #   puts parts.to_json
    #   #=> [{"t":"P","m":false},{"t":"re","m":true},{"t":"view.jpg","m":false}]
    def to_json(*a); { t:@text, m:!!@match }.to_json(*a); end
  end

  module_function

  # Match a single search term against an array of possible results,
  # receiving an array of the matched text parts sorted by score, best
  # (lowest) score first; ties are broken by the text itself.
  #
  # Only entries scoring strictly below +score_threshold+ are returned. With
  # the default of 1.0 non-matching entries (which score 1.0) are excluded;
  # pass a threshold greater than 1.0 to include them.
  #
  #   items = ["FooBar","Foo Bar","For the Love of Big Cars"]
  #   hits  = LiqrrdMetal.parts_by_score( "foobar", items )
  #   hits.each{ |parts| puts parts.map(&:to_ascii).join }
  #   #=> _FooBar_
  #   #=> _Foo_ _Bar_
  #   #=> _Fo_r the L_o_ve of _B_ig C_ar_s
  def parts_by_score( search, actuals, score_threshold=1.0 )
    actuals.map{ |actual|
      [ actual, *score_with_parts(search,actual) ]
    }.select{ |_actual,score,_parts|
      score < score_threshold
    }.sort_by{ |actual,score,_parts|
      [ score, actual ]
    }.map{ |_actual,_score,parts|
      parts
    }
  end

  # Returns an array with the score of the match,
  # followed by an array of MatchPart instances that together
  # cover the whole of +actual+.
  #
  # An empty search, or a search longer than +actual+, yields a single
  # unmatched part holding the entire text.
  #
  #   score, parts = LiqrrdMetal.score_with_parts( "foov", "A Fool in Love" )
  #   puts "%0.2f" % score
  #   #=> 0.53
  #   p parts.map{ |p| p.match? ? "_#{p}_" : p.text }.join
  #   #=> "A _Foo_l in Lo_v_e"
  #   p parts.map(&:to_html).join
  #   #=> "A <span class='match'>Foo</span>l in Lo<span class='match'>v</span>e"
  def score_with_parts( search, actual )
    return [ TRAILING[0], [MatchPart.new(actual)] ] if search.empty?
    return [ NO_MATCH[0], [MatchPart.new(actual)] ] if search.length > actual.length

    values  = scores( search, actual )
    average = values.inject(:+) / values.length

    # Walk the per-character scores, starting a new part every time we
    # cross between matched and unmatched characters.
    parts = []
    was_matching = start = nil
    values.each_with_index do |value,i|
      is_match = (value == MATCH)
      next if is_match == was_matching
      parts << MatchPart.new( actual[start...i], was_matching ) if start
      was_matching = is_match
      start = i
    end
    parts << MatchPart.new( actual[start..-1], was_matching ) if start

    [ average, parts ]
  end

  # Return a score for matching the search term against the actual text:
  # the mean of the per-character values from #scores.
  # A score of <code>1.0</code> indicates no match. A score of <code>0.0</code> is a perfect match.
  # An empty search scores <code>0.2</code>; a search longer than the text scores <code>1.0</code>.
  def score( search, actual )
    return TRAILING[0] if search.empty?
    return NO_MATCH[0] if search.length > actual.length

    values = scores( search, actual )
    values.inject(:+) / values.length
  end

  # Return an array of scores, one for each character in the actual text.
  # Returns a single-value array of <code>[1.0]</code> if no match exists.
  #
  # Each character of +search+ is looked for literally and case-insensitively,
  # in order, after the previous match. Characters skipped on the way to a
  # match are penalised as follows:
  # * match at the start of a word (preceded by whitespace): skipped characters
  #   get the BUFFER score and the preceding whitespace gets NEW_WORD;
  # * match on an ASCII capital letter: skipped characters get BUFFER;
  # * otherwise: skipped characters get the NO_MATCH score.
  # Characters after the last match get TRAILING_BUT_STARTED if the very first
  # character of the text was matched, TRAILING otherwise.
  def scores( search, actual )
    marks   = Array.new(actual.length)
    last    = -1     # character index of the previous matched letter
    started = false  # was the very first character of +actual+ matched?
    scanner = StringScanner.new(actual)

    search.each_char do |c|
      # Escape the character so regexp metacharacters are matched literally.
      fluff = scanner.scan_until(/#{Regexp.escape(c)}/i)
      # Hand back a copy so callers cannot modify the shared constant.
      return NO_MATCH.dup unless fluff

      # scanner.pos is a byte offset; derive a character index from the
      # consumed text so that multibyte strings are indexed correctly.
      pos     = last + fluff.length
      started = true if pos == 0

      if pos > 0 && actual[pos-1] =~ /\s/
        # Letters skipped before the whitespace that introduces this word.
        marks.fill( BUFFER[0], last+1, pos-last-2 ) if pos-last-2 > 0
        # Only tag the whitespace if it was not itself a matched character.
        marks[pos-1] = NEW_WORD if pos-1 > last
      elsif actual[pos] =~ /[A-Z]/
        marks.fill( BUFFER[0], last+1, pos-last )
      else
        marks.fill( NO_MATCH[0], last+1, pos-last )
      end

      marks[pos] = MATCH
      last = pos
    end

    tail = (started ? TRAILING_BUT_STARTED : TRAILING)[0]
    marks.fill( tail, last+1, marks.length-last-1 )
    marks
  end
end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: liqrrdmetal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.5'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Gavin Kistner
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-04-19 00:00:00.000000000 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
description: Derived from the LiquidMetal JavaScript library, LiqrrdMetal brings substring
|
16
|
+
scoring to Ruby. Similar to Quicksilver, LiqrrdMetal gives users the ability to
|
17
|
+
quickly find the most relevant items by typing in portions of the string, while
|
18
|
+
seeing the portions of the substring that are being matched.
|
19
|
+
email: gavin@phrogz.net
|
20
|
+
executables: []
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- lib/liqrrdmetal/liqrrdmetal.rb
|
25
|
+
- lib/liqrrdmetal.rb
|
26
|
+
has_rdoc: true
|
27
|
+
homepage: http://github.com/Phrogz/liqrrdmetal
|
28
|
+
licenses: []
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements:
|
46
|
+
- StringScanner (part of the Ruby Standard Library)
|
47
|
+
rubyforge_project:
|
48
|
+
rubygems_version: 1.5.2
|
49
|
+
signing_key:
|
50
|
+
specification_version: 3
|
51
|
+
summary: Calculate scoring of autocomplete-style substring matches.
|
52
|
+
test_files: []
|