searchlink 2.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/searchlink +84 -0
- data/lib/searchlink/array.rb +7 -0
- data/lib/searchlink/config.rb +230 -0
- data/lib/searchlink/curl/html.rb +482 -0
- data/lib/searchlink/curl/json.rb +90 -0
- data/lib/searchlink/curl.rb +7 -0
- data/lib/searchlink/help.rb +103 -0
- data/lib/searchlink/output.rb +270 -0
- data/lib/searchlink/parse.rb +668 -0
- data/lib/searchlink/plist.rb +213 -0
- data/lib/searchlink/search.rb +70 -0
- data/lib/searchlink/searches/amazon.rb +25 -0
- data/lib/searchlink/searches/applemusic.rb +123 -0
- data/lib/searchlink/searches/bitly.rb +50 -0
- data/lib/searchlink/searches/definition.rb +67 -0
- data/lib/searchlink/searches/duckduckgo.rb +167 -0
- data/lib/searchlink/searches/github.rb +245 -0
- data/lib/searchlink/searches/google.rb +67 -0
- data/lib/searchlink/searches/helpers/chromium.rb +318 -0
- data/lib/searchlink/searches/helpers/firefox.rb +135 -0
- data/lib/searchlink/searches/helpers/safari.rb +133 -0
- data/lib/searchlink/searches/history.rb +166 -0
- data/lib/searchlink/searches/hook.rb +77 -0
- data/lib/searchlink/searches/itunes.rb +97 -0
- data/lib/searchlink/searches/lastfm.rb +41 -0
- data/lib/searchlink/searches/lyrics.rb +91 -0
- data/lib/searchlink/searches/pinboard.rb +183 -0
- data/lib/searchlink/searches/social.rb +105 -0
- data/lib/searchlink/searches/software.rb +27 -0
- data/lib/searchlink/searches/spelling.rb +59 -0
- data/lib/searchlink/searches/spotlight.rb +28 -0
- data/lib/searchlink/searches/stackoverflow.rb +31 -0
- data/lib/searchlink/searches/tmdb.rb +52 -0
- data/lib/searchlink/searches/twitter.rb +46 -0
- data/lib/searchlink/searches/wikipedia.rb +33 -0
- data/lib/searchlink/searches/youtube.rb +48 -0
- data/lib/searchlink/searches.rb +194 -0
- data/lib/searchlink/semver.rb +140 -0
- data/lib/searchlink/string.rb +469 -0
- data/lib/searchlink/url.rb +153 -0
- data/lib/searchlink/util.rb +87 -0
- data/lib/searchlink/version.rb +93 -0
- data/lib/searchlink/which.rb +175 -0
- data/lib/searchlink.rb +66 -0
- data/lib/tokens.rb +3 -0
- metadata +299 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
module SL
  # Semantic versioning library.
  #
  # Parses "MAJOR.MINOR.PATCH" strings with an optional pre-release suffix
  # (e.g. "1.2.3", "1.2.3-beta", "1.2.3-beta.1", "1.2.3rc2") and compares them.
  class SemVer
    # Matches a complete version string. Captures major, minor, patch and the
    # optional pre-release suffix (including its leading dash, if any).
    # Anchored with \A/\z so that a valid-looking line inside a multi-line
    # string is not accepted.
    VERSION_PATTERN = /\A(\d+)\.(\d+)\.(\d+)(-?[^0-9]+\d*)?\z/.freeze

    attr_accessor :maj, :min, :patch, :pre

    # Initialize a Semantic Version object
    #
    # Surrounding whitespace (such as the trailing newline of a value read
    # from a file or command) is ignored.
    #
    # @param version_string [String] a semantic version number
    #
    # @return [SemVer] SemVer object
    #
    # @raise [RuntimeError] if the string is not a valid version number
    #
    def initialize(version_string)
      parsed = VERSION_PATTERN.match(version_string.to_s.strip)
      raise "Invalid semantic version number: #{version_string}" unless parsed

      @maj, @min, @patch = parsed.captures.first(3).map(&:to_i)
      # Keep the whole suffix (it may itself contain dots), minus the dash
      @pre = parsed[4]&.sub(/\A-/, '')
    end

    ##
    ## SemVer String helpers
    ##
    class ::String
      # Test if given string is a valid semantic version
      # number with major, minor and patch (and optionally
      # pre). Leading and trailing whitespace is ignored.
      #
      # @return [Boolean] string is semantic version number
      #
      def valid_version?
        SL::SemVer::VERSION_PATTERN.match?(strip)
      end
    end

    ##
    ## Test if self is older than a semantic version number
    ##
    ## Major, minor and patch are compared numerically. When they are all
    ## equal, a pre-release is older than the plain release (1.0.0-beta <
    ## 1.0.0) and two pre-release tags are compared as strings.
    ##
    ## @param      other  [String,SemVer] The semantic version number or SemVer object
    ##
    ## @return     [Boolean] true if semver is older
    ##
    def older_than(other)
      latest = to_semver(other)

      core = [@maj, @min, @patch] <=> [latest.maj, latest.min, latest.patch]
      return core.negative? unless core.zero?

      # Same major.minor.patch: only the pre-release tag can differ
      return false if @pre.nil?
      return true if latest.pre.nil?

      @pre < latest.pre
    end

    ##
    ## @see        #older_than
    ##
    def <(other)
      older_than(other)
    end

    ##
    ## Test if self is newer than a semantic version number
    ##
    ## @param      other  [String,SemVer] The semantic version
    ##                    number or SemVer object
    ##
    ## @return     [Boolean] true if semver is newer
    ##
    def newer_than(other)
      to_semver(other).older_than(self)
    end

    ##
    ## @see        #newer_than
    ##
    def >(other)
      newer_than(other)
    end

    ##
    ## Test if self is equal to other
    ##
    ## NOTE: this overrides Object#equal? (identity) with a value comparison.
    ##
    ## @param      other  [String,SemVer] The other semantic version number
    ##
    ## @return     [Boolean] values are equal
    ##
    def equal?(other)
      v = to_semver(other)

      v.maj == @maj && v.min == @min && v.patch == @patch && v.pre == @pre
    end

    ##
    ## @see #equal?
    ##
    def ==(other)
      equal?(other)
    end

    ##
    ## Debugging representation
    ##
    ## @return     [Hash] object id and version components
    ##
    def inspect
      {
        object_id: object_id,
        maj: @maj,
        min: @min,
        patch: @patch,
        pre: @pre
      }
    end

    ##
    ## Version as a string, e.g. "1.2.3" or "1.2.3-beta"
    ##
    ## @return     [String] version string
    ##
    def to_s
      ver = [@maj, @min, @patch].join('.')
      @pre.nil? ? ver : "#{ver}-#{@pre}"
    end

    private

    # Return other unchanged if it is already a SemVer, otherwise parse it
    def to_semver(other)
      other.is_a?(SemVer) ? other : SemVer.new(other)
    end
  end
end
|
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
# String helpers
|
|
2
|
+
class ::String
|
|
3
|
+
# Percent-encode the string for use in a URL
#
# Any "%22" already present is turned back into a literal double quote
# before encoding.
#
# @return     [String] url encoded string
#
def url_encode
  with_plain_quotes = gsub(/%22/, '"')
  ERB::Util.url_encode(with_plain_quotes)
end
|
|
10
|
+
|
|
11
|
+
# Decode a URL-encoded string
#
# @return     [String] decoded string
#
def url_decode
  encoded = self
  CGI.unescape(encoded)
end
|
|
14
|
+
|
|
15
|
+
##
## Adds ?: to any plain parentheticals in a regular expression
## to avoid match groups, strips a leading ^ or \A and a trailing
## $ or \Z, and downcases the result.
##
## Escaped parentheses and groups that already start with "(?"
## (non-capturing groups, lookarounds, inline flags, named groups)
## are left untouched so they remain valid.
##
## @return     [String] modified regular expression
##
def normalize_trigger
  # Only a bare "(" opens a numbered capture group
  non_capturing = gsub(/(?<!\\)\((?!\?)/, '(?:')
  non_capturing.gsub(/(^(\^|\\A)|(\$|\\Z)$)/, '').downcase
end
|
|
24
|
+
|
|
25
|
+
##
## Generate a spacer based on character widths for help dialog display
##
## The width estimate is the string length, plus one for every
## m, w or v and minus one for every t.
##
## @return     [String, nil] two tabs for widths 0-3, a space and a tab
##                           for widths 4-12, nil for anything wider
##
def spacer
  width = length + count('mwv') - count('t')

  if (0..3).cover?(width)
    "\t\t"
  elsif (4..12).cover?(width)
    " \t"
  end
end
|
|
41
|
+
|
|
42
|
+
# Expand short flag clusters into long options
#
# A cluster is "++" (enable) or "--" (disable) followed by one or more of
# the letters d, i, r, t, v, s and a word boundary, e.g. "++dv" becomes
# "--debug --validate_links" and "--i" becomes "--no-inline". Runs of
# spaces in the result are collapsed to a single space.
#
# @return     [String] string with flags expanded
#
def parse_flags
  long_names = {
    'd' => 'debug',
    'i' => 'inline',
    'r' => 'prefix_random',
    't' => 'include_titles',
    'v' => 'validate_links',
    's' => 'remove_seo'
  }

  expanded = gsub(/(\+\+|--)([dirtvs]+)\b/) do
    sign, letters = Regexp.last_match.captures
    negation = sign == '++' ? '' : 'no-'
    letters.each_char.reduce(' ') { |acc, letter| "#{acc}--#{negation}#{long_names[letter]} " }
  end

  expanded.gsub(/ +/, ' ')
end
|
|
70
|
+
|
|
71
|
+
# Destructive version of #parse_flags
#
# @see        #parse_flags
#
# @return     [String] self, with its content replaced
#
def parse_flags!
  replace(parse_flags)
end
|
|
74
|
+
|
|
75
|
+
##
## Convert file-myfile-rb to myfile.rb
##
## Removes a leading "file-" and turns the last dash into a dot.
##
## @return     [String] file name with extension
##
def fix_gist_file
  without_prefix = sub(/^file-/, '')
  without_prefix.sub(/-([^\-]+)$/) { ".#{Regexp.last_match(1)}" }
end
|
|
83
|
+
|
|
84
|
+
# Turn a string into a slug: lowercase, with every run of characters
# other than letters, digits and underscores replaced by a single dash,
# and a trailing dash removed (a leading dash is kept)
#
# @return     [String] slugified string
#
def slugify
  dashed = downcase.gsub(/[^a-z0-9_]/i, '-')
  dashed.squeeze('-').chomp('-')
end
|
|
92
|
+
|
|
93
|
+
# Destructive slugify
#
# @see        #slugify
#
# @return     [String] self, with its content replaced
#
def slugify!
  replace(slugify)
end
|
|
98
|
+
|
|
99
|
+
##
## Remove newlines, escape quotes, and remove Google
## Analytics strings
##
## Newlines become spaces, double quotes become &quot;, pipes become
## dashes, and utm_source/utm_campaign/utm_medium style query parameters
## are removed wherever they appear in the query string while other
## parameters and the query separator are preserved.
##
## @return     [String] cleaned URL/String
##
def clean
  text = gsub(/\n+/, ' ').gsub('"', '&quot;').tr('|', '-')

  # Match a separator followed by a run of consecutive utm parameters.
  # A value ends at the next "&" or before trailing punctuation that
  # closes the URL in running text.
  utm_run = /([?&])((?:utm_[scm][^=&\s]*=[^&\s]*?(?=&|[!,.)\]]*(?:\s|\z))&?)+)/
  text = text.gsub(utm_run) do
    separator, run = Regexp.last_match.captures
    # Keep the separator only when another parameter follows the run
    run.end_with?('&') ? separator : ''
  end

  text.sub(/\?&/, '?').strip
end
|
|
112
|
+
|
|
113
|
+
# Convert an iTunes link to an Apple Music link
#
# Rewrites the host itunes.apple.com to geo.itunes.apple.com and appends
# app=music as a query parameter.
#
# @return     [String] apple music link
def to_am
  # The pattern includes the slash before the host, so restore it in the
  # replacement to keep "https://" intact
  url = sub(%r{/itunes\.apple\.com}, '/geo.itunes.apple.com')
  suffix = url =~ %r{\?[^/]+=} ? '&app=music' : '?app=music'
  url + suffix
end
|
|
122
|
+
|
|
123
|
+
##
## Remove the protocol from a URL
##
## Strips a leading http, https, ftp, sftp or file scheme and its "://".
##
## @return     [String] just hostname and path of URL
##
def remove_protocol
  scheme_prefix = %r{^(https?|s?ftp|file)://}
  sub(scheme_prefix, '')
end
|
|
131
|
+
|
|
132
|
+
##
## Return just the path of a URL
##
## @return     [String] The path component as parsed by URI
##
def url_path
  parsed = URI.parse(self)
  parsed.path
end
|
|
140
|
+
|
|
141
|
+
# Extract the most relevant portions from a URL path
#
# The final path segment is dropped when it contains a dot or a dash
# between other characters. The remaining path is split at slashes, and
# segments that are purely numeric or shorter than 5 characters are
# discarded.
#
# @return     [Array] array of relevant path elements
#
def path_elements
  trimmed = url_path
            .sub(%r{/?$}, '/')                     # force trailing slash
            .sub(%r{/[^/]+[.\-][^/]+/$}, '')       # remove last path element
            .gsub(%r{(^/|/$)}, '')                 # remove starting/ending slashes

  trimmed.split(%r{/}).reject { |section| section.length < 5 || section =~ /^\d+$/ }
end
|
|
157
|
+
|
|
158
|
+
##
## Destructive punctuation close
##
## @see        #close_punctuation
##
## @return     [String] self, with its content replaced
##
def close_punctuation!
  replace(close_punctuation)
end
|
|
166
|
+
|
|
167
|
+
##
## Complete incomplete punctuation pairs
##
## Scans the string word by word, tracking opening quotes and brackets
## that have not been closed, and appends the missing closers. Closers
## with no preceding opener are ignored. A trailing run of characters
## other than letters, closing punctuation, periods and ellipses is
## replaced with "..." before the closers are appended.
##
## @return     [String] string with all punctuation
##             properly paired (self if it contains no
##             opening punctuation)
##
def close_punctuation
  return self unless self =~ /[“‘\[(<]/

  pairs = {
    '“' => '”',
    '‘' => '’',
    '[' => ']',
    '(' => ')',
    '<' => '>'
  }

  unclosed = []

  split(/\s+/).each do |word|
    pairs.each do |opener, closer|
      unclosed.push(opener) if word.include?(opener)
      next unless word.include?(closer)

      # rindex is nil when the closer has no opener; delete_at(nil)
      # would raise TypeError
      last_open = unclosed.rindex(opener)
      unclosed.delete_at(last_open) if last_open
    end
  end

  # Close the most recently opened pair first
  tail = unclosed.reverse.map { |opener| pairs[opener] }.join

  gsub(/[^a-z)\]’”.…]+$/i, '...').strip + tail
end
|
|
200
|
+
|
|
201
|
+
##
## Destructively remove SEO elements from a title
##
## @param      url   The url of the page from which the
##                   title came
##
## @see        #remove_seo
##
## @return     [String] self, with its content replaced
##
def remove_seo!(url)
  replace(remove_seo(url))
end
|
|
212
|
+
|
|
213
|
+
##
## Remove SEO elements from a title
##
## Normalizes dashes and curly-quote entities, then repeatedly splits the
## title on common separators and drops the parts that look like the site
## name (derived from the URL's host), unless the URL is the site's root
## page. Finally, if separators remain, the longest part is kept.
##
## If the URL cannot be parsed or has no host, or processing raises an
## error, the original string is returned (and the problem is reported via
## SL.add_error when the 'debug' config option is set).
##
## @param      url   The url of the page from which the title came
##
## @return     [String] cleaned title
##
def remove_seo(url)
  title = dup

  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    SL.add_error('Invalid URL', "Could not remove SEO for #{url}") if SL.config['debug']
    return self
  end

  host = uri.hostname
  unless host
    SL.add_error('Invalid URL', "Could not remove SEO for #{url}") if SL.config['debug']
    return self
  end

  root_page = uri.path =~ %r{^/?$} ? true : false

  # Normalize dashes (literal or as entities) and curly-quote entities
  title.gsub!(/\s*(?:&ndash;|&mdash;|–|—)\s*/, ' - ')
  title.gsub!(/&[lr]dquo;/, '"')
  title.gsub!(/&[lr]squo;/, "'")
  title = CGI.unescapeHTML(title)
  title.gsub!(/ +/, ' ')

  seo_title_separators = %w[| » « — – - · :]

  begin
    # Host without "www." and TLD, split into segments of 3+ characters
    host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |seg| seg.length < 3 }
    # Allow any single character between the letters of a segment, so
    # "example" matches the letters of "Ex-ample" or "Ex Ample"
    h_re = host_parts.map { |seg| seg.downcase.split(//).join('.?') }.join('|')
    site_rx = /(#{h_re})/i

    dead_switch = 0

    while title.downcase.gsub(/[^a-z]/i, '') =~ site_rx
      # Guard against titles that never stop matching
      break if dead_switch > 5

      seo_title_separators.each_with_index do |sep, i|
        parts = title.split(/ *#{Regexp.escape(sep)} +/)

        next if parts.length == 1

        # A part is only removable if it contains none of the separators
        # still to be processed
        remaining_separators = seo_title_separators[i..].map { |s| Regexp.escape(s) }.join
        seps = Regexp.new("^[^#{remaining_separators}]+$")

        longest = parts.longest_element.strip

        parts.delete_if do |pt|
          compressed = pt.strip.downcase.gsub(/[^a-z]/i, '')
          compressed =~ site_rx && pt =~ seps ? !root_page : false
        end

        title = if parts.empty?
                  longest
                elsif parts.length > 2
                  parts.longest_element.strip
                else
                  parts.join(sep)
                end
      end

      dead_switch += 1
    end
  rescue StandardError => e
    SL.add_error("Error SEO processing title for #{url}", e) if SL.config['debug']
    return self
  end

  seps = Regexp.new(" *[#{seo_title_separators.map { |s| Regexp.escape(s) }.join}] +")
  if title =~ seps
    seo_parts = title.split(seps)
    title = seo_parts.longest_element.strip if seo_parts.length.positive?
  end

  # Fall back to the unprocessed title if almost nothing is left
  title && title.length > 5 ? title.gsub(/\s+/, ' ') : CGI.unescapeHTML(self)
end
|
|
306
|
+
|
|
307
|
+
##
## Truncate in place
##
## @see        #truncate
##
## @param      max   [Number]  The maximum length
##
## @return     [String] self, with its content replaced
##
def truncate!(max)
  replace(truncate(max))
end
|
|
317
|
+
|
|
318
|
+
##
## Truncate string to given length, preserving words
##
## Whole words are kept, joined by single spaces, for as long as the
## result fits within max characters. If even the first word is longer
## than max, that word is returned untruncated.
##
## @param      max   [Number]  The maximum length
##
## @return     [String] self if it already fits, otherwise the
##             truncated string
##
def truncate(max)
  return self if length <= max

  words = split
  kept = []
  used = 0

  words.each do |word|
    # Account for the joining space once there is a preceding word
    needed = kept.empty? ? word.length : used + 1 + word.length
    break if needed > max

    kept << word
    used = needed
  end

  kept.empty? ? words.first.to_s : kept.join(' ')
end
|
|
337
|
+
|
|
338
|
+
##
## Test an AppleScript response, substituting nil for
## 'Missing Value'
##
## @return     [Nil, String] nil if string contains
##             "missing value", otherwise self
##
def nil_if_missing
  include?('missing value') ? nil : self
end
|
|
350
|
+
|
|
351
|
+
##
## Score string based on number of matches, 0 - 10
##
## The score is the fraction of the terms that match self, scaled to 10
## and rounded to three decimals.
##
## @param      terms       [String]      The terms to
##                         match
## @param      separator   [String]  The word separator
## @param      start_word  [Boolean] Require match to be
##                         at beginning of word
##
## @return     [Integer, Float] 0 if nothing matched, otherwise the score
##
def matches_score(terms, separator: ' ', start_word: true)
  patterns = terms.to_rx_array(separator: separator, start_word: start_word)
  hits = patterns.count { |rx| self =~ rx }

  return 0 if hits.zero?

  (hits.fdiv(patterns.count) * 10).round(3)
end
|
|
372
|
+
|
|
373
|
+
##
## Score string based on fuzzy (edit distance) matches
##
## Both self and terms are split into words. Every pair of a source word
## and a term whose edit distance is within the threshold counts as a
## match. The score is the match count divided by the number of terms,
## scaled by 10 and rounded to three decimals.
##
## @param      terms       [String]  The terms to match
## @param      separator   [String]  The word separator (used as a regular
##                         expression fragment)
## @param      start_word  [Boolean] Currently unused
## @param      threshhold  [Integer] Maximum edit distance that counts as
##                         a match
##
## @return     [Integer, Float] 0 if terms contains no words, otherwise
##             the score
##
def matches_fuzzy(terms, separator: ' ', start_word: true, threshhold: 5)
  # Non-capturing group: with a capture group String#split also returns
  # the separators themselves, which would then be scored as words
  splitter = /(?:#{separator})+/
  sources = split(splitter)
  words = terms.split(splitter)

  return 0 if words.empty?

  matches = sources.sum do |src|
    words.count { |term| src.distance(term) <= threshhold }
  end

  ((matches / words.count.to_f) * 10).round(3)
end
|
|
386
|
+
|
|
387
|
+
##
## Edit distance between self and another string: the number of
## single-character insertions, deletions and substitutions needed to
## turn one into the other
##
## @param      t     [String] The string to compare with
##
## @return     [Integer] edit distance
##
def distance(t)
  return length if t.length.zero?
  return t.length if length.zero?

  # previous[j] holds the distance between the processed prefix of self
  # and the first j characters of t
  previous = (0..t.length).to_a

  (1..length).each do |i|
    current = [i]
    (1..t.length).each do |j|
      current << if self[i - 1] == t[j - 1]
                   previous[j - 1] # no operation required
                 else
                   [
                     previous[j],     # deletion
                     current[j - 1],  # insertion
                     previous[j - 1]  # substitution
                   ].min + 1
                 end
    end
    previous = current
  end

  previous.last
end
|
|
411
|
+
|
|
412
|
+
##
## Test if self contains an exact match for string (case insensitive)
##
## Characters other than letters, digits and spaces are ignored on both
## sides, the words of string may be separated by any number of spaces,
## and the match must start at a word boundary.
##
## @param      string [String] The string to match
##
## @return     [Integer, nil] index of the match within the stripped
##             copy of self, or nil if there is no match
##
def matches_exact(string)
  strip_rx = /[^a-z0-9 ]/i
  phrase = string.gsub(strip_rx, '').split(/ +/).map { |word| Regexp.escape(word) }.join(' +')

  gsub(strip_rx, '') =~ /\b#{phrase}/i
end
|
|
421
|
+
|
|
422
|
+
##
## Test that self does not contain any of terms
##
## Self is compared after removing everything except letters, digits
## and spaces.
##
## @param      terms [String, Array<Regexp>] The terms to test; a String
##                   is converted with #to_rx_array
##
## @return     [Boolean] true if no term matches
##
def matches_none(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  plain = gsub(/[^a-z0-9 ]/i, '')

  patterns.none? { |rx| plain =~ rx }
end
|
|
432
|
+
|
|
433
|
+
##
## Test if self contains any of terms
##
## Self is compared after removing everything except letters, digits
## and spaces.
##
## @param      terms [String, Array<Regexp>] The terms to test; a String
##                   is converted with #to_rx_array
##
## @return     [Boolean] true if at least one term matches
##
def matches_any(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  plain = gsub(/[^a-z0-9 ]/i, '')

  patterns.any? { |rx| plain =~ rx }
end
|
|
443
|
+
|
|
444
|
+
##
## Test that self matches every word in terms
##
## Self is compared after removing everything except letters, digits
## and spaces.
##
## @param      terms [String, Array<Regexp>] The terms to test; a String
##                   is converted with #to_rx_array
##
## @return     [Boolean] true if every term matches
##
def matches_all(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  plain = gsub(/[^a-z0-9 ]/i, '')

  patterns.all? { |rx| plain =~ rx }
end
|
|
454
|
+
|
|
455
|
+
##
## Break a string into an array of Regexps
##
## Repeated separators are collapsed, the string is split at the
## separator, and each word becomes a case-insensitive Regexp in which
## every character other than a letter or digit matches any optional
## character.
##
## @param      separator   [String]  The word separator (used as a regular
##                         expression fragment)
## @param      start_word  [Boolean] Require matches at
##                         start of word
##
## @return     [Array] array of regular expressions
##
def to_rx_array(separator: ' ', start_word: true)
  prefix = start_word ? '\b' : ''
  collapsed = gsub(/(#{separator})+/, separator)

  collapsed.split(/#{separator}/).map do |word|
    Regexp.new("#{prefix}#{word.gsub(/[^a-z0-9]/i, '.?')}", Regexp::IGNORECASE)
  end
end
|
|
469
|
+
end
|