searchlink 2.3.74 → 2.3.76
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/searchlink/config.rb +23 -23
- data/lib/searchlink/curl/html.rb +38 -38
- data/lib/searchlink/curl/json.rb +19 -17
- data/lib/searchlink/curl.rb +2 -2
- data/lib/searchlink/exceptions.rb +2 -2
- data/lib/searchlink/help.rb +13 -13
- data/lib/searchlink/output.rb +21 -21
- data/lib/searchlink/parse.rb +113 -108
- data/lib/searchlink/plist.rb +11 -11
- data/lib/searchlink/script_plugin.rb +10 -10
- data/lib/searchlink/search.rb +6 -6
- data/lib/searchlink/searches/amazon.rb +4 -4
- data/lib/searchlink/searches/applemusic.rb +28 -28
- data/lib/searchlink/searches/bitly.rb +11 -11
- data/lib/searchlink/searches/definition.rb +7 -7
- data/lib/searchlink/searches/duckduckgo.rb +31 -27
- data/lib/searchlink/searches/github.rb +48 -48
- data/lib/searchlink/searches/google.rb +16 -16
- data/lib/searchlink/searches/helpers/chromium.rb +46 -46
- data/lib/searchlink/searches/helpers/firefox.rb +20 -20
- data/lib/searchlink/searches/helpers/safari.rb +14 -14
- data/lib/searchlink/searches/history.rb +78 -78
- data/lib/searchlink/searches/hook.rb +5 -5
- data/lib/searchlink/searches/itunes.rb +37 -37
- data/lib/searchlink/searches/lastfm.rb +13 -13
- data/lib/searchlink/searches/linkding.rb +14 -14
- data/lib/searchlink/searches/lyrics.rb +11 -11
- data/lib/searchlink/searches/pinboard.rb +35 -35
- data/lib/searchlink/searches/social.rb +45 -56
- data/lib/searchlink/searches/software.rb +4 -4
- data/lib/searchlink/searches/spelling.rb +10 -10
- data/lib/searchlink/searches/spotlight.rb +4 -4
- data/lib/searchlink/searches/stackoverflow.rb +5 -5
- data/lib/searchlink/searches/tmdb.rb +17 -17
- data/lib/searchlink/searches/twitter.rb +8 -8
- data/lib/searchlink/searches/wikipedia.rb +4 -4
- data/lib/searchlink/searches/youtube.rb +7 -7
- data/lib/searchlink/searches.rb +16 -16
- data/lib/searchlink/semver.rb +4 -4
- data/lib/searchlink/string.rb +55 -55
- data/lib/searchlink/url.rb +30 -32
- data/lib/searchlink/util.rb +3 -3
- data/lib/searchlink/version.rb +19 -21
- data/lib/searchlink/which.rb +5 -5
- data/lib/searchlink.rb +31 -31
- metadata +31 -18
- data/lib/tokens.rb +0 -3
@@ -5,9 +5,9 @@ module SL
|
|
5
5
|
class << self
|
6
6
|
def settings
|
7
7
|
{
|
8
|
-
trigger:
|
8
|
+
trigger: "wiki",
|
9
9
|
searches: [
|
10
|
-
[
|
10
|
+
["wiki", "Wikipedia Search"]
|
11
11
|
]
|
12
12
|
}
|
13
13
|
end
|
@@ -17,7 +17,7 @@ module SL
|
|
17
17
|
body = `/usr/bin/curl -sSL 'https://en.wikipedia.org/wiki/Special:Search?search=#{search_terms.url_encode}&go=Go'`
|
18
18
|
return false unless body
|
19
19
|
|
20
|
-
body = body.force_encoding(
|
20
|
+
body = body.force_encoding("utf-8") if RUBY_VERSION.to_f > 1.9
|
21
21
|
|
22
22
|
begin
|
23
23
|
title = body.match(/"wgTitle":"(.*?)"/)[1]
|
@@ -30,6 +30,6 @@ module SL
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
SL::Searches.register
|
33
|
+
SL::Searches.register "wikipedia", :search, self
|
34
34
|
end
|
35
35
|
end
|
@@ -8,10 +8,10 @@ module SL
|
|
8
8
|
class << self
|
9
9
|
def settings
|
10
10
|
{
|
11
|
-
trigger:
|
11
|
+
trigger: "yte?",
|
12
12
|
searches: [
|
13
|
-
[
|
14
|
-
[
|
13
|
+
["yt", "YouTube Search"],
|
14
|
+
["yte", "YouTube Embed"]
|
15
15
|
]
|
16
16
|
}
|
17
17
|
end
|
@@ -33,18 +33,18 @@ module SL
|
|
33
33
|
def embed_for_url(url)
|
34
34
|
return unless url =~ YOUTUBE_RX
|
35
35
|
|
36
|
-
id = Regexp.last_match(
|
36
|
+
id = Regexp.last_match("id")
|
37
37
|
title = [
|
38
38
|
%(<iframe width="560" height="315" src="https://www.youtube.com/embed/#{id}"),
|
39
39
|
%(title="YouTube video player" frameborder="0"),
|
40
40
|
%(allow="accelerometer; autoplay; clipboard-write; encrypted-media;),
|
41
41
|
%(gyroscope; picture-in-picture; web-share"),
|
42
42
|
%(allowfullscreen></iframe>)
|
43
|
-
].join(
|
44
|
-
[
|
43
|
+
].join(" ")
|
44
|
+
["embed", title]
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
-
SL::Searches.register
|
48
|
+
SL::Searches.register "youtube", :search, self
|
49
49
|
end
|
50
50
|
end
|
data/lib/searchlink/searches.rb
CHANGED
@@ -43,9 +43,9 @@ module SL
|
|
43
43
|
#
|
44
44
|
def available_searches_html
|
45
45
|
searches = plugins[:search]
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
.flat_map { |_, plugin| plugin[:searches] }
|
47
|
+
.reject { |s| s[1].nil? }
|
48
|
+
.sort_by { |s| s[0].is_a?(Array) ? s[0][0] : s[0] }
|
49
49
|
out = ['<table id="searches">',
|
50
50
|
"<thead><td>Shortcut</td><td>Search Type</td></thead>",
|
51
51
|
"<tbody>"]
|
@@ -53,7 +53,7 @@ module SL
|
|
53
53
|
searches.each do |s|
|
54
54
|
out << "<tr>
|
55
55
|
<td>
|
56
|
-
<code>!#{s[0].is_a?(Array) ? "#{s[0][0]} (#{s[0][1..-1].join(
|
56
|
+
<code>!#{s[0].is_a?(Array) ? "#{s[0][0]} (#{s[0][1..-1].join(',')})" : s[0]}
|
57
57
|
</code>
|
58
58
|
</td><td>#{s[1]}</td></tr>"
|
59
59
|
end
|
@@ -72,10 +72,10 @@ module SL
|
|
72
72
|
|
73
73
|
searches.each do |s|
|
74
74
|
shortcut = if s[0].is_a?(Array)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
75
|
+
"#{s[0][0]} (#{s[0][1..-1].join(',')})"
|
76
|
+
else
|
77
|
+
s[0]
|
78
|
+
end
|
79
79
|
|
80
80
|
out << "!#{shortcut}#{shortcut.spacer}#{s[1]}"
|
81
81
|
end
|
@@ -95,7 +95,7 @@ module SL
|
|
95
95
|
|
96
96
|
def did_you_mean(term)
|
97
97
|
matches = best_search_match(term)
|
98
|
-
matches.empty? ? "" : ", did you mean #{matches.map { |m| "!#{m}" }.join(
|
98
|
+
matches.empty? ? "" : ", did you mean #{matches.map { |m| "!#{m}" }.join(', ')}?"
|
99
99
|
end
|
100
100
|
|
101
101
|
def valid_searches
|
@@ -123,7 +123,7 @@ module SL
|
|
123
123
|
plugins[type][title] = {
|
124
124
|
trigger: settings.fetch(:trigger, title).normalize_trigger,
|
125
125
|
searches: settings[:searches],
|
126
|
-
class: klass
|
126
|
+
class: klass
|
127
127
|
}
|
128
128
|
end
|
129
129
|
|
@@ -154,11 +154,11 @@ module SL
|
|
154
154
|
config = IO.read(file)
|
155
155
|
|
156
156
|
cfg = case ext
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
157
|
+
when /^y/i
|
158
|
+
YAML.safe_load(config)
|
159
|
+
else
|
160
|
+
JSON.parse(config)
|
161
|
+
end
|
162
162
|
cfg["filename"] = File.basename(file)
|
163
163
|
cfg["path"] = file.shorten_path
|
164
164
|
SL::ScriptSearch.new(cfg)
|
@@ -241,5 +241,5 @@ require_relative "searches/youtube"
|
|
241
241
|
# import
|
242
242
|
require_relative "searches/stackoverflow"
|
243
243
|
|
244
|
-
#import
|
244
|
+
# import
|
245
245
|
require_relative "searches/linkding"
|
data/lib/searchlink/semver.rb
CHANGED
@@ -12,13 +12,13 @@ module SL
|
|
12
12
|
## @return [SemVer] SemVer object
|
13
13
|
##
|
14
14
|
def initialize(version_string)
|
15
|
-
raise VersionError
|
15
|
+
raise VersionError, "Invalid semantic version number: #{version_string}" unless version_string.valid_version?
|
16
16
|
|
17
17
|
@maj, @min, @patch = version_string.split(/\./)
|
18
18
|
@pre = nil
|
19
19
|
if @patch =~ /(-?[^0-9]+\d*)$/
|
20
|
-
@pre = Regexp.last_match(1).sub(/^-/,
|
21
|
-
@patch = @patch.sub(/(-?[^0-9]+\d*)$/,
|
20
|
+
@pre = Regexp.last_match(1).sub(/^-/, "")
|
21
|
+
@patch = @patch.sub(/(-?[^0-9]+\d*)$/, "")
|
22
22
|
end
|
23
23
|
|
24
24
|
@maj = @maj.to_i
|
@@ -135,7 +135,7 @@ module SL
|
|
135
135
|
end
|
136
136
|
|
137
137
|
def to_s
|
138
|
-
ver = [@maj, @min, @patch].join(
|
138
|
+
ver = [@maj, @min, @patch].join(".")
|
139
139
|
@pre.nil? ? ver : "#{ver}-#{@pre}"
|
140
140
|
end
|
141
141
|
end
|
data/lib/searchlink/string.rb
CHANGED
@@ -5,7 +5,7 @@ module SL
|
|
5
5
|
class ::String
|
6
6
|
# Scrub invalid characters from string
|
7
7
|
def scrubup
|
8
|
-
encode(
|
8
|
+
encode("utf-16", invalid: :replace).encode("utf-8").gsub(/\u00A0/, " ")
|
9
9
|
end
|
10
10
|
|
11
11
|
# @see #scrub
|
@@ -32,7 +32,7 @@ module SL
|
|
32
32
|
## @return [String] modified regular expression
|
33
33
|
##
|
34
34
|
def normalize_trigger
|
35
|
-
gsub(/\((?!\?:)/,
|
35
|
+
gsub(/\((?!\?:)/, "(?:").gsub(/(^(\^|\\A)|(\$|\\Z)$)/, "").downcase
|
36
36
|
end
|
37
37
|
|
38
38
|
##
|
@@ -56,29 +56,29 @@ module SL
|
|
56
56
|
def parse_flags
|
57
57
|
gsub(/(\+\+|--)([dirtvs]+)\b/) do
|
58
58
|
m = Regexp.last_match
|
59
|
-
bool = m[1] ==
|
60
|
-
output =
|
61
|
-
m[2].split(
|
59
|
+
bool = m[1] == "++" ? "" : "no-"
|
60
|
+
output = " "
|
61
|
+
m[2].split("").each do |arg|
|
62
62
|
output += case arg
|
63
|
-
when
|
63
|
+
when "d"
|
64
64
|
"--#{bool}debug "
|
65
|
-
when
|
65
|
+
when "i"
|
66
66
|
"--#{bool}inline "
|
67
|
-
when
|
67
|
+
when "r"
|
68
68
|
"--#{bool}prefix_random "
|
69
|
-
when
|
69
|
+
when "t"
|
70
70
|
"--#{bool}include_titles "
|
71
|
-
when
|
71
|
+
when "v"
|
72
72
|
"--#{bool}validate_links "
|
73
|
-
when
|
73
|
+
when "s"
|
74
74
|
"--#{bool}remove_seo "
|
75
75
|
else
|
76
|
-
|
76
|
+
""
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
80
|
output
|
81
|
-
end.gsub(/ +/,
|
81
|
+
end.gsub(/ +/, " ")
|
82
82
|
end
|
83
83
|
|
84
84
|
def parse_flags!
|
@@ -91,7 +91,7 @@ module SL
|
|
91
91
|
## @return { description_of_the_return_value }
|
92
92
|
##
|
93
93
|
def fix_gist_file
|
94
|
-
sub(/^file-/,
|
94
|
+
sub(/^file-/, "").sub(/-([^\-]+)$/, '.\1')
|
95
95
|
end
|
96
96
|
|
97
97
|
# Turn a string into a slug, removing spaces and
|
@@ -100,7 +100,7 @@ module SL
|
|
100
100
|
# @return [String] slugified string
|
101
101
|
#
|
102
102
|
def slugify
|
103
|
-
downcase.gsub(/[^a-z0-9_]/i,
|
103
|
+
downcase.gsub(/[^a-z0-9_]/i, "-").gsub(/-+/, "-").sub(/-?$/, "")
|
104
104
|
end
|
105
105
|
|
106
106
|
# Destructive slugify
|
@@ -116,11 +116,11 @@ module SL
|
|
116
116
|
## @return [String] cleaned URL/String
|
117
117
|
##
|
118
118
|
def clean
|
119
|
-
gsub(/\n+/,
|
120
|
-
.gsub(/"/,
|
121
|
-
.gsub(/\|/,
|
119
|
+
gsub(/\n+/, " ")
|
120
|
+
.gsub(/"/, "&quot;")
|
121
|
+
.gsub(/\|/, "-")
|
122
122
|
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
123
|
-
.sub(/\?&/,
|
123
|
+
.sub(/\?&/, "").strip
|
124
124
|
end
|
125
125
|
|
126
126
|
# convert itunes to apple music link
|
@@ -128,8 +128,8 @@ module SL
|
|
128
128
|
# @return [String] apple music link
|
129
129
|
def to_am
|
130
130
|
input = dup
|
131
|
-
input.sub!(%r{/itunes\.apple\.com},
|
132
|
-
append = input =~ %r{\?[^/]+=} ?
|
131
|
+
input.sub!(%r{/itunes\.apple\.com}, "geo.itunes.apple.com")
|
132
|
+
append = input =~ %r{\?[^/]+=} ? "&app=music" : "?app=music"
|
133
133
|
input + append
|
134
134
|
end
|
135
135
|
|
@@ -139,7 +139,7 @@ module SL
|
|
139
139
|
## @return [String] just hostname and path of URL
|
140
140
|
##
|
141
141
|
def remove_protocol
|
142
|
-
sub(%r{^(https?|s?ftp|file)://},
|
142
|
+
sub(%r{^(https?|s?ftp|file)://}, "")
|
143
143
|
end
|
144
144
|
|
145
145
|
##
|
@@ -158,11 +158,11 @@ module SL
|
|
158
158
|
def path_elements
|
159
159
|
path = url_path
|
160
160
|
# force trailing slash
|
161
|
-
path.sub!(%r{/?$},
|
161
|
+
path.sub!(%r{/?$}, "/")
|
162
162
|
# remove last path element
|
163
|
-
path.sub!(%r{/[^/]+[.\-][^/]+/$},
|
163
|
+
path.sub!(%r{/[^/]+[.\-][^/]+/$}, "")
|
164
164
|
# remove starting/ending slashes
|
165
|
-
path.gsub!(%r{(^/|/$)},
|
165
|
+
path.gsub!(%r{(^/|/$)}, "")
|
166
166
|
# split at slashes, delete sections that are shorter
|
167
167
|
# than 5 characters or only consist of numbers
|
168
168
|
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
@@ -189,11 +189,11 @@ module SL
|
|
189
189
|
words = split(/\s+/)
|
190
190
|
|
191
191
|
punct_chars = {
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
192
|
+
"“" => "”",
|
193
|
+
"‘" => "’",
|
194
|
+
"[" => "]",
|
195
|
+
"(" => ")",
|
196
|
+
"<" => ">"
|
197
197
|
}
|
198
198
|
|
199
199
|
left_punct = []
|
@@ -205,10 +205,10 @@ module SL
|
|
205
205
|
end
|
206
206
|
end
|
207
207
|
|
208
|
-
tail =
|
208
|
+
tail = ""
|
209
209
|
left_punct.reverse.each { |c| tail += punct_chars[c] }
|
210
210
|
|
211
|
-
gsub(/[^a-z)\]’”.…]+$/i,
|
211
|
+
gsub(/[^a-z)\]’”.…]+$/i, "...").strip + tail
|
212
212
|
end
|
213
213
|
|
214
214
|
##
|
@@ -235,9 +235,9 @@ module SL
|
|
235
235
|
url = URI.parse(url)
|
236
236
|
host = url.hostname
|
237
237
|
unless host
|
238
|
-
return self unless SL.config[
|
238
|
+
return self unless SL.config["debug"]
|
239
239
|
|
240
|
-
SL.add_error(
|
240
|
+
SL.add_error("Invalid URL", "Could not remove SEO for #{url}")
|
241
241
|
return self
|
242
242
|
|
243
243
|
end
|
@@ -245,12 +245,12 @@ module SL
|
|
245
245
|
path = url.path
|
246
246
|
root_page = path =~ %r{^/?$} ? true : false
|
247
247
|
|
248
|
-
title.gsub!(/\s*(–|—)\s*/,
|
248
|
+
title.gsub!(/\s*(–|—)\s*/, " - ")
|
249
249
|
title.gsub!(/&[lr]dquo;/, '"')
|
250
250
|
title.gsub!(/&[lr]dquo;/, "'")
|
251
|
-
title.gsub!(/–/,
|
251
|
+
title.gsub!(/–/, " — ")
|
252
252
|
title = CGI.unescapeHTML(title)
|
253
|
-
title.gsub!(/ +/,
|
253
|
+
title.gsub!(/ +/, " ")
|
254
254
|
|
255
255
|
seo_title_separators = %w[| » « — – - · :]
|
256
256
|
|
@@ -258,7 +258,7 @@ module SL
|
|
258
258
|
re_parts = []
|
259
259
|
|
260
260
|
host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
|
261
|
-
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(
|
261
|
+
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(".?") }.join("|") : ""
|
262
262
|
re_parts.push(h_re) unless h_re.empty?
|
263
263
|
|
264
264
|
# p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
|
@@ -268,7 +268,7 @@ module SL
|
|
268
268
|
|
269
269
|
dead_switch = 0
|
270
270
|
|
271
|
-
while title.downcase.gsub(/[^a-z]/i,
|
271
|
+
while title.downcase.gsub(/[^a-z]/i, "") =~ /#{site_re}/i
|
272
272
|
|
273
273
|
break if dead_switch > 5
|
274
274
|
|
@@ -277,14 +277,14 @@ module SL
|
|
277
277
|
|
278
278
|
next if parts.length == 1
|
279
279
|
|
280
|
-
remaining_separators = seo_title_separators[i..-1].map { |s| Regexp.escape(s) }.join(
|
280
|
+
remaining_separators = seo_title_separators[i..-1].map { |s| Regexp.escape(s) }.join("")
|
281
281
|
seps = Regexp.new("^[^#{remaining_separators}]+$")
|
282
282
|
|
283
283
|
longest = parts.longest_element.strip
|
284
284
|
|
285
285
|
unless parts.empty?
|
286
286
|
parts.delete_if do |pt|
|
287
|
-
compressed = pt.strip.downcase.gsub(/[^a-z]/i,
|
287
|
+
compressed = pt.strip.downcase.gsub(/[^a-z]/i, "")
|
288
288
|
compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
|
289
289
|
end
|
290
290
|
end
|
@@ -302,7 +302,7 @@ module SL
|
|
302
302
|
dead_switch += 1
|
303
303
|
end
|
304
304
|
rescue StandardError => e
|
305
|
-
return self unless SL.config[
|
305
|
+
return self unless SL.config["debug"]
|
306
306
|
|
307
307
|
SL.add_error("Error SEO processing title for #{url}", e)
|
308
308
|
return self
|
@@ -314,7 +314,7 @@ module SL
|
|
314
314
|
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
315
315
|
end
|
316
316
|
|
317
|
-
title && title.length > 5 ? title.gsub(/\s+/,
|
317
|
+
title && title.length > 5 ? title.gsub(/\s+/, " ") : CGI.unescapeHTML(self)
|
318
318
|
end
|
319
319
|
|
320
320
|
##
|
@@ -340,12 +340,12 @@ module SL
|
|
340
340
|
|
341
341
|
words = split(/\s+/)
|
342
342
|
words.each do |word|
|
343
|
-
break unless trunc_title.join(
|
343
|
+
break unless trunc_title.join(" ").length.close_punctuation + word.length <= max
|
344
344
|
|
345
345
|
trunc_title << word
|
346
346
|
end
|
347
347
|
|
348
|
-
trunc_title.empty? ? words[0] : trunc_title.join(
|
348
|
+
trunc_title.empty? ? words[0] : trunc_title.join(" ")
|
349
349
|
end
|
350
350
|
|
351
351
|
##
|
@@ -370,7 +370,7 @@ module SL
|
|
370
370
|
## @param start_word [Boolean] Require match to be
|
371
371
|
## at beginning of word
|
372
372
|
##
|
373
|
-
def matches_score(terms, separator:
|
373
|
+
def matches_score(terms, separator: " ", start_word: true)
|
374
374
|
matched = 0
|
375
375
|
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
376
376
|
|
@@ -383,7 +383,7 @@ module SL
|
|
383
383
|
((matched / regexes.count.to_f) * 10).round(3)
|
384
384
|
end
|
385
385
|
|
386
|
-
def matches_fuzzy(terms, separator:
|
386
|
+
def matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5)
|
387
387
|
sources = split(/(#{separator})+/)
|
388
388
|
words = terms.split(/(#{separator})+/)
|
389
389
|
matches = 0
|
@@ -429,7 +429,7 @@ module SL
|
|
429
429
|
## @param string [String] The string to match
|
430
430
|
##
|
431
431
|
def matches_exact(string)
|
432
|
-
comp = gsub(/[^a-z0-9 ]/i,
|
432
|
+
comp = gsub(/[^a-z0-9 ]/i, "")
|
433
433
|
comp =~ /\b#{string.gsub(/[^a-z0-9 ]/i, '').split(/ +/).map { |s| Regexp.escape(s) }.join(' +')}/i
|
434
434
|
end
|
435
435
|
|
@@ -440,7 +440,7 @@ module SL
|
|
440
440
|
##
|
441
441
|
def matches_none(terms)
|
442
442
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
443
|
-
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i,
|
443
|
+
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
444
444
|
true
|
445
445
|
end
|
446
446
|
|
@@ -451,7 +451,7 @@ module SL
|
|
451
451
|
##
|
452
452
|
def matches_any(terms)
|
453
453
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
454
|
-
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i,
|
454
|
+
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
455
455
|
false
|
456
456
|
end
|
457
457
|
|
@@ -462,7 +462,7 @@ module SL
|
|
462
462
|
##
|
463
463
|
def matches_all(terms)
|
464
464
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
465
|
-
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i,
|
465
|
+
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
466
466
|
true
|
467
467
|
end
|
468
468
|
|
@@ -475,8 +475,8 @@ module SL
|
|
475
475
|
##
|
476
476
|
## @return [Array] array of regular expressions
|
477
477
|
##
|
478
|
-
def to_rx_array(separator:
|
479
|
-
bound = start_word ? '\b' :
|
478
|
+
def to_rx_array(separator: " ", start_word: true)
|
479
|
+
bound = start_word ? '\b' : ""
|
480
480
|
str = gsub(/(#{separator})+/, separator)
|
481
481
|
str.split(/#{separator}/).map { |arg| /#{bound}#{arg.gsub(/[^a-z0-9]/i, '.?')}/i }
|
482
482
|
end
|
@@ -493,8 +493,8 @@ module SL
|
|
493
493
|
## Shorten path by adding ~ for home directory
|
494
494
|
##
|
495
495
|
def shorten_path
|
496
|
-
home_directory = ENV[
|
497
|
-
sub(home_directory,
|
496
|
+
home_directory = ENV["HOME"]
|
497
|
+
sub(home_directory, "~")
|
498
498
|
end
|
499
499
|
end
|
500
500
|
end
|
data/lib/searchlink/url.rb
CHANGED
@@ -9,39 +9,39 @@ module SL
|
|
9
9
|
def valid_link?(uri_str, limit = 5)
|
10
10
|
return false unless uri_str
|
11
11
|
|
12
|
-
SL.notify(
|
12
|
+
SL.notify("Validating", uri_str)
|
13
13
|
return false if limit.zero?
|
14
14
|
|
15
15
|
url = URI(uri_str)
|
16
16
|
return true unless url.scheme
|
17
17
|
|
18
|
-
url.path =
|
18
|
+
url.path = "/" if url.path == ""
|
19
19
|
# response = Net::HTTP.get_response(URI(uri_str))
|
20
20
|
response = false
|
21
21
|
|
22
|
-
Net::HTTP.start(url.host, url.port, use_ssl: url.scheme ==
|
22
|
+
Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == "https") do |http|
|
23
23
|
response = http.request_head(url.path)
|
24
24
|
end
|
25
25
|
|
26
26
|
case response
|
27
27
|
when Net::HTTPMethodNotAllowed, Net::HTTPServiceUnavailable
|
28
28
|
unless /amazon\.com/ =~ url.host
|
29
|
-
SL.add_error(
|
29
|
+
SL.add_error("link validation", "Validation blocked: #{uri_str} (#{e})")
|
30
30
|
end
|
31
|
-
SL.notify(
|
31
|
+
SL.notify("Error validating", uri_str)
|
32
32
|
true
|
33
33
|
when Net::HTTPSuccess
|
34
34
|
true
|
35
35
|
when Net::HTTPRedirection
|
36
|
-
location = response[
|
36
|
+
location = response["location"]
|
37
37
|
valid_link?(location, limit - 1)
|
38
38
|
else
|
39
|
-
SL.notify(
|
39
|
+
SL.notify("Error validating", uri_str)
|
40
40
|
false
|
41
41
|
end
|
42
42
|
rescue StandardError => e
|
43
|
-
SL.notify(
|
44
|
-
SL.add_error(
|
43
|
+
SL.notify("Error validating", uri_str)
|
44
|
+
SL.add_error("link validation", "Possibly invalid => #{uri_str} (#{e})")
|
45
45
|
true
|
46
46
|
end
|
47
47
|
|
@@ -58,14 +58,14 @@ module SL
|
|
58
58
|
|
59
59
|
parts = url.hostname.split(/\./)
|
60
60
|
domain = if parts.count > 1
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
61
|
+
parts.slice(-2, 1).join("")
|
62
|
+
else
|
63
|
+
parts.join("")
|
64
|
+
end
|
65
65
|
|
66
66
|
path = url.path.split(%r{/}).last
|
67
67
|
if path
|
68
|
-
path.gsub!(/-/,
|
68
|
+
path.gsub!(/-/, " ").gsub!(/\.\w{2-4}$/, "")
|
69
69
|
else
|
70
70
|
path = domain
|
71
71
|
end
|
@@ -81,10 +81,10 @@ module SL
|
|
81
81
|
url = URI.parse(input.downcase)
|
82
82
|
|
83
83
|
title = if type == :ref_title
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
84
|
+
ref_title_for_url(url)
|
85
|
+
else
|
86
|
+
title(url.to_s) || input.sub(%r{^https?://}, "")
|
87
|
+
end
|
88
88
|
|
89
89
|
return [url.to_s, title] if url.hostname
|
90
90
|
end
|
@@ -94,15 +94,13 @@ module SL
|
|
94
94
|
def amazon_affiliatize(url, amazon_partner)
|
95
95
|
return url if amazon_partner.nil? || amazon_partner.empty?
|
96
96
|
|
97
|
-
unless url =~ %r{https?://(?<subdomain>.*?)amazon.com/(?:(?<title>.*?)/)?(?<type>[dg])p/(?<id>[^?]+)}
|
98
|
-
return [url, '']
|
99
|
-
end
|
97
|
+
return [url, ""] unless url =~ %r{https?://(?<subdomain>.*?)amazon.com/(?:(?<title>.*?)/)?(?<type>[dg])p/(?<id>[^?]+)}
|
100
98
|
|
101
99
|
m = Regexp.last_match
|
102
|
-
sd = m[
|
103
|
-
title = m[
|
104
|
-
t = m[
|
105
|
-
id = m[
|
100
|
+
sd = m["subdomain"]
|
101
|
+
title = m["title"].gsub(/-/, " ")
|
102
|
+
t = m["type"]
|
103
|
+
id = m["id"]
|
106
104
|
["https://#{sd}amazon.com/#{t}p/#{id}/?ref=as_li_ss_tl&ie=UTF8&linkCode=sl1&tag=#{amazon_partner}", title]
|
107
105
|
end
|
108
106
|
|
@@ -137,17 +135,17 @@ module SL
|
|
137
135
|
title = page.title || nil
|
138
136
|
|
139
137
|
if title.nil? || title =~ /^\s*$/
|
140
|
-
SL.add_error(
|
141
|
-
title = url.gsub(%r{(^https?://|/.*$)},
|
138
|
+
SL.add_error("Title not found", "Warning: missing title for #{url.strip}")
|
139
|
+
title = url.gsub(%r{(^https?://|/.*$)}, "").gsub(/-/, " ").strip
|
142
140
|
else
|
143
|
-
title = title.gsub(/\n/,
|
144
|
-
title.remove_seo!(url) if SL.config[
|
141
|
+
title = title.gsub(/\n/, " ").gsub(/\s+/, " ").strip # .sub(/[^a-z]*$/i,'')
|
142
|
+
title.remove_seo!(url) if SL.config["remove_seo"]
|
145
143
|
end
|
146
|
-
title.gsub!(/\|/,
|
147
|
-
title.remove_seo!(url.strip) if SL.config[
|
144
|
+
title.gsub!(/\|/, "—")
|
145
|
+
title.remove_seo!(url.strip) if SL.config["remove_seo"]
|
148
146
|
title.remove_protocol
|
149
147
|
rescue StandardError
|
150
|
-
SL.add_error(
|
148
|
+
SL.add_error("Error retrieving title", "Error determining title for #{url.strip}")
|
151
149
|
warn "Error retrieving title for #{url.strip}"
|
152
150
|
url.remove_protocol
|
153
151
|
end
|
data/lib/searchlink/util.rb
CHANGED
@@ -65,7 +65,7 @@ module SL
|
|
65
65
|
url, title, link_text = search.call
|
66
66
|
end
|
67
67
|
rescue Timeout::Error
|
68
|
-
SL.add_error(
|
68
|
+
SL.add_error("Timeout", "Search timed out")
|
69
69
|
url, title, link_text = false
|
70
70
|
end
|
71
71
|
|
@@ -81,9 +81,9 @@ module SL
|
|
81
81
|
## @return [String] path to new cache file
|
82
82
|
##
|
83
83
|
def cache_file_for(filename)
|
84
|
-
cache_folder = File.expand_path(
|
84
|
+
cache_folder = File.expand_path("~/.config/searchlink/cache")
|
85
85
|
FileUtils.mkdir_p(cache_folder) unless File.directory?(cache_folder)
|
86
|
-
File.join(cache_folder, filename.sub(/(\.cache)?$/,
|
86
|
+
File.join(cache_folder, filename.sub(/(\.cache)?$/, ".cache"))
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|