searchlink 2.3.73 → 2.3.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/searchlink/config.rb +23 -23
- data/lib/searchlink/curl/html.rb +38 -38
- data/lib/searchlink/curl/json.rb +19 -17
- data/lib/searchlink/curl.rb +2 -2
- data/lib/searchlink/exceptions.rb +2 -2
- data/lib/searchlink/help.rb +13 -13
- data/lib/searchlink/output.rb +21 -21
- data/lib/searchlink/parse.rb +113 -108
- data/lib/searchlink/plist.rb +11 -11
- data/lib/searchlink/script_plugin.rb +10 -10
- data/lib/searchlink/search.rb +6 -6
- data/lib/searchlink/searches/amazon.rb +4 -4
- data/lib/searchlink/searches/applemusic.rb +28 -28
- data/lib/searchlink/searches/bitly.rb +11 -11
- data/lib/searchlink/searches/definition.rb +7 -7
- data/lib/searchlink/searches/duckduckgo.rb +31 -27
- data/lib/searchlink/searches/github.rb +48 -48
- data/lib/searchlink/searches/google.rb +16 -16
- data/lib/searchlink/searches/helpers/chromium.rb +46 -46
- data/lib/searchlink/searches/helpers/firefox.rb +20 -20
- data/lib/searchlink/searches/helpers/safari.rb +14 -14
- data/lib/searchlink/searches/history.rb +78 -78
- data/lib/searchlink/searches/hook.rb +5 -5
- data/lib/searchlink/searches/itunes.rb +37 -37
- data/lib/searchlink/searches/lastfm.rb +13 -13
- data/lib/searchlink/searches/linkding.rb +14 -14
- data/lib/searchlink/searches/lyrics.rb +11 -11
- data/lib/searchlink/searches/pinboard.rb +35 -35
- data/lib/searchlink/searches/social.rb +45 -56
- data/lib/searchlink/searches/software.rb +4 -4
- data/lib/searchlink/searches/spelling.rb +10 -10
- data/lib/searchlink/searches/spotlight.rb +4 -4
- data/lib/searchlink/searches/stackoverflow.rb +5 -5
- data/lib/searchlink/searches/tmdb.rb +17 -17
- data/lib/searchlink/searches/twitter.rb +8 -8
- data/lib/searchlink/searches/wikipedia.rb +4 -4
- data/lib/searchlink/searches/youtube.rb +7 -7
- data/lib/searchlink/searches.rb +16 -16
- data/lib/searchlink/semver.rb +4 -4
- data/lib/searchlink/string.rb +55 -55
- data/lib/searchlink/url.rb +30 -32
- data/lib/searchlink/util.rb +3 -3
- data/lib/searchlink/version.rb +19 -21
- data/lib/searchlink/which.rb +5 -5
- data/lib/searchlink.rb +31 -31
- metadata +31 -18
- data/lib/tokens.rb +0 -3
|
@@ -5,9 +5,9 @@ module SL
|
|
|
5
5
|
class << self
|
|
6
6
|
def settings
|
|
7
7
|
{
|
|
8
|
-
trigger:
|
|
8
|
+
trigger: "wiki",
|
|
9
9
|
searches: [
|
|
10
|
-
[
|
|
10
|
+
["wiki", "Wikipedia Search"]
|
|
11
11
|
]
|
|
12
12
|
}
|
|
13
13
|
end
|
|
@@ -17,7 +17,7 @@ module SL
|
|
|
17
17
|
body = `/usr/bin/curl -sSL 'https://en.wikipedia.org/wiki/Special:Search?search=#{search_terms.url_encode}&go=Go'`
|
|
18
18
|
return false unless body
|
|
19
19
|
|
|
20
|
-
body = body.force_encoding(
|
|
20
|
+
body = body.force_encoding("utf-8") if RUBY_VERSION.to_f > 1.9
|
|
21
21
|
|
|
22
22
|
begin
|
|
23
23
|
title = body.match(/"wgTitle":"(.*?)"/)[1]
|
|
@@ -30,6 +30,6 @@ module SL
|
|
|
30
30
|
end
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
-
SL::Searches.register
|
|
33
|
+
SL::Searches.register "wikipedia", :search, self
|
|
34
34
|
end
|
|
35
35
|
end
|
|
@@ -8,10 +8,10 @@ module SL
|
|
|
8
8
|
class << self
|
|
9
9
|
def settings
|
|
10
10
|
{
|
|
11
|
-
trigger:
|
|
11
|
+
trigger: "yte?",
|
|
12
12
|
searches: [
|
|
13
|
-
[
|
|
14
|
-
[
|
|
13
|
+
["yt", "YouTube Search"],
|
|
14
|
+
["yte", "YouTube Embed"]
|
|
15
15
|
]
|
|
16
16
|
}
|
|
17
17
|
end
|
|
@@ -33,18 +33,18 @@ module SL
|
|
|
33
33
|
def embed_for_url(url)
|
|
34
34
|
return unless url =~ YOUTUBE_RX
|
|
35
35
|
|
|
36
|
-
id = Regexp.last_match(
|
|
36
|
+
id = Regexp.last_match("id")
|
|
37
37
|
title = [
|
|
38
38
|
%(<iframe width="560" height="315" src="https://www.youtube.com/embed/#{id}"),
|
|
39
39
|
%(title="YouTube video player" frameborder="0"),
|
|
40
40
|
%(allow="accelerometer; autoplay; clipboard-write; encrypted-media;),
|
|
41
41
|
%(gyroscope; picture-in-picture; web-share"),
|
|
42
42
|
%(allowfullscreen></iframe>)
|
|
43
|
-
].join(
|
|
44
|
-
[
|
|
43
|
+
].join(" ")
|
|
44
|
+
["embed", title]
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
SL::Searches.register
|
|
48
|
+
SL::Searches.register "youtube", :search, self
|
|
49
49
|
end
|
|
50
50
|
end
|
data/lib/searchlink/searches.rb
CHANGED
|
@@ -43,9 +43,9 @@ module SL
|
|
|
43
43
|
#
|
|
44
44
|
def available_searches_html
|
|
45
45
|
searches = plugins[:search]
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
.flat_map { |_, plugin| plugin[:searches] }
|
|
47
|
+
.reject { |s| s[1].nil? }
|
|
48
|
+
.sort_by { |s| s[0].is_a?(Array) ? s[0][0] : s[0] }
|
|
49
49
|
out = ['<table id="searches">',
|
|
50
50
|
"<thead><td>Shortcut</td><td>Search Type</td></thead>",
|
|
51
51
|
"<tbody>"]
|
|
@@ -53,7 +53,7 @@ module SL
|
|
|
53
53
|
searches.each do |s|
|
|
54
54
|
out << "<tr>
|
|
55
55
|
<td>
|
|
56
|
-
<code>!#{s[0].is_a?(Array) ? "#{s[0][0]} (#{s[0][1..-1].join(
|
|
56
|
+
<code>!#{s[0].is_a?(Array) ? "#{s[0][0]} (#{s[0][1..-1].join(',')})" : s[0]}
|
|
57
57
|
</code>
|
|
58
58
|
</td><td>#{s[1]}</td></tr>"
|
|
59
59
|
end
|
|
@@ -72,10 +72,10 @@ module SL
|
|
|
72
72
|
|
|
73
73
|
searches.each do |s|
|
|
74
74
|
shortcut = if s[0].is_a?(Array)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
"#{s[0][0]} (#{s[0][1..-1].join(',')})"
|
|
76
|
+
else
|
|
77
|
+
s[0]
|
|
78
|
+
end
|
|
79
79
|
|
|
80
80
|
out << "!#{shortcut}#{shortcut.spacer}#{s[1]}"
|
|
81
81
|
end
|
|
@@ -95,7 +95,7 @@ module SL
|
|
|
95
95
|
|
|
96
96
|
def did_you_mean(term)
|
|
97
97
|
matches = best_search_match(term)
|
|
98
|
-
matches.empty? ? "" : ", did you mean #{matches.map { |m| "!#{m}" }.join(
|
|
98
|
+
matches.empty? ? "" : ", did you mean #{matches.map { |m| "!#{m}" }.join(', ')}?"
|
|
99
99
|
end
|
|
100
100
|
|
|
101
101
|
def valid_searches
|
|
@@ -123,7 +123,7 @@ module SL
|
|
|
123
123
|
plugins[type][title] = {
|
|
124
124
|
trigger: settings.fetch(:trigger, title).normalize_trigger,
|
|
125
125
|
searches: settings[:searches],
|
|
126
|
-
class: klass
|
|
126
|
+
class: klass
|
|
127
127
|
}
|
|
128
128
|
end
|
|
129
129
|
|
|
@@ -154,11 +154,11 @@ module SL
|
|
|
154
154
|
config = IO.read(file)
|
|
155
155
|
|
|
156
156
|
cfg = case ext
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
157
|
+
when /^y/i
|
|
158
|
+
YAML.safe_load(config)
|
|
159
|
+
else
|
|
160
|
+
JSON.parse(config)
|
|
161
|
+
end
|
|
162
162
|
cfg["filename"] = File.basename(file)
|
|
163
163
|
cfg["path"] = file.shorten_path
|
|
164
164
|
SL::ScriptSearch.new(cfg)
|
|
@@ -241,5 +241,5 @@ require_relative "searches/youtube"
|
|
|
241
241
|
# import
|
|
242
242
|
require_relative "searches/stackoverflow"
|
|
243
243
|
|
|
244
|
-
#import
|
|
244
|
+
# import
|
|
245
245
|
require_relative "searches/linkding"
|
data/lib/searchlink/semver.rb
CHANGED
|
@@ -12,13 +12,13 @@ module SL
|
|
|
12
12
|
## @return [SemVer] SemVer object
|
|
13
13
|
##
|
|
14
14
|
def initialize(version_string)
|
|
15
|
-
raise VersionError
|
|
15
|
+
raise VersionError, "Invalid semantic version number: #{version_string}" unless version_string.valid_version?
|
|
16
16
|
|
|
17
17
|
@maj, @min, @patch = version_string.split(/\./)
|
|
18
18
|
@pre = nil
|
|
19
19
|
if @patch =~ /(-?[^0-9]+\d*)$/
|
|
20
|
-
@pre = Regexp.last_match(1).sub(/^-/,
|
|
21
|
-
@patch = @patch.sub(/(-?[^0-9]+\d*)$/,
|
|
20
|
+
@pre = Regexp.last_match(1).sub(/^-/, "")
|
|
21
|
+
@patch = @patch.sub(/(-?[^0-9]+\d*)$/, "")
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
@maj = @maj.to_i
|
|
@@ -135,7 +135,7 @@ module SL
|
|
|
135
135
|
end
|
|
136
136
|
|
|
137
137
|
def to_s
|
|
138
|
-
ver = [@maj, @min, @patch].join(
|
|
138
|
+
ver = [@maj, @min, @patch].join(".")
|
|
139
139
|
@pre.nil? ? ver : "#{ver}-#{@pre}"
|
|
140
140
|
end
|
|
141
141
|
end
|
data/lib/searchlink/string.rb
CHANGED
|
@@ -5,7 +5,7 @@ module SL
|
|
|
5
5
|
class ::String
|
|
6
6
|
# Scrub invalid characters from string
|
|
7
7
|
def scrubup
|
|
8
|
-
encode(
|
|
8
|
+
encode("utf-16", invalid: :replace).encode("utf-8").gsub(/\u00A0/, " ")
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
# @see #scrub
|
|
@@ -32,7 +32,7 @@ module SL
|
|
|
32
32
|
## @return [String] modified regular expression
|
|
33
33
|
##
|
|
34
34
|
def normalize_trigger
|
|
35
|
-
gsub(/\((?!\?:)/,
|
|
35
|
+
gsub(/\((?!\?:)/, "(?:").gsub(/(^(\^|\\A)|(\$|\\Z)$)/, "").downcase
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
##
|
|
@@ -56,29 +56,29 @@ module SL
|
|
|
56
56
|
def parse_flags
|
|
57
57
|
gsub(/(\+\+|--)([dirtvs]+)\b/) do
|
|
58
58
|
m = Regexp.last_match
|
|
59
|
-
bool = m[1] ==
|
|
60
|
-
output =
|
|
61
|
-
m[2].split(
|
|
59
|
+
bool = m[1] == "++" ? "" : "no-"
|
|
60
|
+
output = " "
|
|
61
|
+
m[2].split("").each do |arg|
|
|
62
62
|
output += case arg
|
|
63
|
-
when
|
|
63
|
+
when "d"
|
|
64
64
|
"--#{bool}debug "
|
|
65
|
-
when
|
|
65
|
+
when "i"
|
|
66
66
|
"--#{bool}inline "
|
|
67
|
-
when
|
|
67
|
+
when "r"
|
|
68
68
|
"--#{bool}prefix_random "
|
|
69
|
-
when
|
|
69
|
+
when "t"
|
|
70
70
|
"--#{bool}include_titles "
|
|
71
|
-
when
|
|
71
|
+
when "v"
|
|
72
72
|
"--#{bool}validate_links "
|
|
73
|
-
when
|
|
73
|
+
when "s"
|
|
74
74
|
"--#{bool}remove_seo "
|
|
75
75
|
else
|
|
76
|
-
|
|
76
|
+
""
|
|
77
77
|
end
|
|
78
78
|
end
|
|
79
79
|
|
|
80
80
|
output
|
|
81
|
-
end.gsub(/ +/,
|
|
81
|
+
end.gsub(/ +/, " ")
|
|
82
82
|
end
|
|
83
83
|
|
|
84
84
|
def parse_flags!
|
|
@@ -91,7 +91,7 @@ module SL
|
|
|
91
91
|
## @return { description_of_the_return_value }
|
|
92
92
|
##
|
|
93
93
|
def fix_gist_file
|
|
94
|
-
sub(/^file-/,
|
|
94
|
+
sub(/^file-/, "").sub(/-([^\-]+)$/, '.\1')
|
|
95
95
|
end
|
|
96
96
|
|
|
97
97
|
# Turn a string into a slug, removing spaces and
|
|
@@ -100,7 +100,7 @@ module SL
|
|
|
100
100
|
# @return [String] slugified string
|
|
101
101
|
#
|
|
102
102
|
def slugify
|
|
103
|
-
downcase.gsub(/[^a-z0-9_]/i,
|
|
103
|
+
downcase.gsub(/[^a-z0-9_]/i, "-").gsub(/-+/, "-").sub(/-?$/, "")
|
|
104
104
|
end
|
|
105
105
|
|
|
106
106
|
# Destructive slugify
|
|
@@ -116,11 +116,11 @@ module SL
|
|
|
116
116
|
## @return [String] cleaned URL/String
|
|
117
117
|
##
|
|
118
118
|
def clean
|
|
119
|
-
gsub(/\n+/,
|
|
120
|
-
.gsub(/"/,
|
|
121
|
-
.gsub(/\|/,
|
|
119
|
+
gsub(/\n+/, " ")
|
|
120
|
+
.gsub(/"/, "&quot;")
|
|
121
|
+
.gsub(/\|/, "-")
|
|
122
122
|
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
|
123
|
-
.sub(/\?&/,
|
|
123
|
+
.sub(/\?&/, "").strip
|
|
124
124
|
end
|
|
125
125
|
|
|
126
126
|
# convert itunes to apple music link
|
|
@@ -128,8 +128,8 @@ module SL
|
|
|
128
128
|
# @return [String] apple music link
|
|
129
129
|
def to_am
|
|
130
130
|
input = dup
|
|
131
|
-
input.sub!(%r{/itunes\.apple\.com},
|
|
132
|
-
append = input =~ %r{\?[^/]+=} ?
|
|
131
|
+
input.sub!(%r{/itunes\.apple\.com}, "geo.itunes.apple.com")
|
|
132
|
+
append = input =~ %r{\?[^/]+=} ? "&app=music" : "?app=music"
|
|
133
133
|
input + append
|
|
134
134
|
end
|
|
135
135
|
|
|
@@ -139,7 +139,7 @@ module SL
|
|
|
139
139
|
## @return [String] just hostname and path of URL
|
|
140
140
|
##
|
|
141
141
|
def remove_protocol
|
|
142
|
-
sub(%r{^(https?|s?ftp|file)://},
|
|
142
|
+
sub(%r{^(https?|s?ftp|file)://}, "")
|
|
143
143
|
end
|
|
144
144
|
|
|
145
145
|
##
|
|
@@ -158,11 +158,11 @@ module SL
|
|
|
158
158
|
def path_elements
|
|
159
159
|
path = url_path
|
|
160
160
|
# force trailing slash
|
|
161
|
-
path.sub!(%r{/?$},
|
|
161
|
+
path.sub!(%r{/?$}, "/")
|
|
162
162
|
# remove last path element
|
|
163
|
-
path.sub!(%r{/[^/]+[.\-][^/]+/$},
|
|
163
|
+
path.sub!(%r{/[^/]+[.\-][^/]+/$}, "")
|
|
164
164
|
# remove starting/ending slashes
|
|
165
|
-
path.gsub!(%r{(^/|/$)},
|
|
165
|
+
path.gsub!(%r{(^/|/$)}, "")
|
|
166
166
|
# split at slashes, delete sections that are shorter
|
|
167
167
|
# than 5 characters or only consist of numbers
|
|
168
168
|
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
|
@@ -189,11 +189,11 @@ module SL
|
|
|
189
189
|
words = split(/\s+/)
|
|
190
190
|
|
|
191
191
|
punct_chars = {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
192
|
+
"“" => "”",
|
|
193
|
+
"‘" => "’",
|
|
194
|
+
"[" => "]",
|
|
195
|
+
"(" => ")",
|
|
196
|
+
"<" => ">"
|
|
197
197
|
}
|
|
198
198
|
|
|
199
199
|
left_punct = []
|
|
@@ -205,10 +205,10 @@ module SL
|
|
|
205
205
|
end
|
|
206
206
|
end
|
|
207
207
|
|
|
208
|
-
tail =
|
|
208
|
+
tail = ""
|
|
209
209
|
left_punct.reverse.each { |c| tail += punct_chars[c] }
|
|
210
210
|
|
|
211
|
-
gsub(/[^a-z)\]’”.…]+$/i,
|
|
211
|
+
gsub(/[^a-z)\]’”.…]+$/i, "...").strip + tail
|
|
212
212
|
end
|
|
213
213
|
|
|
214
214
|
##
|
|
@@ -235,9 +235,9 @@ module SL
|
|
|
235
235
|
url = URI.parse(url)
|
|
236
236
|
host = url.hostname
|
|
237
237
|
unless host
|
|
238
|
-
return self unless SL.config[
|
|
238
|
+
return self unless SL.config["debug"]
|
|
239
239
|
|
|
240
|
-
SL.add_error(
|
|
240
|
+
SL.add_error("Invalid URL", "Could not remove SEO for #{url}")
|
|
241
241
|
return self
|
|
242
242
|
|
|
243
243
|
end
|
|
@@ -245,12 +245,12 @@ module SL
|
|
|
245
245
|
path = url.path
|
|
246
246
|
root_page = path =~ %r{^/?$} ? true : false
|
|
247
247
|
|
|
248
|
-
title.gsub!(/\s*(–|—)\s*/,
|
|
248
|
+
title.gsub!(/\s*(–|—)\s*/, " - ")
|
|
249
249
|
title.gsub!(/&[lr]dquo;/, '"')
|
|
250
250
|
title.gsub!(/&[lr]dquo;/, "'")
|
|
251
|
-
title.gsub!(/–/,
|
|
251
|
+
title.gsub!(/–/, " — ")
|
|
252
252
|
title = CGI.unescapeHTML(title)
|
|
253
|
-
title.gsub!(/ +/,
|
|
253
|
+
title.gsub!(/ +/, " ")
|
|
254
254
|
|
|
255
255
|
seo_title_separators = %w[| » « — – - · :]
|
|
256
256
|
|
|
@@ -258,7 +258,7 @@ module SL
|
|
|
258
258
|
re_parts = []
|
|
259
259
|
|
|
260
260
|
host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
|
|
261
|
-
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(
|
|
261
|
+
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(".?") }.join("|") : ""
|
|
262
262
|
re_parts.push(h_re) unless h_re.empty?
|
|
263
263
|
|
|
264
264
|
# p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
|
|
@@ -268,7 +268,7 @@ module SL
|
|
|
268
268
|
|
|
269
269
|
dead_switch = 0
|
|
270
270
|
|
|
271
|
-
while title.downcase.gsub(/[^a-z]/i,
|
|
271
|
+
while title.downcase.gsub(/[^a-z]/i, "") =~ /#{site_re}/i
|
|
272
272
|
|
|
273
273
|
break if dead_switch > 5
|
|
274
274
|
|
|
@@ -277,14 +277,14 @@ module SL
|
|
|
277
277
|
|
|
278
278
|
next if parts.length == 1
|
|
279
279
|
|
|
280
|
-
remaining_separators = seo_title_separators[i..-1].map { |s| Regexp.escape(s) }.join(
|
|
280
|
+
remaining_separators = seo_title_separators[i..-1].map { |s| Regexp.escape(s) }.join("")
|
|
281
281
|
seps = Regexp.new("^[^#{remaining_separators}]+$")
|
|
282
282
|
|
|
283
283
|
longest = parts.longest_element.strip
|
|
284
284
|
|
|
285
285
|
unless parts.empty?
|
|
286
286
|
parts.delete_if do |pt|
|
|
287
|
-
compressed = pt.strip.downcase.gsub(/[^a-z]/i,
|
|
287
|
+
compressed = pt.strip.downcase.gsub(/[^a-z]/i, "")
|
|
288
288
|
compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
|
|
289
289
|
end
|
|
290
290
|
end
|
|
@@ -302,7 +302,7 @@ module SL
|
|
|
302
302
|
dead_switch += 1
|
|
303
303
|
end
|
|
304
304
|
rescue StandardError => e
|
|
305
|
-
return self unless SL.config[
|
|
305
|
+
return self unless SL.config["debug"]
|
|
306
306
|
|
|
307
307
|
SL.add_error("Error SEO processing title for #{url}", e)
|
|
308
308
|
return self
|
|
@@ -314,7 +314,7 @@ module SL
|
|
|
314
314
|
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
|
315
315
|
end
|
|
316
316
|
|
|
317
|
-
title && title.length > 5 ? title.gsub(/\s+/,
|
|
317
|
+
title && title.length > 5 ? title.gsub(/\s+/, " ") : CGI.unescapeHTML(self)
|
|
318
318
|
end
|
|
319
319
|
|
|
320
320
|
##
|
|
@@ -340,12 +340,12 @@ module SL
|
|
|
340
340
|
|
|
341
341
|
words = split(/\s+/)
|
|
342
342
|
words.each do |word|
|
|
343
|
-
break unless trunc_title.join(
|
|
343
|
+
break unless trunc_title.join(" ").length.close_punctuation + word.length <= max
|
|
344
344
|
|
|
345
345
|
trunc_title << word
|
|
346
346
|
end
|
|
347
347
|
|
|
348
|
-
trunc_title.empty? ? words[0] : trunc_title.join(
|
|
348
|
+
trunc_title.empty? ? words[0] : trunc_title.join(" ")
|
|
349
349
|
end
|
|
350
350
|
|
|
351
351
|
##
|
|
@@ -370,7 +370,7 @@ module SL
|
|
|
370
370
|
## @param start_word [Boolean] Require match to be
|
|
371
371
|
## at beginning of word
|
|
372
372
|
##
|
|
373
|
-
def matches_score(terms, separator:
|
|
373
|
+
def matches_score(terms, separator: " ", start_word: true)
|
|
374
374
|
matched = 0
|
|
375
375
|
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
|
376
376
|
|
|
@@ -383,7 +383,7 @@ module SL
|
|
|
383
383
|
((matched / regexes.count.to_f) * 10).round(3)
|
|
384
384
|
end
|
|
385
385
|
|
|
386
|
-
def matches_fuzzy(terms, separator:
|
|
386
|
+
def matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5)
|
|
387
387
|
sources = split(/(#{separator})+/)
|
|
388
388
|
words = terms.split(/(#{separator})+/)
|
|
389
389
|
matches = 0
|
|
@@ -429,7 +429,7 @@ module SL
|
|
|
429
429
|
## @param string [String] The string to match
|
|
430
430
|
##
|
|
431
431
|
def matches_exact(string)
|
|
432
|
-
comp = gsub(/[^a-z0-9 ]/i,
|
|
432
|
+
comp = gsub(/[^a-z0-9 ]/i, "")
|
|
433
433
|
comp =~ /\b#{string.gsub(/[^a-z0-9 ]/i, '').split(/ +/).map { |s| Regexp.escape(s) }.join(' +')}/i
|
|
434
434
|
end
|
|
435
435
|
|
|
@@ -440,7 +440,7 @@ module SL
|
|
|
440
440
|
##
|
|
441
441
|
def matches_none(terms)
|
|
442
442
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
443
|
-
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i,
|
|
443
|
+
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
|
444
444
|
true
|
|
445
445
|
end
|
|
446
446
|
|
|
@@ -451,7 +451,7 @@ module SL
|
|
|
451
451
|
##
|
|
452
452
|
def matches_any(terms)
|
|
453
453
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
454
|
-
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i,
|
|
454
|
+
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
|
455
455
|
false
|
|
456
456
|
end
|
|
457
457
|
|
|
@@ -462,7 +462,7 @@ module SL
|
|
|
462
462
|
##
|
|
463
463
|
def matches_all(terms)
|
|
464
464
|
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
465
|
-
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i,
|
|
465
|
+
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i, "") =~ rx }
|
|
466
466
|
true
|
|
467
467
|
end
|
|
468
468
|
|
|
@@ -475,8 +475,8 @@ module SL
|
|
|
475
475
|
##
|
|
476
476
|
## @return [Array] array of regular expressions
|
|
477
477
|
##
|
|
478
|
-
def to_rx_array(separator:
|
|
479
|
-
bound = start_word ? '\b' :
|
|
478
|
+
def to_rx_array(separator: " ", start_word: true)
|
|
479
|
+
bound = start_word ? '\b' : ""
|
|
480
480
|
str = gsub(/(#{separator})+/, separator)
|
|
481
481
|
str.split(/#{separator}/).map { |arg| /#{bound}#{arg.gsub(/[^a-z0-9]/i, '.?')}/i }
|
|
482
482
|
end
|
|
@@ -493,8 +493,8 @@ module SL
|
|
|
493
493
|
## Shorten path by adding ~ for home directory
|
|
494
494
|
##
|
|
495
495
|
def shorten_path
|
|
496
|
-
home_directory = ENV[
|
|
497
|
-
sub(home_directory,
|
|
496
|
+
home_directory = ENV["HOME"]
|
|
497
|
+
sub(home_directory, "~")
|
|
498
498
|
end
|
|
499
499
|
end
|
|
500
500
|
end
|
data/lib/searchlink/url.rb
CHANGED
|
@@ -9,39 +9,39 @@ module SL
|
|
|
9
9
|
def valid_link?(uri_str, limit = 5)
|
|
10
10
|
return false unless uri_str
|
|
11
11
|
|
|
12
|
-
SL.notify(
|
|
12
|
+
SL.notify("Validating", uri_str)
|
|
13
13
|
return false if limit.zero?
|
|
14
14
|
|
|
15
15
|
url = URI(uri_str)
|
|
16
16
|
return true unless url.scheme
|
|
17
17
|
|
|
18
|
-
url.path =
|
|
18
|
+
url.path = "/" if url.path == ""
|
|
19
19
|
# response = Net::HTTP.get_response(URI(uri_str))
|
|
20
20
|
response = false
|
|
21
21
|
|
|
22
|
-
Net::HTTP.start(url.host, url.port, use_ssl: url.scheme ==
|
|
22
|
+
Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == "https") do |http|
|
|
23
23
|
response = http.request_head(url.path)
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
case response
|
|
27
27
|
when Net::HTTPMethodNotAllowed, Net::HTTPServiceUnavailable
|
|
28
28
|
unless /amazon\.com/ =~ url.host
|
|
29
|
-
SL.add_error(
|
|
29
|
+
SL.add_error("link validation", "Validation blocked: #{uri_str} (#{e})")
|
|
30
30
|
end
|
|
31
|
-
SL.notify(
|
|
31
|
+
SL.notify("Error validating", uri_str)
|
|
32
32
|
true
|
|
33
33
|
when Net::HTTPSuccess
|
|
34
34
|
true
|
|
35
35
|
when Net::HTTPRedirection
|
|
36
|
-
location = response[
|
|
36
|
+
location = response["location"]
|
|
37
37
|
valid_link?(location, limit - 1)
|
|
38
38
|
else
|
|
39
|
-
SL.notify(
|
|
39
|
+
SL.notify("Error validating", uri_str)
|
|
40
40
|
false
|
|
41
41
|
end
|
|
42
42
|
rescue StandardError => e
|
|
43
|
-
SL.notify(
|
|
44
|
-
SL.add_error(
|
|
43
|
+
SL.notify("Error validating", uri_str)
|
|
44
|
+
SL.add_error("link validation", "Possibly invalid => #{uri_str} (#{e})")
|
|
45
45
|
true
|
|
46
46
|
end
|
|
47
47
|
|
|
@@ -58,14 +58,14 @@ module SL
|
|
|
58
58
|
|
|
59
59
|
parts = url.hostname.split(/\./)
|
|
60
60
|
domain = if parts.count > 1
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
61
|
+
parts.slice(-2, 1).join("")
|
|
62
|
+
else
|
|
63
|
+
parts.join("")
|
|
64
|
+
end
|
|
65
65
|
|
|
66
66
|
path = url.path.split(%r{/}).last
|
|
67
67
|
if path
|
|
68
|
-
path.gsub!(/-/,
|
|
68
|
+
path.gsub!(/-/, " ").gsub!(/\.\w{2-4}$/, "")
|
|
69
69
|
else
|
|
70
70
|
path = domain
|
|
71
71
|
end
|
|
@@ -81,10 +81,10 @@ module SL
|
|
|
81
81
|
url = URI.parse(input.downcase)
|
|
82
82
|
|
|
83
83
|
title = if type == :ref_title
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
ref_title_for_url(url)
|
|
85
|
+
else
|
|
86
|
+
title(url.to_s) || input.sub(%r{^https?://}, "")
|
|
87
|
+
end
|
|
88
88
|
|
|
89
89
|
return [url.to_s, title] if url.hostname
|
|
90
90
|
end
|
|
@@ -94,15 +94,13 @@ module SL
|
|
|
94
94
|
def amazon_affiliatize(url, amazon_partner)
|
|
95
95
|
return url if amazon_partner.nil? || amazon_partner.empty?
|
|
96
96
|
|
|
97
|
-
unless url =~ %r{https?://(?<subdomain>.*?)amazon.com/(?:(?<title>.*?)/)?(?<type>[dg])p/(?<id>[^?]+)}
|
|
98
|
-
return [url, '']
|
|
99
|
-
end
|
|
97
|
+
return [url, ""] unless url =~ %r{https?://(?<subdomain>.*?)amazon.com/(?:(?<title>.*?)/)?(?<type>[dg])p/(?<id>[^?]+)}
|
|
100
98
|
|
|
101
99
|
m = Regexp.last_match
|
|
102
|
-
sd = m[
|
|
103
|
-
title = m[
|
|
104
|
-
t = m[
|
|
105
|
-
id = m[
|
|
100
|
+
sd = m["subdomain"]
|
|
101
|
+
title = m["title"].gsub(/-/, " ")
|
|
102
|
+
t = m["type"]
|
|
103
|
+
id = m["id"]
|
|
106
104
|
["https://#{sd}amazon.com/#{t}p/#{id}/?ref=as_li_ss_tl&ie=UTF8&linkCode=sl1&tag=#{amazon_partner}", title]
|
|
107
105
|
end
|
|
108
106
|
|
|
@@ -137,17 +135,17 @@ module SL
|
|
|
137
135
|
title = page.title || nil
|
|
138
136
|
|
|
139
137
|
if title.nil? || title =~ /^\s*$/
|
|
140
|
-
SL.add_error(
|
|
141
|
-
title = url.gsub(%r{(^https?://|/.*$)},
|
|
138
|
+
SL.add_error("Title not found", "Warning: missing title for #{url.strip}")
|
|
139
|
+
title = url.gsub(%r{(^https?://|/.*$)}, "").gsub(/-/, " ").strip
|
|
142
140
|
else
|
|
143
|
-
title = title.gsub(/\n/,
|
|
144
|
-
title.remove_seo!(url) if SL.config[
|
|
141
|
+
title = title.gsub(/\n/, " ").gsub(/\s+/, " ").strip # .sub(/[^a-z]*$/i,'')
|
|
142
|
+
title.remove_seo!(url) if SL.config["remove_seo"]
|
|
145
143
|
end
|
|
146
|
-
title.gsub!(/\|/,
|
|
147
|
-
title.remove_seo!(url.strip) if SL.config[
|
|
144
|
+
title.gsub!(/\|/, "—")
|
|
145
|
+
title.remove_seo!(url.strip) if SL.config["remove_seo"]
|
|
148
146
|
title.remove_protocol
|
|
149
147
|
rescue StandardError
|
|
150
|
-
SL.add_error(
|
|
148
|
+
SL.add_error("Error retrieving title", "Error determining title for #{url.strip}")
|
|
151
149
|
warn "Error retrieving title for #{url.strip}"
|
|
152
150
|
url.remove_protocol
|
|
153
151
|
end
|
data/lib/searchlink/util.rb
CHANGED
|
@@ -65,7 +65,7 @@ module SL
|
|
|
65
65
|
url, title, link_text = search.call
|
|
66
66
|
end
|
|
67
67
|
rescue Timeout::Error
|
|
68
|
-
SL.add_error(
|
|
68
|
+
SL.add_error("Timeout", "Search timed out")
|
|
69
69
|
url, title, link_text = false
|
|
70
70
|
end
|
|
71
71
|
|
|
@@ -81,9 +81,9 @@ module SL
|
|
|
81
81
|
## @return [String] path to new cache file
|
|
82
82
|
##
|
|
83
83
|
def cache_file_for(filename)
|
|
84
|
-
cache_folder = File.expand_path(
|
|
84
|
+
cache_folder = File.expand_path("~/.config/searchlink/cache")
|
|
85
85
|
FileUtils.mkdir_p(cache_folder) unless File.directory?(cache_folder)
|
|
86
|
-
File.join(cache_folder, filename.sub(/(\.cache)?$/,
|
|
86
|
+
File.join(cache_folder, filename.sub(/(\.cache)?$/, ".cache"))
|
|
87
87
|
end
|
|
88
88
|
end
|
|
89
89
|
end
|