searchlink 2.3.63 → 2.3.65
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/searchlink +2 -4
- data/lib/searchlink/curl/html.rb +3 -4
- data/lib/searchlink/searches/github.rb +17 -11
- data/lib/searchlink/string.rb +415 -413
- data/lib/searchlink/version.rb +16 -6
- data/lib/tokens.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd9142e4c8363dca0dbbc82c7bb4be3112ce335f908641b4184dbde9e1525960
|
4
|
+
data.tar.gz: c8e7cf285e4de29c24a7fcb53014c2efa4da8a3c38a857f11747b97a8a653ba0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d2336776d6526804d9df51e180a9996a89cc9786ef4c0cefa5fede4e3ba2c9a931c4240c2116809f6fd928dc8155c7c3b26b0141add9bf1ed39000bdf73ca2e
|
7
|
+
data.tar.gz: c7710d400e7346a79063604af305027a851f490ec055f7b762bc9d414db5f5ebf1d21504bff3acccb7e5832c42d5914ce844b324f483a7c0044ff72720b637dc
|
data/bin/searchlink
CHANGED
@@ -46,8 +46,7 @@ if !ARGV.empty?
|
|
46
46
|
|
47
47
|
files.each do |file|
|
48
48
|
if File.exist?(file) && `file -b "#{file}"|grep -c text`.to_i.positive?
|
49
|
-
input =
|
50
|
-
input.scrub!
|
49
|
+
input = IO.read(file).scrubup
|
51
50
|
|
52
51
|
backup_file = "#{file}.bak"
|
53
52
|
backup_file = "#{file}.bak 1" if File.exist?(backup_file)
|
@@ -72,8 +71,7 @@ if !ARGV.empty?
|
|
72
71
|
end
|
73
72
|
end
|
74
73
|
else
|
75
|
-
input =
|
76
|
-
input.scrub!
|
74
|
+
input = $stdin.read.scrubup
|
77
75
|
sl.parse(input)
|
78
76
|
output = SL.output&.join('')
|
79
77
|
|
data/lib/searchlink/curl/html.rb
CHANGED
@@ -107,6 +107,7 @@ module Curl
|
|
107
107
|
## @param tag The tag
|
108
108
|
## @param source [Boolean] Return full tag instead of contents
|
109
109
|
##
|
110
|
+
## @return [Array] array of tag matches/contents
|
110
111
|
def extract_tag_contents(tag, source: false)
|
111
112
|
return @body.scan(%r{<#{tag}.*?>(?:.*?</#{tag}>)?}) if source
|
112
113
|
|
@@ -452,13 +453,11 @@ module Curl
|
|
452
453
|
end
|
453
454
|
|
454
455
|
# look for a charset in the content-type header
|
455
|
-
if content_type
|
456
|
-
encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
|
457
|
-
end
|
456
|
+
encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1] if content_type
|
458
457
|
|
459
458
|
# look for a charset in a meta tag in the first 1024 bytes
|
460
459
|
unless encoding
|
461
|
-
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m,
|
460
|
+
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
|
462
461
|
data.scan(/<meta.*?>/im).each do |meta|
|
463
462
|
encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
|
464
463
|
end
|
@@ -24,6 +24,8 @@ module SL
|
|
24
24
|
url, title, link_text = github(search_terms, link_text)
|
25
25
|
end
|
26
26
|
|
27
|
+
return SL.ddg("site:github.com #{search_terms}", link_text) unless url
|
28
|
+
|
27
29
|
link_text = title if link_text == '' || link_text == search_terms
|
28
30
|
|
29
31
|
[url, title, link_text]
|
@@ -32,9 +34,9 @@ module SL
|
|
32
34
|
def github_search_curl(endpoint, query)
|
33
35
|
headers = {
|
34
36
|
'Accept' => 'application/vnd.github+json',
|
35
|
-
'X-GitHub-Api-Version' => '2022-11-28'
|
37
|
+
'X-GitHub-Api-Version' => '2022-11-28'
|
36
38
|
}
|
37
|
-
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if Secrets::GH_AUTH_TOKEN
|
39
|
+
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if defined? Secrets::GH_AUTH_TOKEN
|
38
40
|
|
39
41
|
url = "https://api.github.com/search/#{endpoint}?q=#{query.url_encode}&per_page=1&page=1&order=desc"
|
40
42
|
res = Curl::Json.new(url, headers: headers)
|
@@ -51,7 +53,7 @@ module SL
|
|
51
53
|
'Accept' => 'application/vnd.github+json',
|
52
54
|
'X-GitHub-Api-Version' => '2022-11-28'
|
53
55
|
}
|
54
|
-
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if Secrets::GH_AUTH_TOKEN
|
56
|
+
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if defined? Secrets::GH_AUTH_TOKEN
|
55
57
|
|
56
58
|
url = "https://api.github.com/users/#{user}/gists?per_page=100&page=#{page}"
|
57
59
|
|
@@ -113,14 +115,18 @@ module SL
|
|
113
115
|
end
|
114
116
|
|
115
117
|
def search_github(search_terms, link_text)
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
118
|
+
replacements = [
|
119
|
+
[%r{(\S+)/(\S+)}, 'user:\1 \2'],
|
120
|
+
[/\bu\w*:(\w+)/, 'user:\1'],
|
121
|
+
[/\bl\w*:(\w+)/, 'language:\1'],
|
122
|
+
[/\bin?:r\w*/, 'in:readme'],
|
123
|
+
[/\bin?:t\w*/, 'in:topics'],
|
124
|
+
[/\bin?:d\w*/, 'in:description'],
|
125
|
+
[/\bin?:(t(itle)?|n(ame)?)/, 'in:name'],
|
126
|
+
[/\br:/, 'repo:']
|
127
|
+
]
|
128
|
+
|
129
|
+
replacements.each { |r| search_terms.gsub!(r[0], r[1]) }
|
124
130
|
|
125
131
|
search_terms += ' in:title' unless search_terms =~ /(in|user|repo):/
|
126
132
|
|
data/lib/searchlink/string.rb
CHANGED
@@ -1,487 +1,489 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
# @see #scrub
|
9
|
-
def scrub!
|
10
|
-
replace scrub
|
11
|
-
end
|
1
|
+
module SL
|
2
|
+
# String helpers
|
3
|
+
class ::String
|
4
|
+
# Scrub invalid characters from string
|
5
|
+
def scrubup
|
6
|
+
encode('utf-16', invalid: :replace).encode('utf-8').gsub(/\u00A0/, ' ')
|
7
|
+
end
|
12
8
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
def url_encode
|
18
|
-
ERB::Util.url_encode(gsub(/%22/, '"'))
|
19
|
-
end
|
9
|
+
# @see #scrub
|
10
|
+
def scrubup!
|
11
|
+
replace scrub
|
12
|
+
end
|
20
13
|
|
21
|
-
|
22
|
-
|
23
|
-
|
14
|
+
# URL Encode string
|
15
|
+
#
|
16
|
+
# @return [String] url encoded string
|
17
|
+
#
|
18
|
+
def url_encode
|
19
|
+
ERB::Util.url_encode(gsub(/%22/, '"'))
|
20
|
+
end
|
24
21
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
##
|
29
|
-
## @return [String] modified regular expression
|
30
|
-
##
|
31
|
-
def normalize_trigger
|
32
|
-
gsub(/\((?!\?:)/, '(?:').gsub(/(^(\^|\\A)|(\$|\\Z)$)/, '').downcase
|
33
|
-
end
|
22
|
+
def url_decode
|
23
|
+
CGI.unescape(self)
|
24
|
+
end
|
34
25
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
scan(/t/).each { len -= 1 }
|
44
|
-
case len
|
45
|
-
when 0..3
|
46
|
-
"\t\t"
|
47
|
-
when 4..12
|
48
|
-
" \t"
|
26
|
+
##
|
27
|
+
## Adds ?: to any parentheticals in a regular expression
|
28
|
+
## to avoid match groups
|
29
|
+
##
|
30
|
+
## @return [String] modified regular expression
|
31
|
+
##
|
32
|
+
def normalize_trigger
|
33
|
+
gsub(/\((?!\?:)/, '(?:').gsub(/(^(\^|\\A)|(\$|\\Z)$)/, '').downcase
|
49
34
|
end
|
50
|
-
end
|
51
35
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
when 't'
|
67
|
-
"--#{bool}include_titles "
|
68
|
-
when 'v'
|
69
|
-
"--#{bool}validate_links "
|
70
|
-
when 's'
|
71
|
-
"--#{bool}remove_seo "
|
72
|
-
else
|
73
|
-
''
|
74
|
-
end
|
36
|
+
##
|
37
|
+
## Generate a spacer based on character widths for help dialog display
|
38
|
+
##
|
39
|
+
## @return [String] string containing tabs
|
40
|
+
##
|
41
|
+
def spacer
|
42
|
+
len = length
|
43
|
+
scan(/[mwv]/).each { len += 1 }
|
44
|
+
scan(/t/).each { len -= 1 }
|
45
|
+
case len
|
46
|
+
when 0..3
|
47
|
+
"\t\t"
|
48
|
+
when 4..12
|
49
|
+
" \t"
|
75
50
|
end
|
51
|
+
end
|
76
52
|
|
77
|
-
|
78
|
-
|
79
|
-
|
53
|
+
# parse command line flags into long options
|
54
|
+
def parse_flags
|
55
|
+
gsub(/(\+\+|--)([dirtvs]+)\b/) do
|
56
|
+
m = Regexp.last_match
|
57
|
+
bool = m[1] == '++' ? '' : 'no-'
|
58
|
+
output = ' '
|
59
|
+
m[2].split('').each do |arg|
|
60
|
+
output += case arg
|
61
|
+
when 'd'
|
62
|
+
"--#{bool}debug "
|
63
|
+
when 'i'
|
64
|
+
"--#{bool}inline "
|
65
|
+
when 'r'
|
66
|
+
"--#{bool}prefix_random "
|
67
|
+
when 't'
|
68
|
+
"--#{bool}include_titles "
|
69
|
+
when 'v'
|
70
|
+
"--#{bool}validate_links "
|
71
|
+
when 's'
|
72
|
+
"--#{bool}remove_seo "
|
73
|
+
else
|
74
|
+
''
|
75
|
+
end
|
76
|
+
end
|
80
77
|
|
81
|
-
|
82
|
-
|
83
|
-
|
78
|
+
output
|
79
|
+
end.gsub(/ +/, ' ')
|
80
|
+
end
|
84
81
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
## @return { description_of_the_return_value }
|
89
|
-
##
|
90
|
-
def fix_gist_file
|
91
|
-
sub(/^file-/, '').sub(/-([^\-]+)$/, '.\1')
|
92
|
-
end
|
82
|
+
def parse_flags!
|
83
|
+
replace parse_flags
|
84
|
+
end
|
93
85
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
86
|
+
##
|
87
|
+
## Convert file-myfile-rb to myfile.rb
|
88
|
+
##
|
89
|
+
## @return { description_of_the_return_value }
|
90
|
+
##
|
91
|
+
def fix_gist_file
|
92
|
+
sub(/^file-/, '').sub(/-([^\-]+)$/, '.\1')
|
93
|
+
end
|
102
94
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
95
|
+
# Turn a string into a slug, removing spaces and
|
96
|
+
# non-alphanumeric characters
|
97
|
+
#
|
98
|
+
# @return [String] slugified string
|
99
|
+
#
|
100
|
+
def slugify
|
101
|
+
downcase.gsub(/[^a-z0-9_]/i, '-').gsub(/-+/, '-').sub(/-?$/, '')
|
102
|
+
end
|
108
103
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
##
|
115
|
-
def clean
|
116
|
-
gsub(/\n+/, ' ')
|
117
|
-
.gsub(/"/, '"')
|
118
|
-
.gsub(/\|/, '-')
|
119
|
-
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
120
|
-
.sub(/\?&/, '').strip
|
121
|
-
end
|
104
|
+
# Destructive slugify
|
105
|
+
# @see #slugify
|
106
|
+
def slugify!
|
107
|
+
replace slugify
|
108
|
+
end
|
122
109
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
110
|
+
##
|
111
|
+
## Remove newlines, escape quotes, and remove Google
|
112
|
+
## Analytics strings
|
113
|
+
##
|
114
|
+
## @return [String] cleaned URL/String
|
115
|
+
##
|
116
|
+
def clean
|
117
|
+
gsub(/\n+/, ' ')
|
118
|
+
.gsub(/"/, '"')
|
119
|
+
.gsub(/\|/, '-')
|
120
|
+
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
121
|
+
.sub(/\?&/, '').strip
|
122
|
+
end
|
132
123
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
124
|
+
# convert itunes to apple music link
|
125
|
+
#
|
126
|
+
# @return [String] apple music link
|
127
|
+
def to_am
|
128
|
+
input = dup
|
129
|
+
input.sub!(%r{/itunes\.apple\.com}, 'geo.itunes.apple.com')
|
130
|
+
append = input =~ %r{\?[^/]+=} ? '&app=music' : '?app=music'
|
131
|
+
input + append
|
132
|
+
end
|
141
133
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
134
|
+
##
|
135
|
+
## Remove the protocol from a URL
|
136
|
+
##
|
137
|
+
## @return [String] just hostname and path of URL
|
138
|
+
##
|
139
|
+
def remove_protocol
|
140
|
+
sub(%r{^(https?|s?ftp|file)://}, '')
|
141
|
+
end
|
150
142
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
# remove last path element
|
160
|
-
path.sub!(%r{/[^/]+[.\-][^/]+/$}, '')
|
161
|
-
# remove starting/ending slashes
|
162
|
-
path.gsub!(%r{(^/|/$)}, '')
|
163
|
-
# split at slashes, delete sections that are shorter
|
164
|
-
# than 5 characters or only consist of numbers
|
165
|
-
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
166
|
-
end
|
143
|
+
##
|
144
|
+
## Return just the path of a URL
|
145
|
+
##
|
146
|
+
## @return [String] The path.
|
147
|
+
##
|
148
|
+
def url_path
|
149
|
+
URI.parse(self).path
|
150
|
+
end
|
167
151
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
152
|
+
# Extract the most relevant portions from a URL path
|
153
|
+
#
|
154
|
+
# @return [Array] array of relevant path elements
|
155
|
+
#
|
156
|
+
def path_elements
|
157
|
+
path = url_path
|
158
|
+
# force trailing slash
|
159
|
+
path.sub!(%r{/?$}, '/')
|
160
|
+
# remove last path element
|
161
|
+
path.sub!(%r{/[^/]+[.\-][^/]+/$}, '')
|
162
|
+
# remove starting/ending slashes
|
163
|
+
path.gsub!(%r{(^/|/$)}, '')
|
164
|
+
# split at slashes, delete sections that are shorter
|
165
|
+
# than 5 characters or only consist of numbers
|
166
|
+
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
167
|
+
end
|
176
168
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
return self unless self =~ /[“‘\[(<]/
|
185
|
-
|
186
|
-
words = split(/\s+/)
|
187
|
-
|
188
|
-
punct_chars = {
|
189
|
-
'“' => '”',
|
190
|
-
'‘' => '’',
|
191
|
-
'[' => ']',
|
192
|
-
'(' => ')',
|
193
|
-
'<' => '>'
|
194
|
-
}
|
195
|
-
|
196
|
-
left_punct = []
|
197
|
-
|
198
|
-
words.each do |w|
|
199
|
-
punct_chars.each do |k, v|
|
200
|
-
left_punct.push(k) if w =~ /#{Regexp.escape(k)}/
|
201
|
-
left_punct.delete_at(left_punct.rindex(k)) if w =~ /#{Regexp.escape(v)}/
|
202
|
-
end
|
169
|
+
##
|
170
|
+
## Destructive punctuation close
|
171
|
+
##
|
172
|
+
## @see #close_punctuation
|
173
|
+
##
|
174
|
+
def close_punctuation!
|
175
|
+
replace close_punctuation
|
203
176
|
end
|
204
177
|
|
205
|
-
|
206
|
-
|
178
|
+
##
|
179
|
+
## Complete incomplete punctuation pairs
|
180
|
+
##
|
181
|
+
## @return [String] string with all punctuation
|
182
|
+
## properly paired
|
183
|
+
##
|
184
|
+
def close_punctuation
|
185
|
+
return self unless self =~ /[“‘\[(<]/
|
186
|
+
|
187
|
+
words = split(/\s+/)
|
188
|
+
|
189
|
+
punct_chars = {
|
190
|
+
'“' => '”',
|
191
|
+
'‘' => '’',
|
192
|
+
'[' => ']',
|
193
|
+
'(' => ')',
|
194
|
+
'<' => '>'
|
195
|
+
}
|
196
|
+
|
197
|
+
left_punct = []
|
198
|
+
|
199
|
+
words.each do |w|
|
200
|
+
punct_chars.each do |k, v|
|
201
|
+
left_punct.push(k) if w =~ /#{Regexp.escape(k)}/
|
202
|
+
left_punct.delete_at(left_punct.rindex(k)) if w =~ /#{Regexp.escape(v)}/
|
203
|
+
end
|
204
|
+
end
|
207
205
|
|
208
|
-
|
209
|
-
|
206
|
+
tail = ''
|
207
|
+
left_punct.reverse.each { |c| tail += punct_chars[c] }
|
210
208
|
|
211
|
-
|
212
|
-
|
213
|
-
##
|
214
|
-
## @param url The url of the page from which the
|
215
|
-
## title came
|
216
|
-
##
|
217
|
-
## @see #remove_seo
|
218
|
-
##
|
219
|
-
def remove_seo!(url)
|
220
|
-
replace remove_seo(url)
|
221
|
-
end
|
209
|
+
gsub(/[^a-z)\]’”.…]+$/i, '...').strip + tail
|
210
|
+
end
|
222
211
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
unless host
|
235
|
-
return self unless SL.config['debug']
|
212
|
+
##
|
213
|
+
## Destructively remove SEO elements from a title
|
214
|
+
##
|
215
|
+
## @param url The url of the page from which the
|
216
|
+
## title came
|
217
|
+
##
|
218
|
+
## @see #remove_seo
|
219
|
+
##
|
220
|
+
def remove_seo!(url)
|
221
|
+
replace remove_seo(url)
|
222
|
+
end
|
236
223
|
|
237
|
-
|
238
|
-
|
224
|
+
##
|
225
|
+
## Remove SEO elements from a title
|
226
|
+
##
|
227
|
+
## @param url The url of the page from which the title came
|
228
|
+
##
|
229
|
+
## @return [String] cleaned title
|
230
|
+
##
|
231
|
+
def remove_seo(url)
|
232
|
+
title = dup
|
233
|
+
url = URI.parse(url)
|
234
|
+
host = url.hostname
|
235
|
+
unless host
|
236
|
+
return self unless SL.config['debug']
|
237
|
+
|
238
|
+
SL.add_error('Invalid URL', "Could not remove SEO for #{url}")
|
239
|
+
return self
|
239
240
|
|
240
|
-
|
241
|
+
end
|
241
242
|
|
242
|
-
|
243
|
-
|
243
|
+
path = url.path
|
244
|
+
root_page = path =~ %r{^/?$} ? true : false
|
244
245
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
246
|
+
title.gsub!(/\s*(–|—)\s*/, ' - ')
|
247
|
+
title.gsub!(/&[lr]dquo;/, '"')
|
248
|
+
title.gsub!(/&[lr]dquo;/, "'")
|
249
|
+
title.gsub!(/–/, ' — ')
|
250
|
+
title = CGI.unescapeHTML(title)
|
251
|
+
title.gsub!(/ +/, ' ')
|
251
252
|
|
252
|
-
|
253
|
+
seo_title_separators = %w[| » « — – - · :]
|
253
254
|
|
254
|
-
|
255
|
-
|
255
|
+
begin
|
256
|
+
re_parts = []
|
256
257
|
|
257
|
-
|
258
|
-
|
259
|
-
|
258
|
+
host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
|
259
|
+
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join('.?') }.join('|') : ''
|
260
|
+
re_parts.push(h_re) unless h_re.empty?
|
260
261
|
|
261
|
-
|
262
|
-
|
262
|
+
# p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
|
263
|
+
# re_parts.push(p_re) if p_re.length > 0
|
263
264
|
|
264
|
-
|
265
|
+
site_re = "(#{re_parts.join('|')})"
|
265
266
|
|
266
|
-
|
267
|
+
dead_switch = 0
|
267
268
|
|
268
|
-
|
269
|
+
while title.downcase.gsub(/[^a-z]/i, '') =~ /#{site_re}/i
|
269
270
|
|
270
|
-
|
271
|
+
break if dead_switch > 5
|
271
272
|
|
272
|
-
|
273
|
-
|
273
|
+
seo_title_separators.each_with_index do |sep, i|
|
274
|
+
parts = title.split(/ *#{Regexp.escape(sep)} +/)
|
274
275
|
|
275
|
-
|
276
|
+
next if parts.length == 1
|
276
277
|
|
277
|
-
|
278
|
-
|
278
|
+
remaining_separators = seo_title_separators[i..].map { |s| Regexp.escape(s) }.join('')
|
279
|
+
seps = Regexp.new("^[^#{remaining_separators}]+$")
|
279
280
|
|
280
|
-
|
281
|
+
longest = parts.longest_element.strip
|
281
282
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
283
|
+
unless parts.empty?
|
284
|
+
parts.delete_if do |pt|
|
285
|
+
compressed = pt.strip.downcase.gsub(/[^a-z]/i, '')
|
286
|
+
compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
|
287
|
+
end
|
286
288
|
end
|
287
|
-
end
|
288
289
|
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
290
|
+
title = if parts.empty?
|
291
|
+
longest
|
292
|
+
elsif parts.length < 2
|
293
|
+
parts.join(sep)
|
294
|
+
elsif parts.length > 2
|
295
|
+
parts.longest_element.strip
|
296
|
+
else
|
297
|
+
parts.join(sep)
|
298
|
+
end
|
299
|
+
end
|
300
|
+
dead_switch += 1
|
298
301
|
end
|
299
|
-
|
302
|
+
rescue StandardError => e
|
303
|
+
return self unless SL.config['debug']
|
304
|
+
|
305
|
+
SL.add_error("Error SEO processing title for #{url}", e)
|
306
|
+
return self
|
300
307
|
end
|
301
|
-
rescue StandardError => e
|
302
|
-
return self unless SL.config['debug']
|
303
308
|
|
304
|
-
|
305
|
-
|
306
|
-
|
309
|
+
seps = Regexp.new(" *[#{seo_title_separators.map { |s| Regexp.escape(s) }.join('')}] +")
|
310
|
+
if title =~ seps
|
311
|
+
seo_parts = title.split(seps)
|
312
|
+
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
313
|
+
end
|
307
314
|
|
308
|
-
|
309
|
-
if title =~ seps
|
310
|
-
seo_parts = title.split(seps)
|
311
|
-
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
315
|
+
title && title.length > 5 ? title.gsub(/\s+/, ' ') : CGI.unescapeHTML(self)
|
312
316
|
end
|
313
317
|
|
314
|
-
|
315
|
-
|
318
|
+
##
|
319
|
+
## Truncate in place
|
320
|
+
##
|
321
|
+
## @see #truncate
|
322
|
+
##
|
323
|
+
## @param max [Number] The maximum length
|
324
|
+
##
|
325
|
+
def truncate!(max)
|
326
|
+
replace truncate(max)
|
327
|
+
end
|
316
328
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
def truncate!(max)
|
325
|
-
replace truncate(max)
|
326
|
-
end
|
329
|
+
##
|
330
|
+
## Truncate string to given length, preserving words
|
331
|
+
##
|
332
|
+
## @param max [Number] The maximum length
|
333
|
+
##
|
334
|
+
def truncate(max)
|
335
|
+
return self if length < max
|
327
336
|
|
328
|
-
|
329
|
-
## Truncate string to given length, preserving words
|
330
|
-
##
|
331
|
-
## @param max [Number] The maximum length
|
332
|
-
##
|
333
|
-
def truncate(max)
|
334
|
-
return self if length < max
|
337
|
+
trunc_title = []
|
335
338
|
|
336
|
-
|
339
|
+
words = split(/\s+/)
|
340
|
+
words.each do |word|
|
341
|
+
break unless trunc_title.join(' ').length.close_punctuation + word.length <= max
|
337
342
|
|
338
|
-
|
339
|
-
|
340
|
-
break unless trunc_title.join(' ').length.close_punctuation + word.length <= max
|
343
|
+
trunc_title << word
|
344
|
+
end
|
341
345
|
|
342
|
-
trunc_title
|
346
|
+
trunc_title.empty? ? words[0] : trunc_title.join(' ')
|
343
347
|
end
|
344
348
|
|
345
|
-
|
346
|
-
|
349
|
+
##
|
350
|
+
## Test an AppleScript response, substituting nil for
|
351
|
+
## 'Missing Value'
|
352
|
+
##
|
353
|
+
## @return [Nil, String] nil if string is
|
354
|
+
## "missing value"
|
355
|
+
##
|
356
|
+
def nil_if_missing
|
357
|
+
return nil if self =~ /missing value/
|
358
|
+
|
359
|
+
self
|
360
|
+
end
|
347
361
|
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
362
|
+
##
|
363
|
+
## Score string based on number of matches, 0 - 10
|
364
|
+
##
|
365
|
+
## @param terms [String] The terms to
|
366
|
+
## match
|
367
|
+
## @param separator [String] The word separator
|
368
|
+
## @param start_word [Boolean] Require match to be
|
369
|
+
## at beginning of word
|
370
|
+
##
|
371
|
+
def matches_score(terms, separator: ' ', start_word: true)
|
372
|
+
matched = 0
|
373
|
+
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
374
|
+
|
375
|
+
regexes.each do |rx|
|
376
|
+
matched += 1 if self =~ rx
|
377
|
+
end
|
360
378
|
|
361
|
-
|
362
|
-
## Score string based on number of matches, 0 - 10
|
363
|
-
##
|
364
|
-
## @param terms [String] The terms to
|
365
|
-
## match
|
366
|
-
## @param separator [String] The word separator
|
367
|
-
## @param start_word [Boolean] Require match to be
|
368
|
-
## at beginning of word
|
369
|
-
##
|
370
|
-
def matches_score(terms, separator: ' ', start_word: true)
|
371
|
-
matched = 0
|
372
|
-
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
379
|
+
return 0 if matched.zero?
|
373
380
|
|
374
|
-
|
375
|
-
matched += 1 if self =~ rx
|
381
|
+
((matched / regexes.count.to_f) * 10).round(3)
|
376
382
|
end
|
377
383
|
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
sources.each do |src|
|
388
|
-
words.each do |term|
|
389
|
-
d = src.distance(term)
|
390
|
-
matches += 1 if d <= threshhold
|
384
|
+
def matches_fuzzy(terms, separator: ' ', start_word: true, threshhold: 5)
|
385
|
+
sources = split(/(#{separator})+/)
|
386
|
+
words = terms.split(/(#{separator})+/)
|
387
|
+
matches = 0
|
388
|
+
sources.each do |src|
|
389
|
+
words.each do |term|
|
390
|
+
d = src.distance(term)
|
391
|
+
matches += 1 if d <= threshhold
|
392
|
+
end
|
391
393
|
end
|
392
|
-
end
|
393
394
|
|
394
|
-
|
395
|
-
|
395
|
+
((matches / words.count.to_f) * 10).round(3)
|
396
|
+
end
|
396
397
|
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
398
|
+
def distance(t)
|
399
|
+
s = self.dup
|
400
|
+
m = s.length
|
401
|
+
n = t.length
|
402
|
+
return m if n == 0
|
403
|
+
return n if m == 0
|
404
|
+
d = Array.new(m+1) {Array.new(n+1)}
|
405
|
+
|
406
|
+
(0..m).each {|i| d[i][0] = i}
|
407
|
+
(0..n).each {|j| d[0][j] = j}
|
408
|
+
(1..n).each do |j|
|
409
|
+
(1..m).each do |i|
|
410
|
+
d[i][j] = if s[i-1] == t[j-1] # adjust index into string
|
411
|
+
d[i-1][j-1] # no operation required
|
412
|
+
else
|
413
|
+
[ d[i-1][j]+1, # deletion
|
414
|
+
d[i][j-1]+1, # insertion
|
415
|
+
d[i-1][j-1]+1, # substitution
|
416
|
+
].min
|
417
|
+
end
|
418
|
+
end
|
417
419
|
end
|
420
|
+
d[m][n]
|
418
421
|
end
|
419
|
-
d[m][n]
|
420
|
-
end
|
421
422
|
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
423
|
+
##
|
424
|
+
## Test if self contains an exact match for string (case insensitive)
|
425
|
+
##
|
426
|
+
## @param string [String] The string to match
|
427
|
+
##
|
428
|
+
def matches_exact(string)
|
429
|
+
comp = gsub(/[^a-z0-9 ]/i, '')
|
430
|
+
comp =~ /\b#{string.gsub(/[^a-z0-9 ]/i, '').split(/ +/).map { |s| Regexp.escape(s) }.join(' +')}/i
|
431
|
+
end
|
431
432
|
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
433
|
+
##
|
434
|
+
## Test that self does not contain any of terms
|
435
|
+
##
|
436
|
+
## @param terms [String] The terms to test
|
437
|
+
##
|
438
|
+
def matches_none(terms)
|
439
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
440
|
+
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
441
|
+
true
|
442
|
+
end
|
442
443
|
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
444
|
+
##
|
445
|
+
## Test if self contains any of terms
|
446
|
+
##
|
447
|
+
## @param terms [String] The terms to test
|
448
|
+
##
|
449
|
+
def matches_any(terms)
|
450
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
451
|
+
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
452
|
+
false
|
453
|
+
end
|
453
454
|
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
455
|
+
##
|
456
|
+
## Test that self matches every word in terms
|
457
|
+
##
|
458
|
+
## @param terms [String] The terms to test
|
459
|
+
##
|
460
|
+
def matches_all(terms)
|
461
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
462
|
+
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
463
|
+
true
|
464
|
+
end
|
464
465
|
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
466
|
+
##
|
467
|
+
## Break a string into an array of Regexps
|
468
|
+
##
|
469
|
+
## @param separator [String] The word separator
|
470
|
+
## @param start_word [Boolean] Require matches at
|
471
|
+
## start of word
|
472
|
+
##
|
473
|
+
## @return [Array] array of regular expressions
|
474
|
+
##
|
475
|
+
def to_rx_array(separator: ' ', start_word: true)
|
476
|
+
bound = start_word ? '\b' : ''
|
477
|
+
str = gsub(/(#{separator})+/, separator)
|
478
|
+
str.split(/#{separator}/).map { |arg| /#{bound}#{arg.gsub(/[^a-z0-9]/i, '.?')}/i }
|
479
|
+
end
|
479
480
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
481
|
+
##
|
482
|
+
## Indent each line of string with 4 spaces
|
483
|
+
##
|
484
|
+
## @return [String] indented string
|
485
|
+
def code_indent
|
486
|
+
split(/\n/).map { |l| " #{l}" }.join("\n")
|
487
|
+
end
|
486
488
|
end
|
487
489
|
end
|
data/lib/searchlink/version.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
module SL
|
2
|
-
VERSION = '2.3.
|
2
|
+
VERSION = '2.3.65'
|
3
3
|
end
|
4
4
|
|
5
5
|
module SL
|
6
6
|
class << self
|
7
7
|
def version_check
|
8
|
-
cachefile = File.expand_path('~/.
|
8
|
+
cachefile = File.expand_path('~/.config/searchlink/cache/update.txt')
|
9
|
+
|
10
|
+
FileUtils.mkdir_p(File.dirname(cachefile)) unless File.directory?(File.dirname(cachefile))
|
11
|
+
|
9
12
|
if File.exist?(cachefile)
|
10
13
|
last_check, latest_tag = IO.read(cachefile).strip.split(/\|/)
|
11
14
|
last_time = Time.parse(last_check)
|
@@ -22,12 +25,14 @@ module SL
|
|
22
25
|
latest_tag ||= SL::VERSION
|
23
26
|
latest = SemVer.new(latest_tag)
|
24
27
|
current = SemVer.new(SL::VERSION)
|
25
|
-
|
26
|
-
File.open(cachefile, 'w') { |f| f.puts("#{last_time.strftime('%c')}|#{latest.to_s}") }
|
27
28
|
|
28
|
-
|
29
|
+
File.open(cachefile, 'w') { |f| f.puts("#{last_time.strftime('%c')}|#{latest}") }
|
30
|
+
|
31
|
+
if latest_tag && current.older_than(latest)
|
32
|
+
return "SearchLink v#{current}, #{latest} available. Run 'update' to download."
|
33
|
+
end
|
29
34
|
|
30
|
-
"SearchLink v#{current
|
35
|
+
"SearchLink v#{current}"
|
31
36
|
end
|
32
37
|
|
33
38
|
# Check for a newer version than local copy using GitHub release tag
|
@@ -63,6 +68,11 @@ module SL
|
|
63
68
|
end
|
64
69
|
|
65
70
|
def update_searchlink
|
71
|
+
if %x{uname}.strip !~ /Darwin/
|
72
|
+
add_output('Auto updating only available on macOS')
|
73
|
+
return
|
74
|
+
end
|
75
|
+
|
66
76
|
new_version = SL.new_version?
|
67
77
|
if new_version
|
68
78
|
folder = File.expand_path('~/Downloads')
|
data/lib/tokens.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchlink
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.
|
4
|
+
version: 2.3.65
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Terpstra
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|