searchlink 2.3.64 → 2.3.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/searchlink +2 -4
- data/lib/searchlink/searches/github.rb +17 -11
- data/lib/searchlink/string.rb +415 -413
- data/lib/searchlink/version.rb +9 -4
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd9142e4c8363dca0dbbc82c7bb4be3112ce335f908641b4184dbde9e1525960
|
|
4
|
+
data.tar.gz: c8e7cf285e4de29c24a7fcb53014c2efa4da8a3c38a857f11747b97a8a653ba0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6d2336776d6526804d9df51e180a9996a89cc9786ef4c0cefa5fede4e3ba2c9a931c4240c2116809f6fd928dc8155c7c3b26b0141add9bf1ed39000bdf73ca2e
|
|
7
|
+
data.tar.gz: c7710d400e7346a79063604af305027a851f490ec055f7b762bc9d414db5f5ebf1d21504bff3acccb7e5832c42d5914ce844b324f483a7c0044ff72720b637dc
|
data/bin/searchlink
CHANGED
|
@@ -46,8 +46,7 @@ if !ARGV.empty?
|
|
|
46
46
|
|
|
47
47
|
files.each do |file|
|
|
48
48
|
if File.exist?(file) && `file -b "#{file}"|grep -c text`.to_i.positive?
|
|
49
|
-
input =
|
|
50
|
-
input.scrub!
|
|
49
|
+
input = IO.read(file).scrubup
|
|
51
50
|
|
|
52
51
|
backup_file = "#{file}.bak"
|
|
53
52
|
backup_file = "#{file}.bak 1" if File.exist?(backup_file)
|
|
@@ -72,8 +71,7 @@ if !ARGV.empty?
|
|
|
72
71
|
end
|
|
73
72
|
end
|
|
74
73
|
else
|
|
75
|
-
input =
|
|
76
|
-
input.scrub!
|
|
74
|
+
input = $stdin.read.scrubup
|
|
77
75
|
sl.parse(input)
|
|
78
76
|
output = SL.output&.join('')
|
|
79
77
|
|
data/lib/searchlink/searches/github.rb
CHANGED
|
@@ -24,6 +24,8 @@ module SL
|
|
|
24
24
|
url, title, link_text = github(search_terms, link_text)
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
return SL.ddg("site:github.com #{search_terms}", link_text) unless url
|
|
28
|
+
|
|
27
29
|
link_text = title if link_text == '' || link_text == search_terms
|
|
28
30
|
|
|
29
31
|
[url, title, link_text]
|
|
@@ -32,9 +34,9 @@ module SL
|
|
|
32
34
|
def github_search_curl(endpoint, query)
|
|
33
35
|
headers = {
|
|
34
36
|
'Accept' => 'application/vnd.github+json',
|
|
35
|
-
'X-GitHub-Api-Version' => '2022-11-28'
|
|
37
|
+
'X-GitHub-Api-Version' => '2022-11-28'
|
|
36
38
|
}
|
|
37
|
-
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if Secrets::GH_AUTH_TOKEN
|
|
39
|
+
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if defined? Secrets::GH_AUTH_TOKEN
|
|
38
40
|
|
|
39
41
|
url = "https://api.github.com/search/#{endpoint}?q=#{query.url_encode}&per_page=1&page=1&order=desc"
|
|
40
42
|
res = Curl::Json.new(url, headers: headers)
|
|
@@ -51,7 +53,7 @@ module SL
|
|
|
51
53
|
'Accept' => 'application/vnd.github+json',
|
|
52
54
|
'X-GitHub-Api-Version' => '2022-11-28'
|
|
53
55
|
}
|
|
54
|
-
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if Secrets::GH_AUTH_TOKEN
|
|
56
|
+
headers['Authorization'] = "Bearer #{Secrets::GH_AUTH_TOKEN}" if defined? Secrets::GH_AUTH_TOKEN
|
|
55
57
|
|
|
56
58
|
url = "https://api.github.com/users/#{user}/gists?per_page=100&page=#{page}"
|
|
57
59
|
|
|
@@ -113,14 +115,18 @@ module SL
|
|
|
113
115
|
end
|
|
114
116
|
|
|
115
117
|
def search_github(search_terms, link_text)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
118
|
+
replacements = [
|
|
119
|
+
[%r{(\S+)/(\S+)}, 'user:\1 \2'],
|
|
120
|
+
[/\bu\w*:(\w+)/, 'user:\1'],
|
|
121
|
+
[/\bl\w*:(\w+)/, 'language:\1'],
|
|
122
|
+
[/\bin?:r\w*/, 'in:readme'],
|
|
123
|
+
[/\bin?:t\w*/, 'in:topics'],
|
|
124
|
+
[/\bin?:d\w*/, 'in:description'],
|
|
125
|
+
[/\bin?:(t(itle)?|n(ame)?)/, 'in:name'],
|
|
126
|
+
[/\br:/, 'repo:']
|
|
127
|
+
]
|
|
128
|
+
|
|
129
|
+
replacements.each { |r| search_terms.gsub!(r[0], r[1]) }
|
|
124
130
|
|
|
125
131
|
search_terms += ' in:title' unless search_terms =~ /(in|user|repo):/
|
|
126
132
|
|
data/lib/searchlink/string.rb
CHANGED
|
@@ -1,487 +1,489 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# @see #scrub
|
|
9
|
-
def scrub!
|
|
10
|
-
replace scrub
|
|
11
|
-
end
|
|
1
|
+
module SL
|
|
2
|
+
# String helpers
|
|
3
|
+
class ::String
|
|
4
|
+
# Scrub invalid characters from string
|
|
5
|
+
def scrubup
|
|
6
|
+
encode('utf-16', invalid: :replace).encode('utf-8').gsub(/\u00A0/, ' ')
|
|
7
|
+
end
|
|
12
8
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def url_encode
|
|
18
|
-
ERB::Util.url_encode(gsub(/%22/, '"'))
|
|
19
|
-
end
|
|
9
|
+
# @see #scrub
|
|
10
|
+
def scrubup!
|
|
11
|
+
replace scrub
|
|
12
|
+
end
|
|
20
13
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
14
|
+
# URL Encode string
|
|
15
|
+
#
|
|
16
|
+
# @return [String] url encoded string
|
|
17
|
+
#
|
|
18
|
+
def url_encode
|
|
19
|
+
ERB::Util.url_encode(gsub(/%22/, '"'))
|
|
20
|
+
end
|
|
24
21
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
##
|
|
29
|
-
## @return [String] modified regular expression
|
|
30
|
-
##
|
|
31
|
-
def normalize_trigger
|
|
32
|
-
gsub(/\((?!\?:)/, '(?:').gsub(/(^(\^|\\A)|(\$|\\Z)$)/, '').downcase
|
|
33
|
-
end
|
|
22
|
+
def url_decode
|
|
23
|
+
CGI.unescape(self)
|
|
24
|
+
end
|
|
34
25
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
scan(/t/).each { len -= 1 }
|
|
44
|
-
case len
|
|
45
|
-
when 0..3
|
|
46
|
-
"\t\t"
|
|
47
|
-
when 4..12
|
|
48
|
-
" \t"
|
|
26
|
+
##
|
|
27
|
+
## Adds ?: to any parentheticals in a regular expression
|
|
28
|
+
## to avoid match groups
|
|
29
|
+
##
|
|
30
|
+
## @return [String] modified regular expression
|
|
31
|
+
##
|
|
32
|
+
def normalize_trigger
|
|
33
|
+
gsub(/\((?!\?:)/, '(?:').gsub(/(^(\^|\\A)|(\$|\\Z)$)/, '').downcase
|
|
49
34
|
end
|
|
50
|
-
end
|
|
51
35
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
when 't'
|
|
67
|
-
"--#{bool}include_titles "
|
|
68
|
-
when 'v'
|
|
69
|
-
"--#{bool}validate_links "
|
|
70
|
-
when 's'
|
|
71
|
-
"--#{bool}remove_seo "
|
|
72
|
-
else
|
|
73
|
-
''
|
|
74
|
-
end
|
|
36
|
+
##
|
|
37
|
+
## Generate a spacer based on character widths for help dialog display
|
|
38
|
+
##
|
|
39
|
+
## @return [String] string containing tabs
|
|
40
|
+
##
|
|
41
|
+
def spacer
|
|
42
|
+
len = length
|
|
43
|
+
scan(/[mwv]/).each { len += 1 }
|
|
44
|
+
scan(/t/).each { len -= 1 }
|
|
45
|
+
case len
|
|
46
|
+
when 0..3
|
|
47
|
+
"\t\t"
|
|
48
|
+
when 4..12
|
|
49
|
+
" \t"
|
|
75
50
|
end
|
|
51
|
+
end
|
|
76
52
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
53
|
+
# parse command line flags into long options
|
|
54
|
+
def parse_flags
|
|
55
|
+
gsub(/(\+\+|--)([dirtvs]+)\b/) do
|
|
56
|
+
m = Regexp.last_match
|
|
57
|
+
bool = m[1] == '++' ? '' : 'no-'
|
|
58
|
+
output = ' '
|
|
59
|
+
m[2].split('').each do |arg|
|
|
60
|
+
output += case arg
|
|
61
|
+
when 'd'
|
|
62
|
+
"--#{bool}debug "
|
|
63
|
+
when 'i'
|
|
64
|
+
"--#{bool}inline "
|
|
65
|
+
when 'r'
|
|
66
|
+
"--#{bool}prefix_random "
|
|
67
|
+
when 't'
|
|
68
|
+
"--#{bool}include_titles "
|
|
69
|
+
when 'v'
|
|
70
|
+
"--#{bool}validate_links "
|
|
71
|
+
when 's'
|
|
72
|
+
"--#{bool}remove_seo "
|
|
73
|
+
else
|
|
74
|
+
''
|
|
75
|
+
end
|
|
76
|
+
end
|
|
80
77
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
output
|
|
79
|
+
end.gsub(/ +/, ' ')
|
|
80
|
+
end
|
|
84
81
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
## @return { description_of_the_return_value }
|
|
89
|
-
##
|
|
90
|
-
def fix_gist_file
|
|
91
|
-
sub(/^file-/, '').sub(/-([^\-]+)$/, '.\1')
|
|
92
|
-
end
|
|
82
|
+
def parse_flags!
|
|
83
|
+
replace parse_flags
|
|
84
|
+
end
|
|
93
85
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
86
|
+
##
|
|
87
|
+
## Convert file-myfile-rb to myfile.rb
|
|
88
|
+
##
|
|
89
|
+
## @return { description_of_the_return_value }
|
|
90
|
+
##
|
|
91
|
+
def fix_gist_file
|
|
92
|
+
sub(/^file-/, '').sub(/-([^\-]+)$/, '.\1')
|
|
93
|
+
end
|
|
102
94
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
95
|
+
# Turn a string into a slug, removing spaces and
|
|
96
|
+
# non-alphanumeric characters
|
|
97
|
+
#
|
|
98
|
+
# @return [String] slugified string
|
|
99
|
+
#
|
|
100
|
+
def slugify
|
|
101
|
+
downcase.gsub(/[^a-z0-9_]/i, '-').gsub(/-+/, '-').sub(/-?$/, '')
|
|
102
|
+
end
|
|
108
103
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
##
|
|
115
|
-
def clean
|
|
116
|
-
gsub(/\n+/, ' ')
|
|
117
|
-
.gsub(/"/, '"')
|
|
118
|
-
.gsub(/\|/, '-')
|
|
119
|
-
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
|
120
|
-
.sub(/\?&/, '').strip
|
|
121
|
-
end
|
|
104
|
+
# Destructive slugify
|
|
105
|
+
# @see #slugify
|
|
106
|
+
def slugify!
|
|
107
|
+
replace slugify
|
|
108
|
+
end
|
|
122
109
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
110
|
+
##
|
|
111
|
+
## Remove newlines, escape quotes, and remove Google
|
|
112
|
+
## Analytics strings
|
|
113
|
+
##
|
|
114
|
+
## @return [String] cleaned URL/String
|
|
115
|
+
##
|
|
116
|
+
def clean
|
|
117
|
+
gsub(/\n+/, ' ')
|
|
118
|
+
.gsub(/"/, '"')
|
|
119
|
+
.gsub(/\|/, '-')
|
|
120
|
+
.gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
|
|
121
|
+
.sub(/\?&/, '').strip
|
|
122
|
+
end
|
|
132
123
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
124
|
+
# convert itunes to apple music link
|
|
125
|
+
#
|
|
126
|
+
# @return [String] apple music link
|
|
127
|
+
def to_am
|
|
128
|
+
input = dup
|
|
129
|
+
input.sub!(%r{/itunes\.apple\.com}, 'geo.itunes.apple.com')
|
|
130
|
+
append = input =~ %r{\?[^/]+=} ? '&app=music' : '?app=music'
|
|
131
|
+
input + append
|
|
132
|
+
end
|
|
141
133
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
134
|
+
##
|
|
135
|
+
## Remove the protocol from a URL
|
|
136
|
+
##
|
|
137
|
+
## @return [String] just hostname and path of URL
|
|
138
|
+
##
|
|
139
|
+
def remove_protocol
|
|
140
|
+
sub(%r{^(https?|s?ftp|file)://}, '')
|
|
141
|
+
end
|
|
150
142
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
# remove last path element
|
|
160
|
-
path.sub!(%r{/[^/]+[.\-][^/]+/$}, '')
|
|
161
|
-
# remove starting/ending slashes
|
|
162
|
-
path.gsub!(%r{(^/|/$)}, '')
|
|
163
|
-
# split at slashes, delete sections that are shorter
|
|
164
|
-
# than 5 characters or only consist of numbers
|
|
165
|
-
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
|
166
|
-
end
|
|
143
|
+
##
|
|
144
|
+
## Return just the path of a URL
|
|
145
|
+
##
|
|
146
|
+
## @return [String] The path.
|
|
147
|
+
##
|
|
148
|
+
def url_path
|
|
149
|
+
URI.parse(self).path
|
|
150
|
+
end
|
|
167
151
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
152
|
+
# Extract the most relevant portions from a URL path
|
|
153
|
+
#
|
|
154
|
+
# @return [Array] array of relevant path elements
|
|
155
|
+
#
|
|
156
|
+
def path_elements
|
|
157
|
+
path = url_path
|
|
158
|
+
# force trailing slash
|
|
159
|
+
path.sub!(%r{/?$}, '/')
|
|
160
|
+
# remove last path element
|
|
161
|
+
path.sub!(%r{/[^/]+[.\-][^/]+/$}, '')
|
|
162
|
+
# remove starting/ending slashes
|
|
163
|
+
path.gsub!(%r{(^/|/$)}, '')
|
|
164
|
+
# split at slashes, delete sections that are shorter
|
|
165
|
+
# than 5 characters or only consist of numbers
|
|
166
|
+
path.split(%r{/}).delete_if { |section| section =~ /^\d+$/ || section.length < 5 }
|
|
167
|
+
end
|
|
176
168
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
return self unless self =~ /[“‘\[(<]/
|
|
185
|
-
|
|
186
|
-
words = split(/\s+/)
|
|
187
|
-
|
|
188
|
-
punct_chars = {
|
|
189
|
-
'“' => '”',
|
|
190
|
-
'‘' => '’',
|
|
191
|
-
'[' => ']',
|
|
192
|
-
'(' => ')',
|
|
193
|
-
'<' => '>'
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
left_punct = []
|
|
197
|
-
|
|
198
|
-
words.each do |w|
|
|
199
|
-
punct_chars.each do |k, v|
|
|
200
|
-
left_punct.push(k) if w =~ /#{Regexp.escape(k)}/
|
|
201
|
-
left_punct.delete_at(left_punct.rindex(k)) if w =~ /#{Regexp.escape(v)}/
|
|
202
|
-
end
|
|
169
|
+
##
|
|
170
|
+
## Destructive punctuation close
|
|
171
|
+
##
|
|
172
|
+
## @see #close_punctuation
|
|
173
|
+
##
|
|
174
|
+
def close_punctuation!
|
|
175
|
+
replace close_punctuation
|
|
203
176
|
end
|
|
204
177
|
|
|
205
|
-
|
|
206
|
-
|
|
178
|
+
##
|
|
179
|
+
## Complete incomplete punctuation pairs
|
|
180
|
+
##
|
|
181
|
+
## @return [String] string with all punctuation
|
|
182
|
+
## properly paired
|
|
183
|
+
##
|
|
184
|
+
def close_punctuation
|
|
185
|
+
return self unless self =~ /[“‘\[(<]/
|
|
186
|
+
|
|
187
|
+
words = split(/\s+/)
|
|
188
|
+
|
|
189
|
+
punct_chars = {
|
|
190
|
+
'“' => '”',
|
|
191
|
+
'‘' => '’',
|
|
192
|
+
'[' => ']',
|
|
193
|
+
'(' => ')',
|
|
194
|
+
'<' => '>'
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
left_punct = []
|
|
198
|
+
|
|
199
|
+
words.each do |w|
|
|
200
|
+
punct_chars.each do |k, v|
|
|
201
|
+
left_punct.push(k) if w =~ /#{Regexp.escape(k)}/
|
|
202
|
+
left_punct.delete_at(left_punct.rindex(k)) if w =~ /#{Regexp.escape(v)}/
|
|
203
|
+
end
|
|
204
|
+
end
|
|
207
205
|
|
|
208
|
-
|
|
209
|
-
|
|
206
|
+
tail = ''
|
|
207
|
+
left_punct.reverse.each { |c| tail += punct_chars[c] }
|
|
210
208
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
##
|
|
214
|
-
## @param url The url of the page from which the
|
|
215
|
-
## title came
|
|
216
|
-
##
|
|
217
|
-
## @see #remove_seo
|
|
218
|
-
##
|
|
219
|
-
def remove_seo!(url)
|
|
220
|
-
replace remove_seo(url)
|
|
221
|
-
end
|
|
209
|
+
gsub(/[^a-z)\]’”.…]+$/i, '...').strip + tail
|
|
210
|
+
end
|
|
222
211
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
unless host
|
|
235
|
-
return self unless SL.config['debug']
|
|
212
|
+
##
|
|
213
|
+
## Destructively remove SEO elements from a title
|
|
214
|
+
##
|
|
215
|
+
## @param url The url of the page from which the
|
|
216
|
+
## title came
|
|
217
|
+
##
|
|
218
|
+
## @see #remove_seo
|
|
219
|
+
##
|
|
220
|
+
def remove_seo!(url)
|
|
221
|
+
replace remove_seo(url)
|
|
222
|
+
end
|
|
236
223
|
|
|
237
|
-
|
|
238
|
-
|
|
224
|
+
##
|
|
225
|
+
## Remove SEO elements from a title
|
|
226
|
+
##
|
|
227
|
+
## @param url The url of the page from which the title came
|
|
228
|
+
##
|
|
229
|
+
## @return [String] cleaned title
|
|
230
|
+
##
|
|
231
|
+
def remove_seo(url)
|
|
232
|
+
title = dup
|
|
233
|
+
url = URI.parse(url)
|
|
234
|
+
host = url.hostname
|
|
235
|
+
unless host
|
|
236
|
+
return self unless SL.config['debug']
|
|
237
|
+
|
|
238
|
+
SL.add_error('Invalid URL', "Could not remove SEO for #{url}")
|
|
239
|
+
return self
|
|
239
240
|
|
|
240
|
-
|
|
241
|
+
end
|
|
241
242
|
|
|
242
|
-
|
|
243
|
-
|
|
243
|
+
path = url.path
|
|
244
|
+
root_page = path =~ %r{^/?$} ? true : false
|
|
244
245
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
246
|
+
title.gsub!(/\s*(–|—)\s*/, ' - ')
|
|
247
|
+
title.gsub!(/&[lr]dquo;/, '"')
|
|
248
|
+
title.gsub!(/&[lr]dquo;/, "'")
|
|
249
|
+
title.gsub!(/–/, ' — ')
|
|
250
|
+
title = CGI.unescapeHTML(title)
|
|
251
|
+
title.gsub!(/ +/, ' ')
|
|
251
252
|
|
|
252
|
-
|
|
253
|
+
seo_title_separators = %w[| » « — – - · :]
|
|
253
254
|
|
|
254
|
-
|
|
255
|
-
|
|
255
|
+
begin
|
|
256
|
+
re_parts = []
|
|
256
257
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
258
|
+
host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
|
|
259
|
+
h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join('.?') }.join('|') : ''
|
|
260
|
+
re_parts.push(h_re) unless h_re.empty?
|
|
260
261
|
|
|
261
|
-
|
|
262
|
-
|
|
262
|
+
# p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
|
|
263
|
+
# re_parts.push(p_re) if p_re.length > 0
|
|
263
264
|
|
|
264
|
-
|
|
265
|
+
site_re = "(#{re_parts.join('|')})"
|
|
265
266
|
|
|
266
|
-
|
|
267
|
+
dead_switch = 0
|
|
267
268
|
|
|
268
|
-
|
|
269
|
+
while title.downcase.gsub(/[^a-z]/i, '') =~ /#{site_re}/i
|
|
269
270
|
|
|
270
|
-
|
|
271
|
+
break if dead_switch > 5
|
|
271
272
|
|
|
272
|
-
|
|
273
|
-
|
|
273
|
+
seo_title_separators.each_with_index do |sep, i|
|
|
274
|
+
parts = title.split(/ *#{Regexp.escape(sep)} +/)
|
|
274
275
|
|
|
275
|
-
|
|
276
|
+
next if parts.length == 1
|
|
276
277
|
|
|
277
|
-
|
|
278
|
-
|
|
278
|
+
remaining_separators = seo_title_separators[i..].map { |s| Regexp.escape(s) }.join('')
|
|
279
|
+
seps = Regexp.new("^[^#{remaining_separators}]+$")
|
|
279
280
|
|
|
280
|
-
|
|
281
|
+
longest = parts.longest_element.strip
|
|
281
282
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
283
|
+
unless parts.empty?
|
|
284
|
+
parts.delete_if do |pt|
|
|
285
|
+
compressed = pt.strip.downcase.gsub(/[^a-z]/i, '')
|
|
286
|
+
compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
|
|
287
|
+
end
|
|
286
288
|
end
|
|
287
|
-
end
|
|
288
289
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
290
|
+
title = if parts.empty?
|
|
291
|
+
longest
|
|
292
|
+
elsif parts.length < 2
|
|
293
|
+
parts.join(sep)
|
|
294
|
+
elsif parts.length > 2
|
|
295
|
+
parts.longest_element.strip
|
|
296
|
+
else
|
|
297
|
+
parts.join(sep)
|
|
298
|
+
end
|
|
299
|
+
end
|
|
300
|
+
dead_switch += 1
|
|
298
301
|
end
|
|
299
|
-
|
|
302
|
+
rescue StandardError => e
|
|
303
|
+
return self unless SL.config['debug']
|
|
304
|
+
|
|
305
|
+
SL.add_error("Error SEO processing title for #{url}", e)
|
|
306
|
+
return self
|
|
300
307
|
end
|
|
301
|
-
rescue StandardError => e
|
|
302
|
-
return self unless SL.config['debug']
|
|
303
308
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
309
|
+
seps = Regexp.new(" *[#{seo_title_separators.map { |s| Regexp.escape(s) }.join('')}] +")
|
|
310
|
+
if title =~ seps
|
|
311
|
+
seo_parts = title.split(seps)
|
|
312
|
+
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
|
313
|
+
end
|
|
307
314
|
|
|
308
|
-
|
|
309
|
-
if title =~ seps
|
|
310
|
-
seo_parts = title.split(seps)
|
|
311
|
-
title = seo_parts.longest_element.strip if seo_parts.length.positive?
|
|
315
|
+
title && title.length > 5 ? title.gsub(/\s+/, ' ') : CGI.unescapeHTML(self)
|
|
312
316
|
end
|
|
313
317
|
|
|
314
|
-
|
|
315
|
-
|
|
318
|
+
##
|
|
319
|
+
## Truncate in place
|
|
320
|
+
##
|
|
321
|
+
## @see #truncate
|
|
322
|
+
##
|
|
323
|
+
## @param max [Number] The maximum length
|
|
324
|
+
##
|
|
325
|
+
def truncate!(max)
|
|
326
|
+
replace truncate(max)
|
|
327
|
+
end
|
|
316
328
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
def truncate!(max)
|
|
325
|
-
replace truncate(max)
|
|
326
|
-
end
|
|
329
|
+
##
|
|
330
|
+
## Truncate string to given length, preserving words
|
|
331
|
+
##
|
|
332
|
+
## @param max [Number] The maximum length
|
|
333
|
+
##
|
|
334
|
+
def truncate(max)
|
|
335
|
+
return self if length < max
|
|
327
336
|
|
|
328
|
-
|
|
329
|
-
## Truncate string to given length, preserving words
|
|
330
|
-
##
|
|
331
|
-
## @param max [Number] The maximum length
|
|
332
|
-
##
|
|
333
|
-
def truncate(max)
|
|
334
|
-
return self if length < max
|
|
337
|
+
trunc_title = []
|
|
335
338
|
|
|
336
|
-
|
|
339
|
+
words = split(/\s+/)
|
|
340
|
+
words.each do |word|
|
|
341
|
+
break unless trunc_title.join(' ').length.close_punctuation + word.length <= max
|
|
337
342
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
break unless trunc_title.join(' ').length.close_punctuation + word.length <= max
|
|
343
|
+
trunc_title << word
|
|
344
|
+
end
|
|
341
345
|
|
|
342
|
-
trunc_title
|
|
346
|
+
trunc_title.empty? ? words[0] : trunc_title.join(' ')
|
|
343
347
|
end
|
|
344
348
|
|
|
345
|
-
|
|
346
|
-
|
|
349
|
+
##
|
|
350
|
+
## Test an AppleScript response, substituting nil for
|
|
351
|
+
## 'Missing Value'
|
|
352
|
+
##
|
|
353
|
+
## @return [Nil, String] nil if string is
|
|
354
|
+
## "missing value"
|
|
355
|
+
##
|
|
356
|
+
def nil_if_missing
|
|
357
|
+
return nil if self =~ /missing value/
|
|
358
|
+
|
|
359
|
+
self
|
|
360
|
+
end
|
|
347
361
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
362
|
+
##
|
|
363
|
+
## Score string based on number of matches, 0 - 10
|
|
364
|
+
##
|
|
365
|
+
## @param terms [String] The terms to
|
|
366
|
+
## match
|
|
367
|
+
## @param separator [String] The word separator
|
|
368
|
+
## @param start_word [Boolean] Require match to be
|
|
369
|
+
## at beginning of word
|
|
370
|
+
##
|
|
371
|
+
def matches_score(terms, separator: ' ', start_word: true)
|
|
372
|
+
matched = 0
|
|
373
|
+
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
|
374
|
+
|
|
375
|
+
regexes.each do |rx|
|
|
376
|
+
matched += 1 if self =~ rx
|
|
377
|
+
end
|
|
360
378
|
|
|
361
|
-
|
|
362
|
-
## Score string based on number of matches, 0 - 10
|
|
363
|
-
##
|
|
364
|
-
## @param terms [String] The terms to
|
|
365
|
-
## match
|
|
366
|
-
## @param separator [String] The word separator
|
|
367
|
-
## @param start_word [Boolean] Require match to be
|
|
368
|
-
## at beginning of word
|
|
369
|
-
##
|
|
370
|
-
def matches_score(terms, separator: ' ', start_word: true)
|
|
371
|
-
matched = 0
|
|
372
|
-
regexes = terms.to_rx_array(separator: separator, start_word: start_word)
|
|
379
|
+
return 0 if matched.zero?
|
|
373
380
|
|
|
374
|
-
|
|
375
|
-
matched += 1 if self =~ rx
|
|
381
|
+
((matched / regexes.count.to_f) * 10).round(3)
|
|
376
382
|
end
|
|
377
383
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
sources.each do |src|
|
|
388
|
-
words.each do |term|
|
|
389
|
-
d = src.distance(term)
|
|
390
|
-
matches += 1 if d <= threshhold
|
|
384
|
+
def matches_fuzzy(terms, separator: ' ', start_word: true, threshhold: 5)
|
|
385
|
+
sources = split(/(#{separator})+/)
|
|
386
|
+
words = terms.split(/(#{separator})+/)
|
|
387
|
+
matches = 0
|
|
388
|
+
sources.each do |src|
|
|
389
|
+
words.each do |term|
|
|
390
|
+
d = src.distance(term)
|
|
391
|
+
matches += 1 if d <= threshhold
|
|
392
|
+
end
|
|
391
393
|
end
|
|
392
|
-
end
|
|
393
394
|
|
|
394
|
-
|
|
395
|
-
|
|
395
|
+
((matches / words.count.to_f) * 10).round(3)
|
|
396
|
+
end
|
|
396
397
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
398
|
+
def distance(t)
|
|
399
|
+
s = self.dup
|
|
400
|
+
m = s.length
|
|
401
|
+
n = t.length
|
|
402
|
+
return m if n == 0
|
|
403
|
+
return n if m == 0
|
|
404
|
+
d = Array.new(m+1) {Array.new(n+1)}
|
|
405
|
+
|
|
406
|
+
(0..m).each {|i| d[i][0] = i}
|
|
407
|
+
(0..n).each {|j| d[0][j] = j}
|
|
408
|
+
(1..n).each do |j|
|
|
409
|
+
(1..m).each do |i|
|
|
410
|
+
d[i][j] = if s[i-1] == t[j-1] # adjust index into string
|
|
411
|
+
d[i-1][j-1] # no operation required
|
|
412
|
+
else
|
|
413
|
+
[ d[i-1][j]+1, # deletion
|
|
414
|
+
d[i][j-1]+1, # insertion
|
|
415
|
+
d[i-1][j-1]+1, # substitution
|
|
416
|
+
].min
|
|
417
|
+
end
|
|
418
|
+
end
|
|
417
419
|
end
|
|
420
|
+
d[m][n]
|
|
418
421
|
end
|
|
419
|
-
d[m][n]
|
|
420
|
-
end
|
|
421
422
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
423
|
+
##
|
|
424
|
+
## Test if self contains exactl match for string (case insensitive)
|
|
425
|
+
##
|
|
426
|
+
## @param string [String] The string to match
|
|
427
|
+
##
|
|
428
|
+
def matches_exact(string)
|
|
429
|
+
comp = gsub(/[^a-z0-9 ]/i, '')
|
|
430
|
+
comp =~ /\b#{string.gsub(/[^a-z0-9 ]/i, '').split(/ +/).map { |s| Regexp.escape(s) }.join(' +')}/i
|
|
431
|
+
end
|
|
431
432
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
433
|
+
##
|
|
434
|
+
## Test that self does not contain any of terms
|
|
435
|
+
##
|
|
436
|
+
## @param terms [String] The terms to test
|
|
437
|
+
##
|
|
438
|
+
def matches_none(terms)
|
|
439
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
440
|
+
rx_terms.each { |rx| return false if gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
|
441
|
+
true
|
|
442
|
+
end
|
|
442
443
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
444
|
+
##
|
|
445
|
+
## Test if self contains any of terms
|
|
446
|
+
##
|
|
447
|
+
## @param terms [String] The terms to test
|
|
448
|
+
##
|
|
449
|
+
def matches_any(terms)
|
|
450
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
451
|
+
rx_terms.each { |rx| return true if gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
|
452
|
+
false
|
|
453
|
+
end
|
|
453
454
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
455
|
+
##
|
|
456
|
+
## Test that self matches every word in terms
|
|
457
|
+
##
|
|
458
|
+
## @param terms [String] The terms to test
|
|
459
|
+
##
|
|
460
|
+
def matches_all(terms)
|
|
461
|
+
rx_terms = terms.is_a?(String) ? terms.to_rx_array : terms
|
|
462
|
+
rx_terms.each { |rx| return false unless gsub(/[^a-z0-9 ]/i, '') =~ rx }
|
|
463
|
+
true
|
|
464
|
+
end
|
|
464
465
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
466
|
+
##
|
|
467
|
+
## Break a string into an array of Regexps
|
|
468
|
+
##
|
|
469
|
+
## @param separator [String] The word separator
|
|
470
|
+
## @param start_word [Boolean] Require matches at
|
|
471
|
+
## start of word
|
|
472
|
+
##
|
|
473
|
+
## @return [Array] array of regular expressions
|
|
474
|
+
##
|
|
475
|
+
def to_rx_array(separator: ' ', start_word: true)
|
|
476
|
+
bound = start_word ? '\b' : ''
|
|
477
|
+
str = gsub(/(#{separator})+/, separator)
|
|
478
|
+
str.split(/#{separator}/).map { |arg| /#{bound}#{arg.gsub(/[^a-z0-9]/i, '.?')}/i }
|
|
479
|
+
end
|
|
479
480
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
481
|
+
##
|
|
482
|
+
## Indent each line of string with 4 spaces
|
|
483
|
+
##
|
|
484
|
+
## @return [String] indented string
|
|
485
|
+
def code_indent
|
|
486
|
+
split(/\n/).map { |l| " #{l}" }.join("\n")
|
|
487
|
+
end
|
|
486
488
|
end
|
|
487
489
|
end
|
data/lib/searchlink/version.rb
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
module SL
|
|
2
|
-
VERSION = '2.3.64'
|
|
2
|
+
VERSION = '2.3.65'
|
|
3
3
|
end
|
|
4
4
|
|
|
5
5
|
module SL
|
|
6
6
|
class << self
|
|
7
7
|
def version_check
|
|
8
|
-
cachefile = File.expand_path('~/.config/cache/update.txt')
|
|
8
|
+
cachefile = File.expand_path('~/.config/searchlink/cache/update.txt')
|
|
9
|
+
|
|
10
|
+
FileUtils.mkdir_p(File.dirname(cachefile)) unless File.directory?(File.dirname(cachefile))
|
|
11
|
+
|
|
9
12
|
if File.exist?(cachefile)
|
|
10
13
|
last_check, latest_tag = IO.read(cachefile).strip.split(/\|/)
|
|
11
14
|
last_time = Time.parse(last_check)
|
|
@@ -23,9 +26,11 @@ module SL
|
|
|
23
26
|
latest = SemVer.new(latest_tag)
|
|
24
27
|
current = SemVer.new(SL::VERSION)
|
|
25
28
|
|
|
26
|
-
File.open(cachefile, 'w') { |f| f.puts("#{last_time.strftime('%c')}|#{latest
|
|
29
|
+
File.open(cachefile, 'w') { |f| f.puts("#{last_time.strftime('%c')}|#{latest}") }
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
if latest_tag && current.older_than(latest)
|
|
32
|
+
return "SearchLink v#{current}, #{latest} available. Run 'update' to download."
|
|
33
|
+
end
|
|
29
34
|
|
|
30
35
|
"SearchLink v#{current}"
|
|
31
36
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: searchlink
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.3.64
|
|
4
|
+
version: 2.3.65
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Brett Terpstra
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-01-
|
|
11
|
+
date: 2024-01-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|