searchlink 2.3.59
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/searchlink +84 -0
- data/lib/searchlink/array.rb +7 -0
- data/lib/searchlink/config.rb +230 -0
- data/lib/searchlink/curl/html.rb +482 -0
- data/lib/searchlink/curl/json.rb +90 -0
- data/lib/searchlink/curl.rb +7 -0
- data/lib/searchlink/help.rb +103 -0
- data/lib/searchlink/output.rb +270 -0
- data/lib/searchlink/parse.rb +668 -0
- data/lib/searchlink/plist.rb +213 -0
- data/lib/searchlink/search.rb +70 -0
- data/lib/searchlink/searches/amazon.rb +25 -0
- data/lib/searchlink/searches/applemusic.rb +123 -0
- data/lib/searchlink/searches/bitly.rb +50 -0
- data/lib/searchlink/searches/definition.rb +67 -0
- data/lib/searchlink/searches/duckduckgo.rb +167 -0
- data/lib/searchlink/searches/github.rb +245 -0
- data/lib/searchlink/searches/google.rb +67 -0
- data/lib/searchlink/searches/helpers/chromium.rb +318 -0
- data/lib/searchlink/searches/helpers/firefox.rb +135 -0
- data/lib/searchlink/searches/helpers/safari.rb +133 -0
- data/lib/searchlink/searches/history.rb +166 -0
- data/lib/searchlink/searches/hook.rb +77 -0
- data/lib/searchlink/searches/itunes.rb +97 -0
- data/lib/searchlink/searches/lastfm.rb +41 -0
- data/lib/searchlink/searches/lyrics.rb +91 -0
- data/lib/searchlink/searches/pinboard.rb +183 -0
- data/lib/searchlink/searches/social.rb +105 -0
- data/lib/searchlink/searches/software.rb +27 -0
- data/lib/searchlink/searches/spelling.rb +59 -0
- data/lib/searchlink/searches/spotlight.rb +28 -0
- data/lib/searchlink/searches/stackoverflow.rb +31 -0
- data/lib/searchlink/searches/tmdb.rb +52 -0
- data/lib/searchlink/searches/twitter.rb +46 -0
- data/lib/searchlink/searches/wikipedia.rb +33 -0
- data/lib/searchlink/searches/youtube.rb +48 -0
- data/lib/searchlink/searches.rb +194 -0
- data/lib/searchlink/semver.rb +140 -0
- data/lib/searchlink/string.rb +469 -0
- data/lib/searchlink/url.rb +153 -0
- data/lib/searchlink/util.rb +87 -0
- data/lib/searchlink/version.rb +93 -0
- data/lib/searchlink/which.rb +175 -0
- data/lib/searchlink.rb +66 -0
- data/lib/tokens.rb +3 -0
- metadata +299 -0
@@ -0,0 +1,482 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Curl
|
4
|
+
# String helpers
class ::String
  ##
  ## Replace HTML entities in the string with plain spaces.
  ##
  ## NOTE(review): the pattern renders here as a literal space — an HTML
  ## entity (likely &nbsp;) appears to have been decoded during extraction
  ## of this listing; confirm the pattern against the original source.
  ##
  ## @return [String] string with entities replaced
  ##
  def remove_entities
    gsub(/ /, ' ')
  end
end
|
10
|
+
|
11
|
+
# Class for CURLing an HTML page
|
12
|
+
class Html
|
13
|
+
attr_reader :url, :code, :headers, :meta, :links, :head, :body,
|
14
|
+
:source, :title, :description, :body_links, :body_images
|
15
|
+
|
16
|
+
##
## Create a new page object from a URL
##
## @param url          [String]  The url to curl
## @param headers      [Hash]    Headers to send with the curl call
## @param headers_only [Boolean] Fetch response headers only
## @param compressed   [Boolean] Expect a compressed result
##
## @return [HTMLCurl] new page object
##
def initialize(url, headers: nil, headers_only: false, compressed: false)
  @curl = TTY::Which.which('curl')
  page = curl_html(url, headers: headers, headers_only: headers_only, compressed: compressed)
  @url = page[:url]
  @code = page[:code]
  @headers = page[:headers]
  @meta = page[:meta]
  @links = page[:links]
  @head = page[:head] unless page[:head].nil?
  @body = reencode(page[:body])
  @source = page[:source]
  unless @meta.nil?
    # Prefer OpenGraph values when present
    @title = @meta['og:title'] || @meta['title']
    @description = @meta['og:description'] || @meta['description']
  end
  @body_links = content_links
  @body_images = content_images
end
|
42
|
+
|
43
|
+
##
## Extract text between two regular expressions
##
## @param before [String, Regexp] pattern marking the start of the span
## @param after  [String, Regexp] pattern marking the end of the span
##
## @return [Array] array of captured matches
##
def extract(before, after)
  # Strings are escaped and promoted to regular expressions
  lead = before.instance_of?(Regexp) ? before : /#{Regexp.escape(before)}/
  tail = after.instance_of?(Regexp) ? after : /#{Regexp.escape(after)}/
  @body.scan(/#{lead.source}(.*?)#{tail.source}/)
end
|
56
|
+
|
57
|
+
##
## Extract an array of tags or tag attributes
##
## @param tag       [String]  The tag
## @param attribute [String]  The attribute
## @param source    [Boolean] Return full tag source (negates attribute if true)
## @param content   [Boolean] Return only tag contents
##
## @return [Hash, Array] if source, array of full tags; if content,
##         array of tag contents; otherwise a hash per tag with
##         attributes and content
##
## If attribute is not given, tag contents will be returned
##
## @example page.extract_tag('h1') => [Array of h1 tag contents]
## @example page.extract_tag('img', 'src') => [Array of img src attributes]
##
def extract_tag(tag, attribute = nil, source: false, content: false)
  res = extract_tag_contents(tag, source: true)

  return res if source

  res.map! do |tag_source|
    # Collect key="value" pairs from the tag source
    attr_matches = tag_source.to_enum(:scan, /(\S+)=(['"])(.*?)\2/).map { Regexp.last_match }
    attrs = attr_matches.each_with_object({}) { |at, a| a[at[1]] = at[3] }
    inner = tag_source.match(/<.*?>(?<content>.*?)</)
    {
      tag: tag,
      source: tag_source,
      attrs: attrs,
      content: inner.nil? ? nil : inner['content']
    }
  end

  return res.map { |r| r[:content] } if content

  attribute.nil? ? res : res.map { |r| r[:attrs][attribute] }
end
|
103
|
+
|
104
|
+
##
## Extract tag contents or full tag source
##
## @param tag    The tag
## @param source [Boolean] Return full tag instead of contents
##
def extract_tag_contents(tag, source: false)
  if source
    @body.scan(%r{<#{tag}.*?>(?:.*?</#{tag}>)?})
  else
    @body.scan(/<#{tag}.*?>(.*?)</).map(&:first)
  end
end
|
115
|
+
|
116
|
+
##
## Return all tags in body, or a specific tag
##
## @param tag [String, Array] The tag to return, can be an array
##
## @return [Array] If no tag is specified, a hierarchical array of all
##         tags in the document. If one or more tags are specified,
##         a flattened list in document order.
##
def tags(tag = nil)
  all_tags = content_tags(@body)
  return all_tags if tag.nil?

  wanted = tag.is_a?(Array) ? tag : [tag]
  wanted.map!(&:downcase)
  # Flatten the hierarchy and keep only the requested tag names
  flatten_tags(all_tags).dup.delete_if { |t| !wanted.include?(t[:tag].downcase) }
end
|
135
|
+
|
136
|
+
##
## Get all images from the page
##
## @return [Array] Array of images, both from OpenGraph/Twitter meta,
##         picture sources, and img tags
##
def images
  results = []
  # OpenGraph / Twitter card images from the meta tags
  %w[og:image twitter:image].each do |key|
    next unless @meta.key?(key)

    results << { type: 'opengraph', attrs: nil, src: @meta[key] }
  end
  tags(%w[img source]).each do |img|
    case img[:tag].downcase
    when /source/
      # <source> tags carry comma-separated srcset candidates
      srcset_attrs = img[:attrs].filter { |k| k[:key] =~ /srcset/i }
      next unless srcset_attrs.count.positive?

      candidates = []
      srcset_attrs.each do |attr|
        attr[:value].split(/ *, */).each do |entry|
          image, media = entry.split(/ /)
          candidates << { src: image, media: media }
        end
      end
      results << { type: 'srcset', attrs: img[:attrs], images: candidates }
    when /img/
      results << {
        type: 'img',
        src: img[:attrs].filter { |a| a[:key] =~ /src/i }.first[:value],
        attrs: img[:attrs]
      }
    end
  end
  results
end
|
184
|
+
|
185
|
+
def to_s
|
186
|
+
headers = @headers.nil? ? 0 : @headers.count
|
187
|
+
meta = @meta.nil? ? 0 : @meta.count
|
188
|
+
links = @links.nil? ? 0 : @links.count
|
189
|
+
[
|
190
|
+
%(<HTMLCurl: @code="#{@code}" @url="#{@url}" @title="#{@title}"),
|
191
|
+
%(@description=#{@description} @headers:#{headers} @meta:#{meta} @links:#{links}>)
|
192
|
+
].join(' ')
|
193
|
+
end
|
194
|
+
|
195
|
+
##
## Return all headers of given level
##
## @param level [Number] The level (1-6); defaults to matching any level
##
## @return [Array] array of headers with text and all tag attributes as symbols
##
def h(level = '\d')
  headlines = @body.to_enum(:scan, %r{<h(?<level>#{level})(?<tag> .*?)?>(?<text>.*?)</h#{level}>}i).map { Regexp.last_match }
  headlines.map do |m|
    headline = { level: m['level'] }
    unless m['tag'].nil?
      # Attach every attribute of the heading tag as a symbol key
      m['tag'].to_enum(:scan, /(?<attr>\w+)=(?<quot>["'])(?<content>.*?)\k<quot>/)
              .map { Regexp.last_match }
              .each { |a| headline[a['attr'].to_sym] = a['content'] }
    end
    headline[:text] = m['text'].remove_entities
    headline
  end
end
|
218
|
+
|
219
|
+
private
|
220
|
+
|
221
|
+
##
## Flatten the hierarchical array of tags into a single-level list
##
## @param tags [Array] Document tags (each may carry nested :tags)
##
def flatten_tags(tags)
  tags.each_with_object([]) do |t, flattened|
    flattened << { tag: t[:tag], attrs: t[:attrs], content: t[:content] }
    # Recurse into nested tags, appending them in document order
    flattened.concat(flatten_tags(t[:tags])) unless t[:tags].nil?
  end
end
|
236
|
+
|
237
|
+
##
## Return a hierarchical array of all tags in the content
##
## @param content [String] The content to parse
##
## @return [Array, nil] array of tag hashes (:tag, :source, :attrs,
##         :content, :tags), or nil when content is nil
##
def content_tags(content)
  return nil if content.nil?

  matches = content.to_enum(:scan, %r{(?mix)
    <(?<tag>(?!</)[a-z0-9]+)(?<attrs>\s[^>]+)?
    (?:\s*/>|>(?<content>.*?)</\k<tag>>)}).map { Regexp.last_match }
  matches.map do |m|
    attrs = nil
    unless m['attrs'].nil?
      attr_matches = m['attrs'].strip.to_enum(:scan, /(?ix)
        (?<key>[@a-z0-9-]+)(?:=(?<quot>["'])
        (?<value>[^"']+)\k<quot>|[ >])?/i).map { Regexp.last_match }
      # class/rel attributes are split into word arrays
      attrs = attr_matches.map do |a|
        { key: a['key'], value: a['key'] =~ /^(class|rel)$/ ? a['value'].split(/ /) : a['value'] }
      end
    end
    {
      tag: m['tag'],
      source: m.to_s,
      attrs: attrs,
      content: m['content'],
      # Recurse into the tag's content to build the hierarchy
      tags: content_tags(m['content'])
    }
  end
end
|
266
|
+
|
267
|
+
##
## Extract all meta tags from the document head
##
## @param head [String] The head content
##
## @return [Hash] hash of meta tag names/properties mapped to their
##         content values; also contains 'title' and 'refresh_url'
##         keys (nil when absent). Returns {} on any parse error.
##
def meta_tags(head)
  meta = {}
  title = head.match(%r{(?<=<title>)(.*?)(?=</title>)})
  meta['title'] = title.nil? ? nil : title[1]
  refresh = head.match(/http-equiv=(['"])refresh\1(.*?)>/)
  url = refresh.nil? ? nil : refresh[2].match(/url=(.*?)['"]/)
  # FIX: store the captured URL string, not the MatchData object
  meta['refresh_url'] = url.nil? ? nil : url[1]
  meta_tags = head.scan(/<meta.*?>/)
  meta_tags.each do |tag|
    meta_name = tag.match(/(?:name|property|http-equiv)=(["'])(.*?)\1/)
    next if meta_name.nil?

    meta_value = tag.match(/(?:content)=(['"])(.*?)\1/)
    next if meta_value.nil?

    meta[meta_name[2].downcase] = meta_value[2]
  end
  meta
rescue StandardError => e
  # Best-effort parse: log and return an empty hash rather than raising
  warn e
  {}
end
|
296
|
+
|
297
|
+
##
## Extract all <link> tags from head
##
## @param head [String] The head content
##
## @return [Array] Array of links (:rel, :href, :type, :title)
##
def link_tags(head)
  head.scan(/<link.*?>/).each_with_object([]) do |tag, links|
    rel = tag.match(/rel=(['"])(.*?)\1/)
    rel = rel[2] unless rel.nil?

    # Preload hints are not real resources; skip them
    next if rel =~ /preload/

    href = tag.match(/href=(["'])(.*?)\1/)
    next if href.nil?

    href = href[2]

    title = tag.match(/title=(['"])(.*?)\1/)
    title = title[2] unless title.nil?

    type = tag.match(/type=(['"])(.*?)\1/)
    type = type[2] unless type.nil?

    links << { rel: rel, href: href, type: type, title: title }
  end
end
|
328
|
+
|
329
|
+
##
## Get all links in the body of the page
##
## @return [Array] array of links with href, title, rel, text and class
##
def content_links
  anchors = @body.to_enum(:scan, %r{<a (?<tag>.*?)>(?<text>.*?)</a>}).map { Regexp.last_match }
  anchors.map do |m|
    attrs = m['tag']
    href = attrs.match(/href=(["'])(.*?)\1/)
    title = attrs.match(/title=(["'])(.*?)\1/)
    rel = attrs.match(/rel=(["'])(.*?)\1/)
    link_class = attrs.match(/class=(["'])(.*?)\1/)
    {
      href: href.nil? ? nil : href[2],
      title: title.nil? ? nil : title[2],
      rel: rel.nil? ? nil : rel[2],
      text: m['text'].remove_entities,
      class: link_class.nil? ? nil : link_class[2]
    }
  end
end
|
359
|
+
|
360
|
+
##
## Get all img tags in the body of the page
##
## @return [Array] array of images, one hash per img with all
##         attributes as symbol keys
##
def content_images
  image_tags = @body.to_enum(:scan, %r{<img (?<tag>.*?)/?>}).map { Regexp.last_match }
  image_tags.map do |m|
    attr_matches = m['tag'].to_enum(:scan, /(?<attr>\w+)=(?<quot>["'])(?<content>.*?)\k<quot>/).map { Regexp.last_match }
    attr_matches.each_with_object({}) { |a, img| img[a['attr'].to_sym] = a['content'] }
  end
end
|
376
|
+
|
377
|
+
|
378
|
+
|
379
|
+
##
## Curls the html for the page
##
## @param url          [String]  The url
## @param headers      [Hash]    The headers
## @param headers_only [Boolean] Return headers only
## @param compressed   [Boolean] expect compressed results
##
## @return [Hash, Boolean] hash of url, code, headers, meta, links, head,
##         body, and source; false when curl returns nothing
##
def curl_html(url, headers: nil, headers_only: false, compressed: false)
  # -I fetches headers only; -i includes headers with the body
  flags = headers_only ? 'SsLI' : 'SsLi'
  agent = ['Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us)',
           'AppleWebKit/533.17.9 (KHTML, like Gecko)',
           'Version/5.0.2 Mobile/8J2 Safari/6533.18.5'].join(' ')
  header_args = headers.nil? ? '' : headers.map { |h, v| %(-H "#{h}: #{v}") }.join(' ')
  compress = compressed ? '--compressed' : ''
  source = `#{@curl} -#{flags} #{compress} #{header_args} '#{url}' 2>/dev/null`
  # Retry with a mobile user agent if the bare request came back empty
  source = `#{@curl} -#{flags} #{compress} -A "#{agent}" #{header_args} '#{url}' 2>/dev/null` if source.nil? || source.empty?

  return false if source.nil? || source.empty?

  source.strip!

  # Split the response headers off the top of the output
  headers = {}
  lines = source.split(/\r\n/)
  code = lines[0].match(/(\d\d\d)/)[1]
  lines.shift
  lines.each_with_index do |line, idx|
    if line =~ /^([\w-]+): (.*?)$/
      m = Regexp.last_match
      headers[m[1]] = m[2]
    else
      # First non-header line marks the start of the body
      source = lines[idx..].join("\n")
      break
    end
  end

  if headers['content-type'] =~ /json/
    return { url: url, code: code, headers: headers, meta: nil, links: nil, head: nil,
             body: source.strip, source: source.strip, body_links: nil, body_images: nil }
  end

  head = source.match(%r{(?<=<head>)(.*?)(?=</head>)}mi)

  if head.nil?
    { url: url, code: code, headers: headers, meta: nil, links: nil, head: nil,
      body: source.strip, source: source.strip, body_links: nil, body_images: nil }
  else
    meta = meta_tags(head[1])
    links = link_tags(head[1])
    body = source.match(%r{<body.*?>(.*?)</body>}mi)[1]
    # NOTE(review): body_links/body_images here resolve to the attr_readers,
    # which have not been populated yet at this point — confirm intent.
    { url: url, code: code, headers: headers, meta: meta, links: links, head: head[1],
      body: body, source: source.strip, body_links: body_links, body_images: body_images }
  end
end
|
433
|
+
|
434
|
+
##
## Reencode the content to UTF-8 (borrowed from Nokogiri)
##
## @param body         [String] The body
## @param content_type [String] Force content type
##
def reencode(body, content_type = nil)
  # Already has a known encoding; just transcode to UTF-8
  return body.encode(Encoding::UTF_8) unless body.encoding == Encoding::ASCII_8BIT

  encoding = nil

  # look for a Byte Order Mark (BOM)
  bom = body[0..2].bytes
  if bom[0..2] == [0xEF, 0xBB, 0xBF]
    encoding = Encoding::UTF_8
  elsif bom[0..1] == [0xFE, 0xFF]
    encoding = Encoding::UTF_16BE
  elsif bom[0..1] == [0xFF, 0xFE]
    encoding = Encoding::UTF_16LE
  end

  # look for a charset in a content-encoding header
  encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1] if content_type

  # look for a charset in a meta tag in the first 1024 bytes
  unless encoding
    data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
    data.scan(/<meta.*?>/im).each do |meta|
      encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
    end
  end

  # if all else fails, default to the official default encoding for HTML
  encoding ||= Encoding::ISO_8859_1

  # change the encoding to match the detected or inferred encoding
  body = body.dup
  begin
    body.force_encoding(encoding)
  rescue ArgumentError
    body.force_encoding(Encoding::ISO_8859_1)
  end

  body.encode(Encoding::UTF_8)
end
|
481
|
+
end
|
482
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Curl
|
4
|
+
# Class for CURLing a JSON response
|
5
|
+
class Json
|
6
|
+
attr_reader :url, :code, :json, :headers
|
7
|
+
|
8
|
+
##
## Create a new Curl::Json page object
##
## @param url             [String]  The url to curl
## @param headers         [Hash]    The headers to send
## @param compressed      [Boolean] Expect compressed results
## @param symbolize_names [Boolean] Symbolize the parsed JSON keys
##
## @return [Curl::Json] Curl::Json object with url, code, parsed json, and response headers
##
def initialize(url, headers: nil, compressed: false, symbolize_names: false)
  @curl = TTY::Which.which('curl')
  result = curl_json(url, headers: headers, compressed: compressed, symbolize_names: symbolize_names)
  @url = result[:url]
  @code = result[:code]
  @json = result[:json]
  @headers = result[:headers]
end
|
25
|
+
|
26
|
+
##
## Fetch a nested value from the parsed JSON using a dot path,
## e.g. "a.b.list[0]".
##
## @param path [String] dot-separated key path; a segment may carry an
##        array index in brackets ("key[2]")
## @param json [Hash]   the object to traverse (defaults to @json)
##
## @return [Object] the value at the path
##
def path(path, json = @json)
  # FIX: split on a literal dot — /./ is a regex matching every
  # character and produced an empty parts list
  parts = path.split('.')
  target = json
  parts.each do |part|
    if part =~ /(?<key>[^\[]+)\[(?<int>\d+)\]/
      # FIX: read the named captures from the match; `key`/`int` were
      # previously undefined locals (NameError)
      m = Regexp.last_match
      target = target[m['key']][m['int'].to_i]
    else
      target = target[part]
    end
  end

  target
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
##
## Curl the JSON contents
##
## @param url             [String]  The url
## @param headers         [Hash]    The headers to send
## @param compressed      [Boolean] Expect compressed results
## @param symbolize_names [Boolean] Symbolize the parsed JSON keys
##
## @return [Hash, Boolean, nil] hash of url, code, headers, and parsed
##         json; false when curl returned nothing; nil on parse error
##
def curl_json(url, headers: nil, compressed: false, symbolize_names: false)
  flags = 'SsLi'
  agent = ['Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us)',
           'AppleWebKit/533.17.9 (KHTML, like Gecko)',
           'Version/5.0.2 Mobile/8J2 Safari/6533.18.5'].join(' ')
  header_args = headers.nil? ? '' : headers.map { |h, v| %(-H "#{h}: #{v}") }.join(' ')
  compress = compressed ? '--compressed' : ''
  source = `#{@curl} -#{flags} #{compress} #{header_args} '#{url}' 2>/dev/null`
  # Retry with a mobile user agent if the bare request came back empty
  source = `#{@curl} -#{flags} #{compress} -A "#{agent}" #{header_args} '#{url}' 2>/dev/null` if source.nil? || source.empty?

  return false if source.nil? || source.empty?

  source.strip!

  # Split the response headers off the top of the output
  headers = {}
  lines = source.split(/\r\n/)
  code = lines[0].match(/(\d\d\d)/)[1]
  lines.shift
  lines.each_with_index do |line, idx|
    if line =~ /^([\w-]+): (.*?)$/
      m = Regexp.last_match
      headers[m[1]] = m[2]
    else
      # First non-header line marks the start of the payload
      source = lines[idx..].join("\n")
      break
    end
  end

  json = source.strip.force_encoding('utf-8')

  # Strip emoji that can trip up the parser
  json.gsub!(/[\u{1F600}-\u{1F6FF}]/, '')

  { url: url, code: code, headers: headers, json: JSON.parse(json, symbolize_names: symbolize_names) }
rescue StandardError => e
  warn e
  warn e.backtrace
  nil
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module SL
|
2
|
+
class SearchLink
|
3
|
+
##
## Inline CSS stylesheet for the HTML help dialog.
##
## @return [String] minified CSS rules (dedented heredoc)
##
def help_css
  <<~ENDCSS
    body{-webkit-font-smoothing:antialiased;font-family:"Avenir Next",Avenir,"Helvetica Neue",Helvetica,Arial,Verdana,sans-serif;
    margin:30px 0 0;padding:0;background:#fff;color:#303030;font-size:16px;line-height:1.5;text-align:center}h1{color:#000}
    h2{color:#111}p,td,div{color:#111;font-family:"Avenir Next",Avenir,"Helvetica Neue",Helvetica,Arial,Verdana,sans-serif;
    word-wrap:break-word}a{color:#de5456;text-decoration:none;-webkit-transition:color .2s ease-in-out;
    -moz-transition:color .2s ease-in-out;-o-transition:color .2s ease-in-out;-ms-transition:color .2s ease-in-out;
    transition:color .2s ease-in-out}a:hover{color:#3593d9}h1,h2,h3,h4,h5{margin:2.75rem 0 2rem;font-weight:500;line-height:1.15}
    h1{margin-top:0;font-size:2em}h2{font-size:1.7em}ul,ol,pre,table,blockquote{margin-top:2em;margin-bottom:2em}
    caption,col,colgroup,table,tbody,td,tfoot,th,thead,tr{border-spacing:0}table{border:1px solid rgba(0,0,0,0.25);
    border-collapse:collapse;display:table;empty-cells:hide;margin:-1px 0 1.3125em;padding:0;table-layout:fixed;margin:0 auto}
    caption{display:table-caption;font-weight:700}col{display:table-column}colgroup{display:table-column-group}
    tbody{display:table-row-group}tfoot{display:table-footer-group}thead{display:table-header-group}
    td,th{display:table-cell}tr{display:table-row}table th,table td{font-size:1.2em;line-height:1.3;padding:.5em 1em 0}
    table thead{background:rgba(0,0,0,0.15);border:1px solid rgba(0,0,0,0.15);border-bottom:1px solid rgba(0,0,0,0.2)}
    table tbody{background:rgba(0,0,0,0.05)}table tfoot{background:rgba(0,0,0,0.15);border:1px solid rgba(0,0,0,0.15);
    border-top:1px solid rgba(0,0,0,0.2)}p{font-size:1.1429em;line-height:1.72em;margin:1.3125em 0}dt,th{font-weight:700}
    table tr:nth-child(odd),table th:nth-child(odd),table td:nth-child(odd){background:rgba(255,255,255,0.06)}
    table tr:nth-child(even),table td:nth-child(even){background:rgba(200,200,200,0.25)}
    input[type=text] {padding: 5px;border-radius: 5px;border: solid 1px #ccc;font-size: 20px;}
  ENDCSS
end
|
25
|
+
|
26
|
+
##
## JavaScript for the HTML help dialog: filters the "searches" and
## "custom" tables live as the user types in the #filter input.
##
## NOTE(review): in the embedded JS, `table2` is assigned without being
## declared in the `let` list, so it becomes an implicit global —
## harmless here but worth confirming/cleaning up in the JS itself.
##
## @return [String] JavaScript source (dedented heredoc)
##
def help_js
  <<~EOJS
    function filterTable() {
      let input, filter, table, tr, i, txtValue;
      input = document.getElementById("filter");
      filter = input.value.toUpperCase();
      table = document.getElementById("searches");
      table2 = document.getElementById("custom");

      tr = table.getElementsByTagName("tr");

      for (i = 0; i < tr.length; i++) {
        txtValue = tr[i].textContent || tr[i].innerText;
        if (txtValue.toUpperCase().indexOf(filter) > -1) {
          tr[i].style.display = "";
        } else {
          tr[i].style.display = "none";
        }
      }

      tr = table2.getElementsByTagName("tr");

      for (i = 0; i < tr.length; i++) {
        txtValue = tr[i].textContent || tr[i].innerText;
        if (txtValue.toUpperCase().indexOf(filter) > -1) {
          tr[i].style.display = "";
        } else {
          tr[i].style.display = "none";
        }
      }
    }
  EOJS
end
|
59
|
+
|
60
|
+
##
## Plain-text help: the available searches plus any configured
## custom site searches.
##
## @return [String] help text
##
def help_text
  text = <<~EOHELP
    -- [Available searches] -------------------
    #{SL::Searches.available_searches}
  EOHELP

  if SL.config['custom_site_searches']
    text += "\n-- [Custom Searches] ----------------------\n"
    SL.config['custom_site_searches'].sort_by { |label, _| label }.each do |label, site|
      text += "!#{label}#{label.spacer} #{site}\n"
    end
  end
  text
end
|
72
|
+
|
73
|
+
##
## HTML fragment for the help dialog: a filter input plus the
## available and custom search tables.
##
## @return [String] HTML fragment
##
def help_html
  out = []
  out << '<input type="text" id="filter" onkeyup="filterTable()" placeholder="Filter searches">'
  out << '<h2>Available Searches</h2>'
  out << SL::Searches.available_searches_html
  out << '<h2>Custom Searches</h2>'
  out << '<table id="custom">'
  out << '<thead><td>Shortcut</td><td>Search Type</td></thead>'
  out << '<tbody>'
  SL.config['custom_site_searches'].each do |label, site|
    out << "<tr><td><code>!#{label}</code></td><td>#{site}</td></tr>"
  end
  out << '</tbody>'
  out << '</table>'
  out.join("\n")
end
|
86
|
+
|
87
|
+
##
## Render the full help page to ~/.searchlink_searches.html and open
## it in the default browser (macOS `open`).
##
def help_dialog
  parts = ["<html><head><style>#{help_css}</style><script>#{help_js}</script></head><body>"]
  parts << '<h1>SearchLink Help</h1>'
  parts << "<p>[#{SL.version_check}] [<a href='https://github.com/ttscoff/searchlink/wiki'>Wiki</a>]</p>"
  parts << help_html
  parts << '<p><a href="https://github.com/ttscoff/searchlink/wiki">Visit the wiki</a> for additional information</p>'
  parts << '</body>'
  html_file = File.expand_path('~/.searchlink_searches.html')
  File.open(html_file, 'w') { |f| f.puts parts.join("\n") }
  `open #{html_file}`
end
|
98
|
+
|
99
|
+
##
## Print the plain-text help to standard output.
##
def help_cli
  puts help_text
end
|
102
|
+
end
|
103
|
+
end
|