searchlink 2.3.59
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/searchlink +84 -0
- data/lib/searchlink/array.rb +7 -0
- data/lib/searchlink/config.rb +230 -0
- data/lib/searchlink/curl/html.rb +482 -0
- data/lib/searchlink/curl/json.rb +90 -0
- data/lib/searchlink/curl.rb +7 -0
- data/lib/searchlink/help.rb +103 -0
- data/lib/searchlink/output.rb +270 -0
- data/lib/searchlink/parse.rb +668 -0
- data/lib/searchlink/plist.rb +213 -0
- data/lib/searchlink/search.rb +70 -0
- data/lib/searchlink/searches/amazon.rb +25 -0
- data/lib/searchlink/searches/applemusic.rb +123 -0
- data/lib/searchlink/searches/bitly.rb +50 -0
- data/lib/searchlink/searches/definition.rb +67 -0
- data/lib/searchlink/searches/duckduckgo.rb +167 -0
- data/lib/searchlink/searches/github.rb +245 -0
- data/lib/searchlink/searches/google.rb +67 -0
- data/lib/searchlink/searches/helpers/chromium.rb +318 -0
- data/lib/searchlink/searches/helpers/firefox.rb +135 -0
- data/lib/searchlink/searches/helpers/safari.rb +133 -0
- data/lib/searchlink/searches/history.rb +166 -0
- data/lib/searchlink/searches/hook.rb +77 -0
- data/lib/searchlink/searches/itunes.rb +97 -0
- data/lib/searchlink/searches/lastfm.rb +41 -0
- data/lib/searchlink/searches/lyrics.rb +91 -0
- data/lib/searchlink/searches/pinboard.rb +183 -0
- data/lib/searchlink/searches/social.rb +105 -0
- data/lib/searchlink/searches/software.rb +27 -0
- data/lib/searchlink/searches/spelling.rb +59 -0
- data/lib/searchlink/searches/spotlight.rb +28 -0
- data/lib/searchlink/searches/stackoverflow.rb +31 -0
- data/lib/searchlink/searches/tmdb.rb +52 -0
- data/lib/searchlink/searches/twitter.rb +46 -0
- data/lib/searchlink/searches/wikipedia.rb +33 -0
- data/lib/searchlink/searches/youtube.rb +48 -0
- data/lib/searchlink/searches.rb +194 -0
- data/lib/searchlink/semver.rb +140 -0
- data/lib/searchlink/string.rb +469 -0
- data/lib/searchlink/url.rb +153 -0
- data/lib/searchlink/util.rb +87 -0
- data/lib/searchlink/version.rb +93 -0
- data/lib/searchlink/which.rb +175 -0
- data/lib/searchlink.rb +66 -0
- data/lib/tokens.rb +3 -0
- metadata +299 -0
data/lib/searchlink/curl/html.rb (new file, matching the +482 entry above):

@@ -0,0 +1,482 @@

```ruby
# frozen_string_literal: true

module Curl
  # String helpers
  class ::String
    def remove_entities
      gsub(/&nbsp;/, ' ')
    end
  end

  # Class for CURLing an HTML page
  class Html
    attr_reader :url, :code, :headers, :meta, :links, :head, :body,
                :source, :title, :description, :body_links, :body_images

    ##
    ## Create a new page object from a URL
    ##
    ## @param url [String] The url
    ## @param headers [Hash] The headers to use in the curl call
    ## @param headers_only [Boolean] Return headers only
    ## @param compressed [Boolean] Expect compressed result
    ##
    ## @return [HTMLCurl] new page object
    ##
    def initialize(url, headers: nil, headers_only: false, compressed: false)
      @curl = TTY::Which.which('curl')
      res = curl_html(url, headers: headers, headers_only: headers_only, compressed: compressed)
      @url = res[:url]
      @code = res[:code]
      @headers = res[:headers]
      @meta = res[:meta]
      @links = res[:links]
      @head = res[:head] unless res[:head].nil?
      @body = reencode(res[:body])
      @source = res[:source]
      @title = @meta['og:title'] || @meta['title'] unless @meta.nil?
      @description = @meta['og:description'] || @meta['description'] unless @meta.nil?
      @body_links = content_links
      @body_images = content_images
    end

    ##
    ## Extract text between two regular expressions
    ##
    ## @param before [String, Regexp] The before
    ## @param after [String, Regexp] The after
    ##
    ## @return [Array] array of matches
    ##
    def extract(before, after)
      before = /#{Regexp.escape(before)}/ unless before.instance_of?(Regexp)
      after = /#{Regexp.escape(after)}/ unless after.instance_of?(Regexp)
      @body.scan(/#{before.source}(.*?)#{after.source}/)
    end

    ##
    ## Extract an array of tags or tag attributes
    ##
    ## @param tag [String] The tag
    ## @param attribute [String] The attribute
    ## @param source [Boolean] Return full tag source
    ##                         (negates attribute if true)
    ## @param content [Boolean] Return only tag contents
    ##
    ## @return [Hash, Array] if source, return array of full
    ##                       tags, if content, return array of tag contents,
    ##                       otherwise, return a hash of tags including
    ##                       attributes and content
    ##
    ## If attribute is not given, tag contents will be returned
    ##
    ## @example page.extract_tag('h1') => [Array of h1 tag contents]
    ## @example page.extract_tag('img', 'src') => [Array of img src attributes]
    ##
    def extract_tag(tag, attribute = nil, source: false, content: false)
      res = extract_tag_contents(tag, source: true)

      return res if source

      res.map! do |tag_source|
        m = tag_source.to_enum(:scan, /(\S+)=(['"])(.*?)\2/).map { Regexp.last_match }
        attrs = m.each_with_object({}) { |at, a| a[at[1]] = at[3] }
        tags = tag_source.match(/<.*?>(?<content>.*?)</)
        contents = tags.nil? ? nil : tags['content']
        {
          tag: tag,
          source: tag_source,
          attrs: attrs,
          content: contents
        }
      end

      return res.map { |r| r[:content] } if content

      return res if attribute.nil?

      res.map { |r| r[:attrs][attribute] }
    end

    ##
    ## Extract tag contents or full tag source
    ##
    ## @param tag The tag
    ## @param source [Boolean] Return full tag instead of contents
    ##
    def extract_tag_contents(tag, source: false)
      return @body.scan(%r{<#{tag}.*?>(?:.*?</#{tag}>)?}) if source

      @body.scan(/<#{tag}.*?>(.*?)</).map { |t| t[0] }
    end

    ##
    ## Return all tags in body, or a specific tag
    ##
    ## @param tag [String, Array] The tag to return, can be an array
    ##
    ## @return [Array] Array of tags. If no tag is specified, a
    ##                 hierarchical array of all tags in the document
    ##                 is returned. If one or more tags are specified,
    ##                 return a flattened list in document order.
    ##
    def tags(tag = nil)
      tags = content_tags(@body)
      return tags if tag.nil?

      tag = [tag] unless tag.is_a?(Array)
      tag.map!(&:downcase)
      flatten_tags(tags).dup.delete_if { |t| !tag.include?(t[:tag].downcase) }
    end

    ##
    ## Get all images from the page
    ##
    ## @return [Array] Array of images, both from picture sources and img tags
    ##
    def images
      output = []
      %w[og:image twitter:image].each do |src|
        next unless @meta.key?(src)

        output << {
          type: 'opengraph',
          attrs: nil,
          src: @meta[src]
        }
      end
      images = tags(%w[img source])
      images.each do |img|
        case img[:tag].downcase
        when /source/
          srcsets = img[:attrs].filter { |k| k[:key] =~ /srcset/i }
          if srcsets.count.positive?
            srcset = []
            srcsets.each do |src|
              src[:value].split(/ *, */).each do |s|
                image, media = s.split(/ /)
                srcset << {
                  src: image,
                  media: media
                }
              end
            end
            output << {
              type: 'srcset',
              attrs: img[:attrs],
              images: srcset
            }
          end
        when /img/
          output << {
            type: 'img',
            src: img[:attrs].filter { |a| a[:key] =~ /src/i }.first[:value],
            attrs: img[:attrs]
          }
        end
      end
      output
    end

    def to_s
      headers = @headers.nil? ? 0 : @headers.count
      meta = @meta.nil? ? 0 : @meta.count
      links = @links.nil? ? 0 : @links.count
      [
        %(<HTMLCurl: @code="#{@code}" @url="#{@url}" @title="#{@title}"),
        %(@description=#{@description} @headers:#{headers} @meta:#{meta} @links:#{links}>)
      ].join(' ')
    end

    ##
    ## Return all headers of given level
    ##
    ## @param level [Number] The level (1-6)
    ##
    ## @return [Array] array of headers with text and all tag attributes as symbols
    ##
    def h(level = '\d')
      res = []
      headlines = @body.to_enum(:scan, %r{<h(?<level>#{level})(?<tag> .*?)?>(?<text>.*?)</h#{level}>}i).map { Regexp.last_match }
      headlines.each do |m|
        headline = { level: m['level'] }
        if m['tag'].nil?
          attrs = nil
        else
          attrs = m['tag'].to_enum(:scan, /(?<attr>\w+)=(?<quot>["'])(?<content>.*?)\k<quot>/).map { Regexp.last_match }
          attrs.each { |a| headline[a['attr'].to_sym] = a['content'] }
        end
        headline[:text] = m['text'].remove_entities
        res << headline
      end
      res
    end

    private

    ##
    ## Flatten the array of tags
    ##
    ## @param tags [Array] Document tags
    ##
    def flatten_tags(tags)
      flattened = []

      tags.each do |t|
        flattened << { tag: t[:tag], attrs: t[:attrs], content: t[:content] }
        flattened.concat(flatten_tags(t[:tags])) unless t[:tags].nil?
      end

      flattened
    end

    ##
    ## Return an array of all tags in the content
    ##
    ## @param content [String] The content to parse
    ##
    def content_tags(content)
      return nil if content.nil?

      res = content.to_enum(:scan, %r{(?mix)
            <(?<tag>(?!</)[a-z0-9]+)(?<attrs>\s[^>]+)?
            (?:\s*/>|>(?<content>.*?)</\k<tag>>)}).map { Regexp.last_match }
      res.map do |tag|
        if tag['attrs'].nil?
          attrs = nil
        else
          attrs = tag['attrs'].strip.to_enum(:scan, /(?ix)
                  (?<key>[@a-z0-9-]+)(?:=(?<quot>["'])
                  (?<value>[^"']+)\k<quot>|[ >])?/i).map { Regexp.last_match }
          attrs.map! { |a| { key: a['key'], value: a['key'] =~ /^(class|rel)$/ ? a['value'].split(/ /) : a['value'] } }
        end
        {
          tag: tag['tag'],
          source: tag.to_s,
          attrs: attrs,
          content: tag['content'],
          tags: content_tags(tag['content'])
        }
      end
    end

    ##
    ## Extract all meta tags from the document head
    ##
    ## @param head [String] The head content
    ##
    ## @return [Hash] hash of meta tags and values
    ##
    def meta_tags(head)
      meta = {}
      title = head.match(%r{(?<=<title>)(.*?)(?=</title>)})
      meta['title'] = title.nil? ? nil : title[1]
      refresh = head.match(/http-equiv=(['"])refresh\1(.*?)>/)
      url = refresh.nil? ? nil : refresh[2].match(/url=(.*?)['"]/)
      meta['refresh_url'] = url
      meta_tags = head.scan(/<meta.*?>/)
      meta_tags.each do |tag|
        meta_name = tag.match(/(?:name|property|http-equiv)=(["'])(.*?)\1/)
        next if meta_name.nil?

        meta_value = tag.match(/(?:content)=(['"])(.*?)\1/)
        next if meta_value.nil?

        meta[meta_name[2].downcase] = meta_value[2]
      end
      meta
    rescue StandardError => e
      warn e
      {}
    end

    ##
    ## Extract all <link> tags from head
    ##
    ## @param head [String] The head content
    ##
    ## @return [Array] Array of links
    ##
    def link_tags(head)
      links = []
      link_tags = head.scan(/<link.*?>/)
      link_tags.each do |tag|
        link_rel = tag.match(/rel=(['"])(.*?)\1/)
        link_rel = link_rel.nil? ? nil : link_rel[2]

        next if link_rel =~ /preload/

        link_href = tag.match(/href=(["'])(.*?)\1/)
        next if link_href.nil?

        link_href = link_href[2]

        link_title = tag.match(/title=(['"])(.*?)\1/)
        link_title = link_title.nil? ? nil : link_title[2]

        link_type = tag.match(/type=(['"])(.*?)\1/)
        link_type = link_type.nil? ? nil : link_type[2]

        links << { rel: link_rel, href: link_href, type: link_type, title: link_title }
      end
      links
    end

    ##
    ## Get all links in the body of the page
    ##
    ## @return [Array] array of links with href, title,
    ##                 rel, text and class
    ##
    def content_links
      links = []
      link_tags = @body.to_enum(:scan, %r{<a (?<tag>.*?)>(?<text>.*?)</a>}).map { Regexp.last_match }
      link_tags.each do |m|
        href = m['tag'].match(/href=(["'])(.*?)\1/)
        href = href[2] unless href.nil?
        title = m['tag'].match(/title=(["'])(.*?)\1/)
        title = title[2] unless title.nil?
        rel = m['tag'].match(/rel=(["'])(.*?)\1/)
        rel = rel[2] unless rel.nil?
        link_class = m['tag'].match(/class=(["'])(.*?)\1/)
        link_class = link_class[2] unless link_class.nil?
        text = m['text'].remove_entities
        link = {
          href: href,
          title: title,
          rel: rel,
          text: text,
          class: link_class
        }
        links << link
      end
      links
    end

    ##
    ## Get all img tags in the body of the page
    ##
    ## @return [Array] array of images with src and all attributes
    ##
    def content_images
      images = []
      image_tags = @body.to_enum(:scan, %r{<img (?<tag>.*?)/?>}).map { Regexp.last_match }
      image_tags.each do |m|
        attrs = m['tag'].to_enum(:scan, /(?<attr>\w+)=(?<quot>["'])(?<content>.*?)\k<quot>/).map { Regexp.last_match }
        image = {}
        attrs.each { |a| image[a['attr'].to_sym] = a['content'] }
        images << image
      end
      images
    end

    ##
    ## Curls the html for the page
    ##
    ## @param url [String] The url
    ## @param headers [Hash] The headers
    ## @param headers_only [Boolean] Return headers only
    ## @param compressed [Boolean] expect compressed results
    ##
    ## @return [Hash] hash of url, code, headers, meta, links, head, body, and source
    ##
    def curl_html(url, headers: nil, headers_only: false, compressed: false)
      flags = 'SsL'
      flags += headers_only ? 'I' : 'i'
      agent = ['Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us)',
               'AppleWebKit/533.17.9 (KHTML, like Gecko)',
               'Version/5.0.2 Mobile/8J2 Safari/6533.18.5'].join(' ')
      headers = headers.nil? ? '' : headers.map { |h, v| %(-H "#{h}: #{v}") }.join(' ')
      compress = compressed ? '--compressed' : ''
      source = `#{@curl} -#{flags} #{compress} #{headers} '#{url}' 2>/dev/null`
      source = `#{@curl} -#{flags} #{compress} -A "#{agent}" #{headers} '#{url}' 2>/dev/null` if source.nil? || source.empty?

      return false if source.nil? || source.empty?

      source.strip!

      headers = {}
      lines = source.split(/\r\n/)
      code = lines[0].match(/(\d\d\d)/)[1]
      lines.shift
      lines.each_with_index do |line, idx|
        if line =~ /^([\w-]+): (.*?)$/
          m = Regexp.last_match
          headers[m[1]] = m[2]
        else
          source = lines[idx..].join("\n")
          break
        end
      end

      if headers['content-type'] =~ /json/
        return { url: url, code: code, headers: headers, meta: nil, links: nil, head: nil, body: source.strip, source: source.strip, body_links: nil, body_images: nil }
      end

      head = source.match(%r{(?<=<head>)(.*?)(?=</head>)}mi)

      if head.nil?
        { url: url, code: code, headers: headers, meta: nil, links: nil, head: nil, body: source.strip, source: source.strip, body_links: nil, body_images: nil }
      else
        meta = meta_tags(head[1])
        links = link_tags(head[1])
        body = source.match(%r{<body.*?>(.*?)</body>}mi)[1]
        { url: url, code: code, headers: headers, meta: meta, links: links, head: head[1], body: body, source: source.strip, body_links: body_links, body_images: body_images }
      end
    end

    ##
    ## Reencode the content (borrowed from Nokogiri)
    ##
    ## @param body [String] The body
    ## @param content_type [String] Force content type
    ##
    def reencode(body, content_type = nil)
      if body.encoding == Encoding::ASCII_8BIT
        encoding = nil

        # look for a Byte Order Mark (BOM)
        initial_bytes = body[0..2].bytes
        if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
          encoding = Encoding::UTF_8
        elsif initial_bytes[0..1] == [0xFE, 0xFF]
          encoding = Encoding::UTF_16BE
        elsif initial_bytes[0..1] == [0xFF, 0xFE]
          encoding = Encoding::UTF_16LE
        end

        # look for a charset in a content-encoding header
        if content_type
          encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
        end

        # look for a charset in a meta tag in the first 1024 bytes
        unless encoding
          data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
          data.scan(/<meta.*?>/im).each do |meta|
            encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
          end
        end

        # if all else fails, default to the official default encoding for HTML
        encoding ||= Encoding::ISO_8859_1

        # change the encoding to match the detected or inferred encoding
        body = body.dup
        begin
          body.force_encoding(encoding)
        rescue ArgumentError
          body.force_encoding(Encoding::ISO_8859_1)
        end
      end

      body.encode(Encoding::UTF_8)
    end
  end
end
```
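For orientation, here is a minimal usage sketch of the `Curl::Html` class above. It is not part of the released package: the require path, example URL, and printed fields are illustrative assumptions, and it presumes the gem's `tty-which` dependency is installed and `curl` is on your PATH.

```ruby
require 'tty-which'                      # supplies TTY::Which.which, used to locate curl
require_relative 'searchlink/curl/html'  # the Curl::Html class shown above (path is an assumption)

page = Curl::Html.new('https://example.com')   # shells out to curl and parses the response
puts page.code                                 # HTTP status code parsed from the response headers
puts page.title                                # og:title, falling back to the title meta entry
puts page.meta['description']                  # any <meta> tag by lowercased name/property
puts page.extract_tag('img', 'src').inspect    # array of img src attributes
page.h(2).each { |hl| puts hl[:text] }         # text of every h2, entities stripped
```

Note the `@meta.nil?` guards in `initialize`: `curl_html` deliberately returns `meta: nil` for JSON responses and for pages without a `<head>` block, so `title` and `description` stay nil in those cases.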
data/lib/searchlink/curl/json.rb (new file, matching the +90 entry above):

@@ -0,0 +1,90 @@

```ruby
# frozen_string_literal: true

module Curl
  # Class for CURLing a JSON response
  class Json
    attr_reader :url, :code, :json, :headers

    ##
    ## Create a new Curl::Json page object
    ##
    ## @param url [String] The url to curl
    ## @param headers [Hash] The headers to send
    ## @param compressed [Boolean] Expect compressed results
    ##
    ## @return [Curl::Json] Curl::Json object with url, code, parsed json, and response headers
    ##
    def initialize(url, headers: nil, compressed: false, symbolize_names: false)
      @curl = TTY::Which.which('curl')
      page = curl_json(url, headers: headers, compressed: compressed, symbolize_names: symbolize_names)
      @url = page[:url]
      @code = page[:code]
      @json = page[:json]
      @headers = page[:headers]
    end

    ##
    ## Fetch a value from the parsed JSON using a dot-syntax
    ## path, e.g. path('items[0].name')
    ##
    def path(path, json = @json)
      parts = path.split(/\./)
      target = json
      parts.each do |part|
        if /(?<key>[^\[]+)\[(?<int>\d+)\]/ =~ part
          target = target[key][int.to_i]
        else
          target = target[part]
        end
      end

      target
    end

    private

    ##
    ## Curl the JSON contents
    ##
    ## @param url [String] The url
    ## @param headers [Hash] The headers to send
    ## @param compressed [Boolean] Expect compressed results
    ##
    ## @return [Hash] hash of url, code, headers, and parsed json
    ##
    def curl_json(url, headers: nil, compressed: false, symbolize_names: false)
      flags = 'SsLi'
      agent = ['Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us)',
               'AppleWebKit/533.17.9 (KHTML, like Gecko)',
               'Version/5.0.2 Mobile/8J2 Safari/6533.18.5'].join(' ')
      headers = headers.nil? ? '' : headers.map { |h, v| %(-H "#{h}: #{v}") }.join(' ')
      compress = compressed ? '--compressed' : ''
      source = `#{@curl} -#{flags} #{compress} #{headers} '#{url}' 2>/dev/null`
      source = `#{@curl} -#{flags} #{compress} -A "#{agent}" #{headers} '#{url}' 2>/dev/null` if source.nil? || source.empty?

      return false if source.nil? || source.empty?

      source.strip!

      headers = {}
      lines = source.split(/\r\n/)
      code = lines[0].match(/(\d\d\d)/)[1]
      lines.shift
      lines.each_with_index do |line, idx|
        if line =~ /^([\w-]+): (.*?)$/
          m = Regexp.last_match
          headers[m[1]] = m[2]
        else
          source = lines[idx..].join("\n")
          break
        end
      end

      json = source.strip.force_encoding('utf-8')

      json.gsub!(/[\u{1F600}-\u{1F6FF}]/, '')

      { url: url, code: code, headers: headers, json: JSON.parse(json, symbolize_names: symbolize_names) }
    rescue StandardError => e
      warn e
      warn e.backtrace
      nil
    end
  end
end
```
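A similar hypothetical sketch for `Curl::Json`; the endpoint and key names are placeholders, not anything the package queries. `path` walks dot-separated keys and supports `key[n]` indexing into arrays:

```ruby
require 'json'
require 'tty-which'
require_relative 'searchlink/curl/json'  # the Curl::Json class shown above (path is an assumption)

res = Curl::Json.new('https://api.example.com/v1/repo',   # placeholder endpoint
                     headers: { 'Accept' => 'application/json' })
puts res.code                   # HTTP status from the response headers
puts res.json['name']           # parsed JSON as a Hash (string keys by default)
puts res.path('owner.login')    # dot-path traversal into nested objects
puts res.path('topics[0]')      # bracket syntax indexes into arrays
```

Since `symbolize_names` defaults to false, `path` lookups use string keys; passing `symbolize_names: true` to the constructor would require symbol access instead.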
data/lib/searchlink/help.rb (new file, matching the +103 entry above):

@@ -0,0 +1,103 @@

```ruby
module SL
  class SearchLink
    def help_css
      <<~ENDCSS
        body{-webkit-font-smoothing:antialiased;font-family:"Avenir Next",Avenir,"Helvetica Neue",Helvetica,Arial,Verdana,sans-serif;
        margin:30px 0 0;padding:0;background:#fff;color:#303030;font-size:16px;line-height:1.5;text-align:center}h1{color:#000}
        h2{color:#111}p,td,div{color:#111;font-family:"Avenir Next",Avenir,"Helvetica Neue",Helvetica,Arial,Verdana,sans-serif;
        word-wrap:break-word}a{color:#de5456;text-decoration:none;-webkit-transition:color .2s ease-in-out;
        -moz-transition:color .2s ease-in-out;-o-transition:color .2s ease-in-out;-ms-transition:color .2s ease-in-out;
        transition:color .2s ease-in-out}a:hover{color:#3593d9}h1,h2,h3,h4,h5{margin:2.75rem 0 2rem;font-weight:500;line-height:1.15}
        h1{margin-top:0;font-size:2em}h2{font-size:1.7em}ul,ol,pre,table,blockquote{margin-top:2em;margin-bottom:2em}
        caption,col,colgroup,table,tbody,td,tfoot,th,thead,tr{border-spacing:0}table{border:1px solid rgba(0,0,0,0.25);
        border-collapse:collapse;display:table;empty-cells:hide;margin:-1px 0 1.3125em;padding:0;table-layout:fixed;margin:0 auto}
        caption{display:table-caption;font-weight:700}col{display:table-column}colgroup{display:table-column-group}
        tbody{display:table-row-group}tfoot{display:table-footer-group}thead{display:table-header-group}
        td,th{display:table-cell}tr{display:table-row}table th,table td{font-size:1.2em;line-height:1.3;padding:.5em 1em 0}
        table thead{background:rgba(0,0,0,0.15);border:1px solid rgba(0,0,0,0.15);border-bottom:1px solid rgba(0,0,0,0.2)}
        table tbody{background:rgba(0,0,0,0.05)}table tfoot{background:rgba(0,0,0,0.15);border:1px solid rgba(0,0,0,0.15);
        border-top:1px solid rgba(0,0,0,0.2)}p{font-size:1.1429em;line-height:1.72em;margin:1.3125em 0}dt,th{font-weight:700}
        table tr:nth-child(odd),table th:nth-child(odd),table td:nth-child(odd){background:rgba(255,255,255,0.06)}
        table tr:nth-child(even),table td:nth-child(even){background:rgba(200,200,200,0.25)}
        input[type=text] {padding: 5px;border-radius: 5px;border: solid 1px #ccc;font-size: 20px;}
      ENDCSS
    end

    def help_js
      <<~EOJS
        function filterTable() {
          let input, filter, table, tr, i, txtValue;
          input = document.getElementById("filter");
          filter = input.value.toUpperCase();
          table = document.getElementById("searches");
          table2 = document.getElementById("custom");

          tr = table.getElementsByTagName("tr");

          for (i = 0; i < tr.length; i++) {
            txtValue = tr[i].textContent || tr[i].innerText;
            if (txtValue.toUpperCase().indexOf(filter) > -1) {
              tr[i].style.display = "";
            } else {
              tr[i].style.display = "none";
            }
          }

          tr = table2.getElementsByTagName("tr");

          for (i = 0; i < tr.length; i++) {
            txtValue = tr[i].textContent || tr[i].innerText;
            if (txtValue.toUpperCase().indexOf(filter) > -1) {
              tr[i].style.display = "";
            } else {
              tr[i].style.display = "none";
            }
          }
        }
      EOJS
    end

    def help_text
      text = <<~EOHELP
        -- [Available searches] -------------------
        #{SL::Searches.available_searches}
      EOHELP

      if SL.config['custom_site_searches']
        text += "\n-- [Custom Searches] ----------------------\n"
        SL.config['custom_site_searches'].sort_by { |l, s| l }.each { |label, site| text += "!#{label}#{label.spacer} #{site}\n" }
      end
      text
    end

    def help_html
      out = ['<input type="text" id="filter" onkeyup="filterTable()" placeholder="Filter searches">']
      out << '<h2>Available Searches</h2>'
      out << SL::Searches.available_searches_html
      out << '<h2>Custom Searches</h2>'
      out << '<table id="custom">'
      out << '<thead><td>Shortcut</td><td>Search Type</td></thead>'
      out << '<tbody>'
      SL.config['custom_site_searches'].each { |label, site| out << "<tr><td><code>!#{label}</code></td><td>#{site}</td></tr>" }
      out << '</tbody>'
      out << '</table>'
      out.join("\n")
    end

    def help_dialog
      text = ["<html><head><style>#{help_css}</style><script>#{help_js}</script></head><body>"]
      text << '<h1>SearchLink Help</h1>'
      text << "<p>[#{SL.version_check}] [<a href='https://github.com/ttscoff/searchlink/wiki'>Wiki</a>]</p>"
      text << help_html
      text << '<p><a href="https://github.com/ttscoff/searchlink/wiki">Visit the wiki</a> for additional information</p>'
      text << '</body>'
      html_file = File.expand_path('~/.searchlink_searches.html')
      File.open(html_file, 'w') { |f| f.puts text.join("\n") }
      `open #{html_file}`
    end

    def help_cli
      $stdout.puts help_text
    end
  end
end
```
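Finally, a hedged sketch of how these help methods appear to be driven. It assumes the full gem is loaded so that `SL.config` and `SL::Searches` are populated; calling `SL::SearchLink.new` with no arguments is an assumption, since the constructor is not shown in this diff.

```ruby
require 'searchlink'  # loads SL, SL.config, SL::Searches, and the class above

sl = SL::SearchLink.new  # assumption: constructor signature not visible in this diff
sl.help_cli              # prints the plain-text search list to stdout
sl.help_dialog           # writes ~/.searchlink_searches.html and opens it via macOS `open`
```

`help_dialog` is macOS-specific (it shells out to `open`), while `help_cli` is plain stdout, which matches the gem's split between dialog-driven and command-line usage.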