siefca-htsucker 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/htsucker.rb +34 -0
- data/lib/htsucker/htsucker.rb +468 -0
- metadata +73 -0
data/lib/htsucker.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# HTTP loading and transliteration
|
4
|
+
#
|
5
|
+
# Author:: Paweł Wilk (mailto:pw@gnu.org)
|
6
|
+
# Copyright:: Copyright (c) 2009 Paweł Wilk
|
7
|
+
# License:: LGPL
|
8
|
+
|
9
|
+
require 'iconv'
|
10
|
+
require 'htmlentities'
|
11
|
+
require 'net/http'
|
12
|
+
require 'net/https'
|
13
|
+
require 'timeout'
|
14
|
+
require 'uri'
|
15
|
+
|
16
|
+
require 'bufferaffects'
|
17
|
+
# Load the library files relative to this file instead of the process's
# current working directory, so the library can be required from anywhere.
require File.expand_path('../htsucker/domains_to_languages', __FILE__)
require File.expand_path('../htsucker/htsucker', __FILE__)

# Manual smoke test. It performs live HTTP requests, so it runs only when
# this file is executed directly and never when it is required as a library.
if __FILE__ == $0
  sites = %w[
    wykop.pl/wykopalisko
    poland.com
    hyperreal.info
    grono.net
    google.pl
    randomseed.pl
    heise-online.de
  ]

  sites.each do |site|
    pa = HTSucker.new(site)
    puts "#{pa.real_url}: #{pa.language} #{pa.charset}"
  end
end
|
@@ -0,0 +1,468 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
class HTSucker
|
5
|
+
|
6
|
+
include DomainsToLanguages
|
7
|
+
include BufferAffects
|
8
|
+
|
9
|
+
buffers_reset_method :reset_buffers
|
10
|
+
attr_affects_buffers :url
|
11
|
+
|
12
|
+
attr_reader :url
|
13
|
+
|
14
|
+
# DefaultOpts holds the built-in option values. The class method HTSucker.default_options
|
15
|
+
# copies them into the class variable @@default_options, which every new instance uses
|
16
|
+
# as the source of values for options that were not given explicitly.
|
17
|
+
|
18
|
+
DefaultOpts = { :redir_retry => 5,
|
19
|
+
:conn_retry => 8,
|
20
|
+
:total_retry => 2,
|
21
|
+
:read_timeout => 15,
|
22
|
+
:total_timeout => 30,
|
23
|
+
:allow_strange_ports => false,
|
24
|
+
:max_length => 524288 }.freeze
|
25
|
+
|
26
|
+
# Creates new instance of HTSucker. +url+ parameter should be valid URI object or string.
|
27
|
+
# You may want to override defaults by issuing hash containing options you want to be changed.
|
28
|
+
|
29
|
+
# Builds a new HTSucker for +url+ (a URI object or a string).
#
# +options+ may be a hash-like object (anything responding to +keys+) whose
# entries override the class-wide defaults. A key that is not among the
# known option names raises ArgumentError. Every resulting option is stored
# in an instance variable of the same name.
def initialize(url, options=nil)
  settings = self.class.default_options.dup

  if options.respond_to?(:keys)
    unrecognized = (options.keys - settings.keys).join(', ')
    unless unrecognized.empty?
      raise ArgumentError.new("unknown options: #{unrecognized}")
    end
    settings.merge!(options)
  end

  settings.each { |name, value| instance_variable_set("@#{name}", value) }

  reset_buffers
  @http_req = nil
  self.url = url
end
|
43
|
+
|
44
|
+
# Resets charset and response buffers.
|
45
|
+
|
46
|
+
# Clears everything that was derived from a previous fetch: charset,
# content type, cached response, real URL and the recorded content length.
# @overflow is set back to 0.
def reset_buffers
  @charset        = nil
  @content_type   = nil
  @response       = nil
  @overflow       = 0
  @real_url       = nil
  # Set by #response; cleared here so it cannot outlive the response it describes.
  @content_length = nil
end
|
53
|
+
|
54
|
+
# Sets new url.
|
55
|
+
|
56
|
+
# Sets a new URL.
#
# +url+ may be a URI object or a string. A value that parses to a plain
# URI::Generic (e.g. "example.com/page", which has no http/https scheme) is
# re-parsed with "http://" prepended. The URL is validated with
# validate_url (which may raise), an empty path becomes "/", and a frozen
# private copy is stored in @url, so a URI object passed by the caller is
# neither modified nor frozen.
def url=(url)
  url = URI.parse(url) unless url.kind_of?(URI)

  # instance_of?, not is_a?: URI::HTTP and URI::HTTPS are subclasses of
  # URI::Generic, so is_a? would also match complete URLs and prefix them
  # with a second "http://".
  url = URI.parse("http://#{url}") if url.instance_of?(URI::Generic)

  validate_url(url)

  url = url.dup
  url.path = '/' if url.path.nil? || url.path.empty?
  @url = url.freeze

  # request_uri keeps the query string, which url.path alone would drop.
  @http_req = Net::HTTP::Head.new(@url.request_uri)
end
|
65
|
+
|
66
|
+
# Returns top-level domain for URL.
|
67
|
+
|
68
|
+
# Returns the last dot-separated label of the requested URL's host
# (its top-level domain), lower-cased, as a symbol.
def domain
  labels = url.host.split('.')
  labels.last.downcase.to_sym
end
|
71
|
+
|
72
|
+
# Returns top-level domain for real URL.
|
73
|
+
|
74
|
+
# Returns the last dot-separated label of the real URL's host (its top-level
# domain), lower-cased, as a symbol. Returns nil when there is no real URL
# (nothing could be fetched) or the host has no usable label, instead of
# failing with NoMethodError on nil.
def real_domain
  fetched_from = real_url
  return nil if fetched_from.nil?

  tld = fetched_from.host.to_s.split('.').last
  tld.nil? ? nil : tld.downcase.to_sym
end
|
77
|
+
|
78
|
+
# Returns resource path.
|
79
|
+
# Path component of the requested URL.
def path; url.path end

# Path component of the URL the content was really obtained from,
# or nil when there is no real URL.
def real_path
  fetched_from = real_url
  fetched_from && fetched_from.path
end

# Host name of the requested URL.
def host; url.host end

# Host name of the real URL, or nil when there is no real URL.
def real_host
  fetched_from = real_url
  fetched_from && fetched_from.host
end

# Port of the requested URL.
def port; url.port end

# Port of the real URL, or nil when there is no real URL.
def real_port
  fetched_from = real_url
  fetched_from && fetched_from.port
end

# Protocol of the requested URL as a symbol, derived from the URI class
# name (URI::HTTP gives :http, URI::HTTPS gives :https).
def protocol; url.class.name.split('::').last.downcase.to_sym end

# Protocol of the real URL as a symbol, or nil when there is no real URL.
def real_protocol
  fetched_from = real_url
  fetched_from && fetched_from.class.name.split('::').last.downcase.to_sym
end
|
101
|
+
|
102
|
+
# Returns page charset.
|
103
|
+
|
104
|
+
# Returns the page charset. When it is not cached yet, both the content
# type and the charset are (re)computed with get_page_info and cached.
def charset
  if @charset.nil?
    page_info = get_page_info
    @content_type, @charset = page_info
  end
  @charset
end
|
108
|
+
|
109
|
+
# Alias for charset.
def content_charset; charset end

# Overrides the cached page charset with +x+.
# (The previous body, "charset=(x)", only assigned a local variable named
# charset and therefore had no effect.)
def content_charset=(x)
  @charset = x
end
|
111
|
+
|
112
|
+
# Returns page content-type.
|
113
|
+
|
114
|
+
# Returns the page content type. When it is not cached yet, both the
# content type and the charset are (re)computed with get_page_info and cached.
def content_type
  if @content_type.nil?
    page_info = get_page_info
    @content_type, @charset = page_info
  end
  @content_type
end
|
118
|
+
|
119
|
+
# Returns major name of the content-type or nil if something went wrong.
|
120
|
+
|
121
|
+
# Returns the part of the content type before the slash as a symbol
# (e.g. :text), or nil when the content type is empty or has no such part.
def content_type_major
  major = content_type.to_s.split('/').first
  major.to_s.empty? ? nil : major.to_sym
end
|
128
|
+
|
129
|
+
# Returns minor name of the content-type or nil if something went wrong.
|
130
|
+
|
131
|
+
# Returns the part of the content type after the first slash as a symbol
# (e.g. :html), or nil when the content type is empty or has no such part.
def content_type_minor
  minor = content_type.to_s.split('/')[1]
  minor.to_s.empty? ? nil : minor.to_sym
end
|
138
|
+
|
139
|
+
# Checks that +url+ is acceptable and raises otherwise:
# * HTSuckerBadURI      - the URL renders as an empty string,
# * HTSuckerBadProtocol - the URI class is neither HTTP nor HTTPS,
# * HTSuckerBadPort     - the port is not the protocol's standard one
#   (80 for HTTP, 443 for HTTPS), unless @allow_strange_ports is set.
def validate_url(url)
  raise HTSuckerBadURI.new("malformed URI") if url.to_s.empty?

  standard_ports = { 'HTTP' => 80, 'HTTPS' => 443 }
  u_protocol = url.class.name.split('::').last.upcase
  unless standard_ports.key?(u_protocol)
    raise HTSuckerBadProtocol.new("bad protocol: #{u_protocol}")
  end

  return if @allow_strange_ports

  if url.port != standard_ports[u_protocol]
    raise HTSuckerBadPort.new("strange port number: #{url.port}")
  end
end
|
152
|
+
private :validate_url
|
153
|
+
|
154
|
+
# Translates top-level domain to spoken language code.
|
155
|
+
|
156
|
+
# Maps the real URL's two-letter top-level domain to a language using the
# @@domain_to_language table. The lookup is attempted only when the first
# three characters of the page charset are one of: iso, win, cp-, koi, utf.
# Returns nil in every other case.
def domain_to_spoken
  charset_prefix = content_charset.to_s[0..2].downcase.to_sym
  return nil unless [:iso, :win, :"cp-", :koi, :utf].include?(charset_prefix)
  return nil unless real_domain.length == 2

  @@domain_to_language[real_domain]
end
|
165
|
+
private :domain_to_spoken
|
166
|
+
|
167
|
+
# Returns content-language or default content language.
|
168
|
+
|
169
|
+
# Returns the content language of the page, or +default_content_lanuage+
# when none can be determined.
#
# Sources are consulted in this order, first non-empty result wins:
# 1. <meta http-equiv="content-language" content="..."> in the document,
# 2. the lang / xml:lang attribute of the <html> tag, then of the <body> tag,
# 3. the Content-Language response header; when that header is empty or
#    starts with "en", domain_to_spoken is tried first and the header value
#    is used only if the domain gives nothing,
# 4. the default.
# Steps 1 and 2 run only for non-empty documents whose major content type
# is :text.
#
# The value comes back as produced by the source that supplied it: whatever
# extract_content_language or domain_to_spoken return, or a String for the
# header fallback of step 3 and for the default.
def content_language(default_content_lanuage='en')
  # Nothing to inspect without a response. No domain lookup is attempted
  # here: its result used to be discarded, and computing it asked for the
  # response again.
  return default_content_lanuage if response.nil?

  clang = nil
  page = body.to_s

  if !page.empty? && content_type_major == :text
    # try meta-tag header
    meta = page[/<meta http-equiv\s*=\s*['"]*content-language['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i, 1]
    clang = extract_content_language(meta)

    # try lang and xml:lang attributes of the html tag, then of the body tag
    if clang.to_s.empty?
      # The attribute may directly follow the tag name (<html lang="pl">) or
      # come after other attributes; matching never leaves the tag.
      attribute_patterns = [
        /<x?html\s(?:[^>]*?\s)?lang\s*=\s*["']?([^"'\s>]+)/i,
        /<x?html\s(?:[^>]*?\s)?xml:lang\s*=\s*["']?([^"'\s>]+)/i,
        /<body\s(?:[^>]*?\s)?lang\s*=\s*["']?([^"'\s>]+)/i,
        /<body\s(?:[^>]*?\s)?xml:lang\s*=\s*["']?([^"'\s>]+)/i
      ]
      attribute = nil
      attribute_patterns.each do |pattern|
        attribute = page[pattern, 1]
        break unless attribute.to_s.empty?
      end
      clang = extract_content_language(attribute)
    end
  end

  # try server header and in case of 'en' or empty try to figure out the
  # language by looking at the top-level domain
  if clang.to_s.empty? && response.respond_to?(:header)
    clang = extract_content_language(response.header['content-language'])
    present = clang.to_s
    clang = domain_to_spoken if present.empty? || present[0..1] == 'en'
    clang = present if clang.to_s.empty? && !present.empty?
  end

  # try default
  clang = default_content_lanuage if clang.to_s.empty?

  clang
end
|
217
|
+
|
218
|
+
# Alias for content_language called without arguments.
def language
  content_language
end

# Short alias for content_language called without arguments.
def lang
  content_language
end
|
220
|
+
|
221
|
+
# Obtains charset from document body or server response header.
|
222
|
+
|
223
|
+
# Determines the content type and charset of the page.
#
# Returns a two-element array [content_type, charset]. When there is no
# response, the given defaults are returned unchanged.
#
# The document's <meta http-equiv="content-type"> tag is consulted first,
# but only when the server's Content-Type header announces a text/* type
# and the body is not empty. Whatever the meta tag did not provide (type,
# charset or both) is then taken from the server header, and finally from
# the defaults. Filling each missing part separately matters for pages whose
# meta tag names a type without a charset while the server header carries
# the charset.
def get_page_info(default_content_type='text/html', default_charset='ascii')
  return [default_content_type, default_charset] if response.nil?

  server_header = response.respond_to?(:header) ? response.header['content-type'] : nil
  server_ctype  = extract_content_type(server_header)

  ctype = nil
  enc   = nil

  # try meta-tag header, but only if the server says the content is text
  page = body.to_s
  if !page.empty? && server_ctype.to_s.split('/').first == 'text'
    meta  = page[/<meta http-equiv\s*=\s*['"]*content-type['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i, 1]
    enc   = extract_charset(meta)
    ctype = extract_content_type(meta)
  end

  # try server header for whatever is still missing
  ctype = server_ctype if ctype.to_s.empty?
  enc   = extract_charset(server_header) if enc.to_s.empty?

  # try defaults
  enc   = default_charset if enc.to_s.empty?
  ctype = default_content_type if ctype.to_s.empty?

  [ctype, enc]
end
|
256
|
+
private :get_page_info
|
257
|
+
|
258
|
+
# Extracts charset from content-type string.
|
259
|
+
|
260
|
+
# Extracts the charset from a content-type string such as
# "text/html; charset=UTF-8".
#
# Returns the canonical Ruby encoding name, lower-cased, as a symbol
# (e.g. :"utf-8"), or nil when the string carries no charset or names one
# that Ruby does not know. The value may be quoted and padded with spaces;
# "utf8" is accepted as a spelling of "utf-8".
def extract_charset(enc_string)
  params = {}
  enc_string.to_s.chomp.downcase.split(';').each do |segment|
    key, value = segment.split('=', 2)
    next if key.nil? || value.nil?
    # String keys on purpose: parameter names come from the remote side.
    params[key.strip] = value.strip.gsub(/\A["']+|["']+\z/, '')
  end

  declared = params['charset'].to_s
  declared = 'utf-8' if declared == 'utf8'
  return nil if declared.empty?

  # Encoding.find treats these names as aliases for process-local settings
  # (and may return nil for "internal"); they never describe a document.
  return nil if %w[locale external filesystem internal].include?(declared)

  begin
    encoding = Encoding.find(declared)
  rescue ArgumentError
    return nil # unknown encoding name
  end

  encoding.nil? ? nil : encoding.name.downcase.to_sym
end
|
283
|
+
private :extract_charset
|
284
|
+
|
285
|
+
# Extracts content-type from content-type string.
|
286
|
+
|
287
|
+
# Extracts the media type from a content-type string such as
# "text/html; charset=UTF-8": everything before the first semicolon,
# stripped and lower-cased, as a symbol (e.g. :"text/html").
# Returns nil when the input is nil, empty, or has a blank media type.
def extract_content_type(ctype_string)
  media_type = ctype_string.to_s.squeeze(' ').split(';').first.to_s.strip.downcase
  media_type.empty? ? nil : media_type.to_sym
end
|
293
|
+
private :extract_content_type
|
294
|
+
|
295
|
+
# Extracts content-language from content-language string.
|
296
|
+
|
297
|
+
# Extracts the first language tag from a content-language string such as
# "pl, en" or "en-US;q=0.8": parameters after the first semicolon are
# dropped and the first non-blank comma-separated entry is returned,
# stripped and lower-cased, as a symbol (e.g. :pl, :"en-us").
# Returns nil when the input is nil, empty, or contains no tag at all
# (for example ";" or ",").
def extract_content_language(ltype_string)
  tag_list = ltype_string.to_s.squeeze(' ').split(';').first.to_s
  first_tag = tag_list.split(',').map { |tag| tag.strip }.find { |tag| !tag.empty? }
  first_tag.nil? ? nil : first_tag.downcase.to_sym
end
|
303
|
+
private :extract_content_language
|
304
|
+
|
305
|
+
|
306
|
+
# Fetches document using HTTP and returns response object. It also sets charset.
|
307
|
+
|
308
|
+
# Fetches the document using HTTP(S) and returns the response object, or
# nil when it could not be obtained. A successful response is cached in
# @response; the URL it finally came from is stored in @real_url, and
# @content_type / @charset are refreshed from it.
#
# The resource is first probed with HEAD requests (following redirects) so
# that an over-long document can be refused before it is downloaded, and
# is then downloaded with a single GET so that the response really has a
# body.
#
# Raises HTSuckerContentTooBig when the announced Content-Length, or the
# size of the downloaded body, exceeds @max_length. Errors raised by
# validate_url or by URI parsing for a redirect target are passed through
# to the caller.
def response
  return @response unless @response.nil?

  @real_url = nil
  final_url = nil
  page = nil

  begin
    # @total_timeout bounds the complete exchange: probing, redirects and
    # the download. A nil value means no limit.
    Timeout.timeout(@total_timeout) do
      final_url, probe = probe_final_url(@url)
      unless final_url.nil?
        @content_length = probe['content-length'].to_s.to_i
        if @max_length && @content_length > @max_length
          raise HTSuckerContentTooBig.new("content length (#{@content_length}) is greater than declared limit (#{@max_length})")
        end
        page = http_exchange(final_url, Net::HTTP::Get.new(final_url.request_uri))
      end
    end
  rescue Timeout::Error
    return nil
  end

  return nil unless page.kind_of?(Net::HTTPOK)

  # A server may omit or misreport Content-Length, so the limit is checked
  # again against what was really received.
  received = page.body.to_s.bytesize
  if @max_length && received > @max_length
    raise HTSuckerContentTooBig.new("content length (#{received}) is greater than declared limit (#{@max_length})")
  end

  @real_url = final_url
  @response = page
  @content_type, @charset = get_page_info(nil, nil)

  page
end

# Sends HEAD requests starting at +start_url+ and follows redirects until a
# 200 OK answer is found. Returns [url, head_response] for that answer, or
# nil when the request fails, the answer is neither 200 nor a redirect, or
# the retry budgets (@redir_retry, @conn_retry) are used up.
def probe_final_url(start_url)
  target = start_url
  redirects_left = @redir_retry.to_i
  attempts_left = @conn_retry.to_i

  loop do
    head = http_exchange(target, Net::HTTP::Head.new(target.request_uri))

    case head
    when Net::HTTPOK
      return [target, head]
    when Net::HTTPRedirection
      attempts_left -= 1
      location = head['location'].to_s
      unless location.empty?
        # merge resolves a relative Location against the current URL
        target = target.merge(location)
        validate_url(target)
        redirects_left -= 1
      end
      return nil if redirects_left < 0 || attempts_left < 0
    else
      # request failure (nil), an error status, or a 2xx status other than
      # 200: repeating the same request would never produce a 200
      return nil
    end
  end
end
private :probe_final_url

# Performs one request against +target+ (an http or https URI) and returns
# the Net::HTTP response, or nil for any other scheme and for any
# StandardError raised while connecting or talking to the server.
def http_exchange(target, request)
  scheme = target.scheme.to_s.downcase
  return nil unless scheme == 'http' || scheme == 'https'

  http = Net::HTTP.new(target.host, target.port)
  if scheme == 'https'
    http.use_ssl = true
    # NOTE(review): certificate verification is switched off, exactly as
    # before; this accepts any certificate - confirm it is intended.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
  http.read_timeout = @read_timeout if @read_timeout

  http.start { |connection| connection.request(request) }
rescue StandardError
  nil
end
private :http_exchange
|
368
|
+
|
369
|
+
# Returns document body.
|
370
|
+
|
371
|
+
# Returns the body of the response, or nil when there is no response
# or the response object has no body method.
def body
  fetched = response
  return nil unless fetched.respond_to?(:body)
  fetched.body
end
|
375
|
+
|
376
|
+
# Alias for body.
|
377
|
+
|
378
|
+
# Alias for body; any arguments are passed on to it unchanged.
def fetch(*args)
  body(*args)
end
|
379
|
+
|
380
|
+
# Returns URL used while obtaining content (e.g. after redirection).
|
381
|
+
|
382
|
+
# Returns the URL the content was obtained from (e.g. after redirection),
# or nil when no response is available. Asking for it triggers the fetch
# if it has not happened yet.
def real_url
  response.nil? ? nil : @real_url
end
|
386
|
+
|
387
|
+
# Strips HTML tags from document.
|
388
|
+
|
389
|
+
# Strips HTML markup from +text+ (the document body when omitted) and
# decodes HTML entities with HTMLEntities.
#
# Tabs become spaces and carriage returns are dropped; the <body> wrapper
# tags are removed; <script>, <style> and comment blocks are replaced by a
# space; <br> and <p> become newlines; every remaining tag is deleted.
# A nil text (no body available) is treated as an empty string.
def strip_html(text=nil)
  text ||= body
  @coder ||= HTMLEntities.new

  stripped = text.to_s.tr("\t", ' ').delete("\r")
  stripped = stripped.sub(%r{<body.*?>(.*?)</body>}mi, '\1')
  stripped = stripped.gsub(%r{<script.*?>(.*?)</script>}mi, ' ')
  stripped = stripped.gsub(%r{<style.*?>(.*?)</style>}mi, ' ')
  stripped = stripped.gsub(%r{<!--.*?-->}mi, ' ')
  stripped = stripped.gsub(/<br\s*\/?>|<p>/mi, "\n")
  stripped = stripped.gsub(/<.*?>/m, '')

  # @coder, not coder: there is no local variable or method named coder.
  @coder.decode(stripped)
end
|
402
|
+
|
403
|
+
# Transliterates text to ASCII and removes unknown characters.
|
404
|
+
|
405
|
+
# Transliterates +text+ to lower-case ASCII and removes unknown characters.
#
# +text+ defaults to the document body and +enc+ to the page charset. The
# text is converted from +enc+ to UTF-8, stripped of HTML with strip_html,
# transliterated to ASCII, lower-cased, and reduced to a limited set of
# characters; sentence punctuation (. ! ?) becomes spaces and runs of
# spaces and newlines are collapsed.
#
# +enc+ is converted to a String before it is handed to Iconv, because the
# charset is normally kept as a symbol. A nil text (no body available) is
# treated as an empty string.
#
# NOTE(review): Iconv left Ruby's standard library in 2.0; on newer Rubies
# this needs the iconv gem.
def clean_text(text=nil, enc=nil)
  text = (text || body).to_s
  enc  = (enc || charset).to_s
  @transliterator ||= Iconv.new('ASCII//TRANSLIT//IGNORE', 'UTF-8')

  page = Iconv.iconv('UTF-8//IGNORE', enc, text).join
  page = strip_html(page)

  # protect apostrophes from the character filter below
  page.gsub!(/['`]/m, '_amp__')
  page = @transliterator.conv(page).downcase
  page.tr!(".!?", ' ')
  page.gsub!(/[^\x00-\x7F]+/, '')
  page.gsub!(/[^a-z0-9\-_\[\]\(\)\*\=\@\#\$\%\^\&\{\}\:\;\,\<\>\+\s\n\.\!\?]+/im, '')
  page.gsub!('_amp__',"'")

  # whitespace normalisation
  page.squeeze!(" \n")
  page.gsub!(/^\s?\n\s?$/m, '')
  page.gsub!(/\n\s/,"\n")
  page.gsub!(/\s\n/,"\n")
  page.gsub!(/^\s+/,'')

  # drop apostrophes that merely wrap a phrase or stand alone
  page.gsub!(/(^|\s)\'+(.*?)\'+(\s|$)/m,'\1\2\3')
  page.gsub!(/(^|\s)\'+(\s|$)/, '')
  page.squeeze!("\n ")

  page
end
|
427
|
+
|
428
|
+
# Alias for clean_text called without arguments.
def clean
  clean_text
end
|
429
|
+
|
430
|
+
# Transliterates text to ASCII and removes unknown characters leaving just words.
|
431
|
+
|
432
|
+
# Runs clean_text on +text+ / +enc+ and then reduces the result to plain
# words: bracketed URL-like fragments and bracketed runs of digits or
# punctuation are blanked, every run of characters other than letters and
# digits becomes a single space, and repeated spaces are collapsed.
def clean_words(text=nil, enc=nil)
  cleaned = clean_text(text, enc)

  [
    /\[\s*?[^\:]+?\:\/+?.*?\]/mi,
    /\[\s*?(\d|\s|[^\w])+\]/mi,
    /[^a-z0-9]+/im
  ].each { |noise| cleaned.gsub!(noise, ' ') }

  cleaned.squeeze!(' ')
  cleaned
end
|
440
|
+
|
441
|
+
# Transliterates text to ASCII, removes unknown characters and returns array of words.
|
442
|
+
|
443
|
+
# Returns the words of the cleaned document (see clean_words) as an array.
def words
  cleaned = clean_words
  cleaned.split(' ')
end
|
446
|
+
|
447
|
+
# Use this class method to set up default options used when creating new objects.
|
448
|
+
# For each option that you omit it will be taken from constant hash called DefaultOpts.
|
449
|
+
# Default options hash is stored in @@default_options. This method will return current
|
450
|
+
# default options when called without parameter.
|
451
|
+
|
452
|
+
# Reads or changes the class-wide default options.
#
# Without an argument, returns the current defaults as a frozen hash
# (initially a copy of DefaultOpts). With a hash-like +opts+ (anything
# responding to +keys+), merges it into the defaults and returns the new
# frozen hash.
#
# Raises ArgumentError for keys that are not in DefaultOpts ("unknown
# options: ...") and for a non-nil argument that is not hash-like
# ("malformed options").
def self.default_options(opts=nil)
  @@default_options ||= DefaultOpts.dup.freeze
  return @@default_options if opts.nil?

  raise ArgumentError.new("malformed options") unless opts.respond_to?(:keys)

  unknown = (opts.keys - DefaultOpts.keys).join(', ')
  raise ArgumentError.new("unknown options: #{unknown}") unless unknown.empty?

  # A frozen hash cannot be thawed (Hash has no #unfreeze), so the defaults
  # are replaced by a new frozen hash instead of being modified in place.
  @@default_options = @@default_options.merge(opts).freeze
end
|
466
|
+
|
467
|
+
end
|
468
|
+
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: siefca-htsucker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Pawe\xC5\x82 Wilk"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-04-28 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: htmlentities
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bufferaffects
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
description: HTSucker is simple HTTP(S) reader with ability to transliterate body
|
36
|
+
email: pw@gnu.org
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
files:
|
44
|
+
- lib/htsucker.rb
|
45
|
+
- lib/htsucker/htsucker.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://randomseed.pl/htsucker
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options: []
|
50
|
+
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
version:
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "0"
|
64
|
+
version:
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 1.2.0
|
69
|
+
signing_key:
|
70
|
+
specification_version: 2
|
71
|
+
summary: HTSucker is simple HTTP(S) reader with ability to transliterate body
|
72
|
+
test_files: []
|
73
|
+
|