hertools 0.1.0 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20f15df456f2d19d57142b6e67310c78cf4d7a80a08d4f3e17ca07764e7bb38f
4
- data.tar.gz: 0e0ac2aff27cc5303d8a3ed93e44c075c6bad37cba541f00565eeb00f4f3cb85
3
+ metadata.gz: '05092964905a17f8eeefae0d133e2ea5ba85db714759ccc42d173b4ec8a5dbbb'
4
+ data.tar.gz: 1f0a3befe399d98d0c0cccb543a92e6b1674675e898a7c4a437ae7b91e9998c6
5
5
  SHA512:
6
- metadata.gz: 14a9a96095ad1a146abb92cb5c230366639cb2a4e490822ca8bedd9e6311c9ded6932d500c6b8b6025916d9c874dcef7fb2330c6b3b615cdd122c50990c83b0f
7
- data.tar.gz: ab279755ab77ff157d39a9d6ae5317837906c8bf81ecd3d95ee7d92580fe1cafec49f96198ba28f4728082ac76f9526854a0c838a9cf0f47e3e33a29ea526034
6
+ metadata.gz: 43a337be463c80fd54caba8f05feb63f638f2208a671c45827ce331114f1e40209a68d353feebb302115f91721201bd818b783d628a77b7c1d208725922a4bcc
7
+ data.tar.gz: 7d0824dc195b48c560009d1a1dba29fc2a479b2cf6a9d8327d769005109696fce926f8184370d6c32ced34adaeb24fdc20051ef84fc63bcdf18d3b9df006f035
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Hertools
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'base64'
4
4
  require 'open-uri'
5
+ require 'net/http'
5
6
  require 'httparty'
6
7
  require 'htmlentities'
7
8
  require 'nokogiri'
@@ -13,7 +14,7 @@ module Hertools
13
14
  # Arguments
14
15
  # url: the url of a webpage
15
16
  # options:
16
- # html_parser: %w[httparty nokogiri]
17
+ # html_parser: %w[httparty nokogiri net_http]
17
18
  # root_path: existing file directory
18
19
  def crawl_title_and_favicon_file(url, options = {})
19
20
  puts '>>> Parsing the arguments <<<'
@@ -27,10 +28,8 @@ module Hertools
27
28
  end
28
29
 
29
30
  puts '>>> Analysing the http response <<<'
30
- if httparty?
31
- response = HTTParty.get(@url)
32
- res = response.body
33
- else
31
+ case @html_parser
32
+ when 'nokogiri'
34
33
  response = HTTParty.head(@url)
35
34
  res = begin
36
35
  Nokogiri::HTML(URI.open(url), nil, 'UTF-8')
@@ -38,6 +37,12 @@ module Hertools
38
37
  puts e
39
38
  nil
40
39
  end
40
+ when 'httparty'
41
+ response = HTTParty.get(@url)
42
+ res = response.body
43
+ else
44
+ response = Net::HTTP.get_response(URI(@url))
45
+ res = response.body.force_encoding("utf-8")
41
46
  end
42
47
  puts "HttpCode: #{response.code}"
43
48
 
@@ -47,27 +52,27 @@ module Hertools
47
52
  @favicon_url = "#{@index_url}/favicon.ico"
48
53
  puts "Use the default favicon url: #{@favicon_url}."
49
54
  else
50
- @title = if httparty?
51
- res[%r{<title>\n*(.*)\n*</title>}, 1].to_s
52
- else
55
+ @title = if nokogiri?
53
56
  res.xpath('//head/title')[0]&.content.to_s
57
+ else
58
+ res[%r{<title>\n*(.*)\n*</title>}, 1].to_s
54
59
  end
55
60
  if @title.empty?
56
61
  puts 'Not found the title!'
57
62
  puts 'Use the domain name as the title.'
58
63
  @title = @domain_name
59
64
  end
60
- if httparty?
65
+ unless nokogiri?
61
66
  coder = HTMLEntities.new
62
67
  @title = coder.decode(@title)
63
68
  end
64
69
  puts "Title: #{@title}"
65
70
 
66
- @favicon_url = if httparty?
67
- res[/<link rel="icon".*href="([^"]+)/, 1].to_s
68
- else
71
+ @favicon_url = if nokogiri?
69
72
  favicon_links = res.xpath('//head/link[@rel="icon"]')
70
73
  favicon_links.empty? ? '' : favicon_links[0][:href].to_s
74
+ else
75
+ res[/<link rel="icon".*href="([^"]+)/, 1].to_s
71
76
  end
72
77
  if @favicon_url.empty?
73
78
  puts 'Not found the favicon url!'
@@ -147,10 +152,10 @@ module Hertools
147
152
 
148
153
  def parse_options(options)
149
154
  options = Hash(options)
150
- html_parser = options.fetch(:html_parser) { 'httparty' }
155
+ html_parser = options.fetch(:html_parser) { 'net_http' }
151
156
  @old_html_parser = @html_parser
152
- @html_parser = %w[httparty nokogiri].include?(html_parser) ? html_parser : 'httparty'
153
- reset_httparty_judge_result if @old_html_parser != @html_parser
157
+ @html_parser = %w[httparty nokogiri].include?(html_parser) ? html_parser : 'net_http'
158
+ rejudge_nokogiri if @old_html_parser != @html_parser
154
159
  puts "HtmlParser: #{@html_parser}"
155
160
  root_path = options.fetch(:root_path) { Dir.pwd }
156
161
  @root_path = (File.directory?(root_path) ? root_path : Dir.pwd).chomp('/')
@@ -161,12 +166,12 @@ module Hertools
161
166
  false
162
167
  end
163
168
 
164
- def httparty?
165
- @httparty_judge_result ||= @html_parser == 'httparty'
169
+ def nokogiri?
170
+ @judge_nokogiri ||= @html_parser == 'nokogiri'
166
171
  end
167
172
 
168
- def reset_httparty_judge_result
169
- @httparty_judge_result = nil
173
+ def rejudge_nokogiri
174
+ @judge_nokogiri = nil
170
175
  end
171
176
  end
172
177
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hertools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - WuDi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-25 00:00:00.000000000 Z
11
+ date: 2020-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty