web_loader 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 803b7f81240d732a305705cc0fa46610781688ce19203f0b7d8710822b088558
4
- data.tar.gz: ec8ed1467ab473fa5fbae5a370304e3bb2281e525ecfc4f54c752828f49c1d00
3
+ metadata.gz: 03a86b4f610a1575be326e19740897204b75e2dbe41e1eac56ee1c98d9c44558
4
+ data.tar.gz: b83a8f147007e94e4fac89abd4297066727dfe6ebf0a68052569f97cece0f223
5
5
  SHA512:
6
- metadata.gz: 51c54de50e0faa9a4886cc3c84efbf88887003c12e8cafcc9cfa1ea9c5ae5beeb795a2795d08d48031dd373f6cb56de1e28116c7d0450a18b288c04e81e54e0f
7
- data.tar.gz: 62cd1ecd932a88fb51f919a5565833e5d14bbef94414acdcc55c90ab075fb387d0d98050753d05438bcaf3d8a371b731a2b146ac57cb98d4420a2b9ff97ffccc
6
+ metadata.gz: 27395f10236456d0780ee08c28d5b1c9fd447acab50261732eb24c4bb88011eb823381d01005ac6faf4d5e580d85dd1e16c69038c4ca1f4d3ed257613aeb1daf
7
+ data.tar.gz: 24a7b60226b63fcc7c1fda0a44ca11fbb14d10176936f93b0684fc0866289cbae73058fc7c4f485579fa83850520d6f9cc13112cddaff60d733e009b06ded5cc
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_loader (2.2.0)
4
+ web_loader (2.3.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,8 +36,6 @@ module WebLoader
36
36
 
37
37
  # ドライバーのセットアップ
38
38
  @driver = driver
39
- @driver.user_agent = @user_agent
40
- @driver.binary = @binary
41
39
  end
42
40
 
43
41
  attr_reader :load_cache_page
@@ -67,15 +65,9 @@ module WebLoader
67
65
 
68
66
  ##### サーバーからロード
69
67
  log("Load server: #{url}")
70
- # uri = URI.parse(url)
71
- # http = Net::HTTP.new(uri.host, uri.port)
72
- # if uri.scheme == 'https'
73
- # http.use_ssl = true
74
- # http.verify_mode = OpenSSL::SSL::VERIFY_NONE
75
- # end
76
- # @response = nil
77
68
  begin
78
- # @response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
69
+ @driver.user_agent = @user_agent
70
+ @driver.binary = @binary
79
71
  @response = @driver.fetch(url)
80
72
  rescue Net::ReadTimeout
81
73
  # タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
@@ -110,51 +102,7 @@ module WebLoader
110
102
  # それ以外は対応した例外を発生
111
103
  log("error #{url}", true)
112
104
  end
113
-
114
105
  result
115
-
116
- # ##### レスポンスの処理
117
- # result = nil
118
- # case @response
119
- # when Net::HTTPSuccess
120
- # # @responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
121
- # body = @response.body
122
- # unless @binary
123
- # # デフォルトでは ASCII-8BITが帰ってくる。
124
- # # Content-Typeのcharsetとみなす。
125
- # # https://bugs.ruby-lang.org/issues/2567
126
- # encoding = @response.type_params['charset']
127
- # body = toutf8(body, encoding)
128
- # end
129
- #
130
- # if @use_cache || @always_write_cache
131
- # log("Write cache: #{url}")
132
- # Cache.write(@cache_dir, url, @response.code, body)
133
- # end
134
- # result = body
135
- # when Net::HTTPRedirection
136
- # result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
137
- # # when Net::HTTPNotFound
138
- # # result = nil
139
- # when Net::HTTPTooManyRequests, Net::ReadTimeout
140
- # # 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
141
- # if retry_count > 0
142
- # sleep_for = 10
143
- # if @response.is_a?(Net::HTTPTooManyRequests)
144
- # # HTTPTooManyRequestsならばretry-afterで指定された値を取得。
145
- # sleep_for = @response.header['retry-after'].to_i + 10
146
- # log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
147
- # else
148
- # log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
149
- # end
150
- # sleep sleep_for
151
- # result = load(url, redirect_count , retry_count - 1)
152
- # end
153
- # else
154
- # # それ以外は対応した例外を発生
155
- # log("error #{url}", true)
156
- # end
157
- # result
158
106
  end
159
107
 
160
108
  private
@@ -18,6 +18,8 @@ module WebLoader
18
18
  drivers = ['pureruby', 'selenium']
19
19
  opt.on('-d DRIVER', '--driver=DRIVER', drivers, drivers.join("|") + "(default pureruby)") {|v| opts[:d] = v }
20
20
  opt.on("--disable-cache", "Disable cache") {|v| opts[:disable_cache] = v }
21
+ opt.on('--user-agent=USERAGENT', 'Set User-Agent header') {|v| opts[:user_agent] = v }
22
+ opt.on('-b', '--binary', 'Download binary files') {|v| opts[:binary] = v }
21
23
  opt.parse!(argv)
22
24
  if argv.empty?
23
25
  puts "Error: URL is required."
@@ -39,6 +41,12 @@ module WebLoader
39
41
  if @opts[:disable_cache]
40
42
  loader.use_cache = false
41
43
  end
44
+ if @opts[:user_agent]
45
+ loader.user_agent = @opts[:user_agent]
46
+ end
47
+ if @opts[:binary]
48
+ loader.binary = true
49
+ end
42
50
  loader.load(url)
43
51
  end
44
52
 
@@ -38,7 +38,7 @@ module WebLoader
38
38
 
39
39
  content_type = driver.execute_script("return document.contentType;")
40
40
 
41
- body = @binary ? page_source.b : driver.page_source
41
+ body = @binary ? driver.page_source.b : driver.page_source
42
42
  response = WebLoader::Response.new(status: 200,
43
43
  headers: {
44
44
  'Content-Type' => content_type
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WebLoader
4
- VERSION = "2.2.0"
4
+ VERSION = "2.3.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - src