web_loader 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1434106ba64ce88a81a2c382b330d92d82525162638d02ee0cd8a11958f50acd
4
- data.tar.gz: c55bafdbea2ae8fda51272087683e4a0f157c275a001cdcc357a086add410afc
3
+ metadata.gz: d51a9c53f63ed251de81a4664f36038919309a79298397ea72ae26fabd320734
4
+ data.tar.gz: 69e3294e0c85c07f88241e2249fcbeb512d3f2f446324788b5fbfcb1493b6908
5
5
  SHA512:
6
- metadata.gz: 7139ac3325dd3daaf23bc6aed6d0b645c65df7fc943ace0ccf730c70ec3e30b7f6aa6e677198697b2414e334279a43a06669fc21d8b40b8bcebf215d0689e5f6
7
- data.tar.gz: bad9b7b7e4fce6a6f5c2d9947c0ac44072556a5cdcbf8205a689e2240e34bb5562f69a7dcbd09412ca89cbd5249530eb6806a978aecb82a455fa77a5eddbe5f6
6
+ metadata.gz: 9ed22698344e0212b05026fbf21afe9b5b77c3e7a88973c909c6aa1fbdb798626bc981bdae1b76490832b11720d2e88f1d89cea4fea7f05c5c99aea46894ea56
7
+ data.tar.gz: 5ee5974c7650ec1153a57e7d3299ec29349ac2797665527174e90d627389a17814bc3ba2c733f9c7e2850b6266caf1e89640944e33f4d36edeeb215a2c04e459
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AskMigrationStateService">
4
+ <option name="migrationStatus" value="COMPLETED" />
5
+ </component>
6
+ </project>
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Ask2AgentMigrationStateService">
4
+ <option name="migrationStatus" value="COMPLETED" />
5
+ </component>
6
+ </project>
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="EditMigrationStateService">
4
+ <option name="migrationStatus" value="COMPLETED" />
5
+ </component>
6
+ </project>
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_loader (1.8.0)
4
+ web_loader (2.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -1,7 +1,6 @@
1
1
  require 'open-uri'
2
2
  require 'net/http'
3
3
  require 'uri'
4
- require 'kconv'
5
4
 
6
5
  module WebLoader
7
6
  class Command
@@ -23,7 +22,7 @@ module WebLoader
23
22
  File.binwrite(file, content)
24
23
  end
25
24
 
26
- def initialize
25
+ def initialize(driver = ::WebLoader::Drivers::HttpDriver.new)
27
26
  @use_cache = true
28
27
  @load_cache_page = false #キャッシュを読み込んだかどうか
29
28
  @cache_dir = File.expand_path(CACHE_DIR)
@@ -34,12 +33,18 @@ module WebLoader
34
33
  @always_write_cache = false
35
34
  @response = nil
36
35
  @logger = nil
36
+
37
+ # ドライバーのセットアップ
38
+ @driver = driver
39
+ @driver.user_agent = @user_agent
40
+ @driver.binary = @binary
37
41
  end
38
42
 
39
43
  attr_reader :load_cache_page
40
44
  attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
41
45
  attr_accessor :cache_limit
42
46
  attr_accessor :always_write_cache
47
+ attr_accessor :driver
43
48
  attr_reader :response
44
49
  attr_accessor :logger
45
50
 
@@ -62,15 +67,16 @@ module WebLoader
62
67
 
63
68
  ##### サーバーからロード
64
69
  log("Load server: #{url}")
65
- uri = URI.parse(url)
66
- http = Net::HTTP.new(uri.host, uri.port)
67
- if uri.scheme == 'https'
68
- http.use_ssl = true
69
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
70
- end
71
- @response = nil
70
+ # uri = URI.parse(url)
71
+ # http = Net::HTTP.new(uri.host, uri.port)
72
+ # if uri.scheme == 'https'
73
+ # http.use_ssl = true
74
+ # http.verify_mode = OpenSSL::SSL::VERIFY_NONE
75
+ # end
76
+ # @response = nil
72
77
  begin
73
- @response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
78
+ # @response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
79
+ @response = @driver.fetch(url)
74
80
  rescue Net::ReadTimeout
75
81
  # タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
76
82
  log("Read timeout: #{url}")
@@ -82,38 +88,21 @@ module WebLoader
82
88
 
83
89
  ##### レスポンスの処理
84
90
  result = nil
85
- case @response
86
- when Net::HTTPSuccess
87
- # @responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
91
+ if response.ok?
88
92
  body = @response.body
89
- unless @binary
90
- # デフォルトでは ASCII-8BITが帰ってくる。
91
- # Content-Typeのcharsetとみなす。
92
- # https://bugs.ruby-lang.org/issues/2567
93
- encoding = @response.type_params['charset']
94
- body = toutf8(body, encoding)
95
- end
96
-
97
93
  if @use_cache || @always_write_cache
98
94
  log("Write cache: #{url}")
99
- Cache.write(@cache_dir, url, @response.code, body)
95
+ Cache.write(@cache_dir, url, @response.status, body)
100
96
  end
101
97
  result = body
102
- when Net::HTTPRedirection
103
- result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
104
- # when Net::HTTPNotFound
105
- # result = nil
106
- when Net::HTTPTooManyRequests, Net::ReadTimeout
98
+ elsif response.redirect?
99
+ result = load(to_redirect_url(URI.parse(url), @response.headers['location']), redirect_count - 1)
100
+ elsif response.rate_limited?
107
101
  # 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
108
102
  if retry_count > 0
109
- sleep_for = 10
110
- if @response.is_a?(Net::HTTPTooManyRequests)
111
- # HTTPTooManyRequestsならばretry-afterで指定された値を取得。
112
- sleep_for = @response.header['retry-after'].to_i + 10
113
- log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
114
- else
115
- log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
116
- end
103
+ # HTTPTooManyRequestsならばretry-afterで指定された値を取得。
104
+ sleep_for = @response.header['retry-after'].to_i + 10
105
+ log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
117
106
  sleep sleep_for
118
107
  result = load(url, redirect_count , retry_count - 1)
119
108
  end
@@ -121,7 +110,51 @@ module WebLoader
121
110
  # それ以外は対応した例外を発生
122
111
  log("error #{url}", true)
123
112
  end
113
+
124
114
  result
115
+
116
+ # ##### レスポンスの処理
117
+ # result = nil
118
+ # case @response
119
+ # when Net::HTTPSuccess
120
+ # # @responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
121
+ # body = @response.body
122
+ # unless @binary
123
+ # # デフォルトでは ASCII-8BITが帰ってくる。
124
+ # # Content-Typeのcharsetとみなす。
125
+ # # https://bugs.ruby-lang.org/issues/2567
126
+ # encoding = @response.type_params['charset']
127
+ # body = toutf8(body, encoding)
128
+ # end
129
+ #
130
+ # if @use_cache || @always_write_cache
131
+ # log("Write cache: #{url}")
132
+ # Cache.write(@cache_dir, url, @response.code, body)
133
+ # end
134
+ # result = body
135
+ # when Net::HTTPRedirection
136
+ # result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
137
+ # # when Net::HTTPNotFound
138
+ # # result = nil
139
+ # when Net::HTTPTooManyRequests, Net::ReadTimeout
140
+ # # 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
141
+ # if retry_count > 0
142
+ # sleep_for = 10
143
+ # if @response.is_a?(Net::HTTPTooManyRequests)
144
+ # # HTTPTooManyRequestsならばretry-afterで指定された値を取得。
145
+ # sleep_for = @response.header['retry-after'].to_i + 10
146
+ # log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
147
+ # else
148
+ # log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
149
+ # end
150
+ # sleep sleep_for
151
+ # result = load(url, redirect_count , retry_count - 1)
152
+ # end
153
+ # else
154
+ # # それ以外は対応した例外を発生
155
+ # log("error #{url}", true)
156
+ # end
157
+ # result
125
158
  end
126
159
 
127
160
  private
@@ -0,0 +1,30 @@
1
+ require 'open-uri'
2
+ require 'net/http'
3
+ require 'uri'
4
+ require 'kconv'
5
+
6
+
7
module WebLoader
  module Drivers
    # Driver that retrieves a URL with Net::HTTP and wraps the result in a
    # WebLoader::Response.
    class HttpDriver
      # user_agent: value sent as the User-Agent request header; the header is
      #             omitted when this is nil.
      # binary:     forwarded to WebLoader::Response.from_net_http as its
      #             +binary+ argument.
      attr_accessor :user_agent, :binary

      # verify_ssl: when truthy, HTTPS connections verify the peer certificate
      #             (OpenSSL::SSL::VERIFY_PEER). Defaults to false, which keeps
      #             the previous behaviour of VERIFY_NONE.
      # NOTE(review): VERIFY_NONE accepts any certificate; consider enabling
      # verification for untrusted networks.
      attr_accessor :verify_ssl

      def initialize
        @user_agent = nil
        @binary = false
        @verify_ssl = false
      end

      # Performs a GET request for +url+.
      #
      # @param url [String] absolute http/https URL
      # @return [WebLoader::Response] result of Response.from_net_http
      def fetch(url)
        uri = URI.parse(url)
        http = Net::HTTP.new(uri.host, uri.port)
        if uri.scheme == 'https'
          http.use_ssl = true
          http.verify_mode = @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
        end

        # Do not hand a nil header value to Net::HTTP when no agent is configured.
        headers = @user_agent.nil? ? {} : { 'User-Agent' => @user_agent }

        # request_uri = path + '?' + query
        response = http.get(uri.request_uri, headers)
        WebLoader::Response.from_net_http(response, @binary)
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module WebLoader
  # Driver-independent response value: a numeric status code, a header hash
  # and a body.
  class Response
    include WebLoader::Utils

    # Builds a Response from a Net::HTTP response object.
    #
    # @param response [Net::HTTPResponse] must respond to body, code,
    #   type_params and each_header
    # @param binary [Boolean] when truthy the body is passed through untouched;
    #   otherwise it is converted with WebLoader::Utils.toutf8
    # @return [WebLoader::Response]
    def self.from_net_http(response, binary)
      body = response.body
      # Net::HTTP yields a nil body for responses that carry none
      # (e.g. 204/304), so there is nothing to convert in that case.
      unless binary || body.nil?
        # Net::HTTP returns the body as ASCII-8BIT by default, so treat it as
        # being in the charset declared by Content-Type.
        # https://bugs.ruby-lang.org/issues/2567
        encoding = response.type_params['charset']
        body = ::WebLoader::Utils.toutf8(body, encoding)
      end
      new(
        status: response.code.to_i,
        headers: response.each_header.to_h,
        body: body
      )
    end

    # Builds a Response from a Selenium-style driver.
    #
    # @param driver [#page_source] object whose page_source becomes the body
    # @param original_url [String] currently unused (redirect detection is
    #   disabled)
    # @return [WebLoader::Response] always reports status 200 with no headers
    def self.from_selenium(driver, original_url)
      # Success (200) is assumed by default.
      status = 200
      # redirected = driver.current_url != original_url
      # status = 300 if redirected # crude redirect detection
      new(status: status, headers: {}, body: driver.page_source)
    end

    # @param status [#to_i] status code, stored as an Integer
    # @param headers [Hash, nil] response headers; nil is stored as {}
    # @param body [String, nil] response body
    def initialize(status:, headers: {}, body: nil)
      @status = status.to_i
      @headers = headers || {}
      @body = body
    end

    attr_reader :status, :headers, :body

    # True for 2xx statuses.
    def ok?
      @status.between?(200, 299)
    end

    # True for 3xx statuses.
    def redirect?
      @status.between?(300, 399)
    end

    # True for 429 (Too Many Requests).
    def rate_limited?
      @status == 429
    end
  end
end
@@ -67,6 +67,7 @@ module WebLoader
67
67
  end
68
68
  result
69
69
  end
70
+ module_function :toutf8
70
71
 
71
72
  def to_redirect_url(orig_uri, location)
72
73
  redirect_url = location
@@ -75,5 +76,6 @@ module WebLoader
75
76
  end
76
77
  redirect_url
77
78
  end
79
+ module_function :to_redirect_url
78
80
  end
79
81
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WebLoader
4
- VERSION = "1.8.0"
4
+ VERSION = "2.0.0"
5
5
  end
data/lib/web_loader.rb CHANGED
@@ -4,6 +4,8 @@ require_relative "web_loader/version"
4
4
  require_relative "web_loader/utils"
5
5
  require_relative "web_loader/cache"
6
6
  require_relative "web_loader/command"
7
+ require_relative "web_loader/response"
8
+ require_relative "web_loader/drivers/http_driver"
7
9
 
8
10
  module WebLoader
9
11
  class Error < StandardError; end
data/web_loader.iml CHANGED
@@ -10,6 +10,80 @@
10
10
  </content>
11
11
  <orderEntry type="jdk" jdkName="rbenv: 3.2.6" jdkType="RUBY_SDK" />
12
12
  <orderEntry type="sourceFolder" forTests="false" />
13
+ <orderEntry type="module-library">
14
+ <library name="minitest (v5.26.1) [path][gem]" type="rubylib">
15
+ <properties>
16
+ <option name="additionalInfo">
17
+ <AdditionalInfo>
18
+ <option name="authors" value="該当なし" />
19
+ <option name="email" value="該当なし" />
20
+ <option name="homepage" value="該当なし" />
21
+ <option name="summary" value="該当なし" />
22
+ </AdditionalInfo>
23
+ </option>
24
+ <option name="fromPath" value="true" />
25
+ <option name="name" value="minitest" />
26
+ <option name="requirePaths">
27
+ <list>
28
+ <option value="lib" />
29
+ </list>
30
+ </option>
31
+ <option name="url" value="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1" />
32
+ <option name="version" value="5.26.1" />
33
+ </properties>
34
+ <CLASSES>
35
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1/lib" />
36
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1/test" />
37
+ </CLASSES>
38
+ <JAVADOC />
39
+ <SOURCES>
40
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1/lib" />
41
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1/test" />
42
+ </SOURCES>
43
+ <excluded>
44
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/minitest-5.26.1/test" />
45
+ </excluded>
46
+ </library>
47
+ </orderEntry>
48
+ <orderEntry type="module-library">
49
+ <library name="rake (v13.3.1) [path][gem]" type="rubylib">
50
+ <properties>
51
+ <option name="additionalInfo">
52
+ <AdditionalInfo>
53
+ <option name="authors" value="該当なし" />
54
+ <option name="email" value="該当なし" />
55
+ <option name="homepage" value="該当なし" />
56
+ <option name="summary" value="該当なし" />
57
+ </AdditionalInfo>
58
+ </option>
59
+ <option name="fromPath" value="true" />
60
+ <option name="name" value="rake" />
61
+ <option name="requirePaths">
62
+ <list>
63
+ <option value="lib" />
64
+ </list>
65
+ </option>
66
+ <option name="url" value="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1" />
67
+ <option name="version" value="13.3.1" />
68
+ </properties>
69
+ <CLASSES>
70
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/doc" />
71
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/exe" />
72
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/lib" />
73
+ </CLASSES>
74
+ <JAVADOC />
75
+ <SOURCES>
76
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/doc" />
77
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/exe" />
78
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/lib" />
79
+ </SOURCES>
80
+ <excluded>
81
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/doc" />
82
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.2.0/gems/rake-13.3.1/exe" />
83
+ </excluded>
84
+ </library>
85
+ </orderEntry>
86
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v2.6.3, rbenv: 3.2.6) [gem]" level="application" />
13
87
  </component>
14
88
  <component name="RakeTasksCache">
15
89
  <option name="myRootTask">
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - src
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-12 00:00:00.000000000 Z
11
+ date: 2025-11-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Web loader.
14
14
  email:
@@ -21,6 +21,9 @@ files:
21
21
  - ".DS_Store"
22
22
  - ".idea/.gitignore"
23
23
  - ".idea/copilot.data.migration.agent.xml"
24
+ - ".idea/copilot.data.migration.ask.xml"
25
+ - ".idea/copilot.data.migration.ask2agent.xml"
26
+ - ".idea/copilot.data.migration.edit.xml"
24
27
  - ".idea/inspectionProfiles/Project_Default.xml"
25
28
  - ".idea/misc.xml"
26
29
  - ".idea/modules.xml"
@@ -36,6 +39,8 @@ files:
36
39
  - lib/web_loader.rb
37
40
  - lib/web_loader/cache.rb
38
41
  - lib/web_loader/command.rb
42
+ - lib/web_loader/drivers/http_driver.rb
43
+ - lib/web_loader/response.rb
39
44
  - lib/web_loader/utils.rb
40
45
  - lib/web_loader/version.rb
41
46
  - sig/web_loader.rbs