web_loader 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.idea/.gitignore +2 -0
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/Gemfile.lock +2 -2
- data/lib/web_loader/command.rb +32 -22
- data/lib/web_loader/version.rb +1 -1
- data/web_loader.iml +33 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e68d4a30eba5781e5c81817527d5cc592a379e853cbb3e5e9811f65f0a8c8be4
|
4
|
+
data.tar.gz: 6415bbe69f36527884cd5ac76e8903583a9d5d18d87e2732aaa13457917ac4a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7184bd1a1066e739bff7890a2917261c2ce834e1db3337ecad1ed3ee0af66af19a966b90785991647fc9c6aa40754e0accb9b08883e31eac326365c65ca1154
|
7
|
+
data.tar.gz: e05e80904dc72f2460fe00843af43c4dbde377a2ca49a6caa39568d9539ae919f608f2f9da7943aac8e60aa9355c74a81c9904c1e8fded05e083c47545ec82c5
|
data/.idea/.gitignore
CHANGED
data/.idea/inspectionProfiles/Project_Default.xml
CHANGED
@@ -0,0 +1,6 @@
|
|
1
|
+
<component name="InspectionProjectProfileManager">
|
2
|
+
<profile version="1.0">
|
3
|
+
<option name="myName" value="Project Default" />
|
4
|
+
<inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5
|
+
</profile>
|
6
|
+
</component>
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/command.rb
CHANGED
@@ -32,12 +32,16 @@ module WebLoader
|
|
32
32
|
@verbose = false
|
33
33
|
@cache_limit = CACHE_LIMIT
|
34
34
|
@always_write_cache = false
|
35
|
+
@response = nil
|
36
|
+
@logger = nil
|
35
37
|
end
|
36
38
|
|
37
39
|
attr_reader :load_cache_page
|
38
40
|
attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
|
39
41
|
attr_accessor :cache_limit
|
40
42
|
attr_accessor :always_write_cache
|
43
|
+
attr_reader :response
|
44
|
+
attr_accessor :logger
|
41
45
|
|
42
46
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
43
47
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -45,31 +49,31 @@ module WebLoader
|
|
45
49
|
|
46
50
|
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
|
47
51
|
raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
|
48
|
-
log("Load: #{url}"
|
52
|
+
log("Load: #{url}")
|
49
53
|
|
50
54
|
##### キャッシュの読み込み
|
51
55
|
@load_cache_page = false
|
52
56
|
content = try_load_cache(url)
|
53
57
|
if content
|
54
|
-
log("Load cache: #{url}"
|
58
|
+
log("Load cache: #{url}")
|
55
59
|
@load_cache_page = true
|
56
60
|
return content
|
57
61
|
end
|
58
62
|
|
59
63
|
##### サーバーからロード
|
60
|
-
log("Load server: #{url}"
|
64
|
+
log("Load server: #{url}")
|
61
65
|
uri = URI.parse(url)
|
62
66
|
http = Net::HTTP.new(uri.host, uri.port)
|
63
67
|
if uri.scheme == 'https'
|
64
68
|
http.use_ssl = true
|
65
69
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
66
70
|
end
|
67
|
-
response = nil
|
71
|
+
@response = nil
|
68
72
|
begin
|
69
|
-
response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
73
|
+
@response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
70
74
|
rescue Net::ReadTimeout
|
71
75
|
# タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
|
72
|
-
log("Read timeout: #{url}"
|
76
|
+
log("Read timeout: #{url}")
|
73
77
|
if retry_count > 0
|
74
78
|
sleep DEFAULT_SLEEP
|
75
79
|
return load(url, redirect_count , retry_count - 1)
|
@@ -78,43 +82,44 @@ module WebLoader
|
|
78
82
|
|
79
83
|
##### レスポンスの処理
|
80
84
|
result = nil
|
81
|
-
case response
|
85
|
+
case @response
|
82
86
|
when Net::HTTPSuccess
|
83
|
-
# responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
|
84
|
-
body = response.body
|
87
|
+
# @responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
|
88
|
+
body = @response.body
|
85
89
|
unless @binary
|
86
90
|
# デフォルトでは ASCII-8BITが帰ってくる。
|
87
91
|
# Content-Typeのcharsetとみなす。
|
88
92
|
# https://bugs.ruby-lang.org/issues/2567
|
89
|
-
encoding = response.type_params['charset']
|
93
|
+
encoding = @response.type_params['charset']
|
90
94
|
body = toutf8(body, encoding)
|
91
95
|
end
|
92
96
|
|
93
97
|
if @use_cache || @always_write_cache
|
94
|
-
log("Write cache: #{url}"
|
95
|
-
Cache.write(@cache_dir, url, response.code, body)
|
98
|
+
log("Write cache: #{url}")
|
99
|
+
Cache.write(@cache_dir, url, @response.code, body)
|
96
100
|
end
|
97
101
|
result = body
|
98
102
|
when Net::HTTPRedirection
|
99
|
-
result = load(to_redirect_url(uri, response['location']), redirect_count - 1)
|
100
|
-
|
103
|
+
result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
|
104
|
+
# when Net::HTTPNotFound
|
105
|
+
# result = nil
|
106
|
+
when Net::HTTPTooManyRequests, Net::ReadTimeout
|
101
107
|
# 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
|
102
108
|
if retry_count > 0
|
103
109
|
sleep_for = 10
|
104
|
-
if response.is_a?(Net::HTTPTooManyRequests)
|
110
|
+
if @response.is_a?(Net::HTTPTooManyRequests)
|
105
111
|
# HTTPTooManyRequestsならばretry-afterで指定された値を取得。
|
106
|
-
sleep_for = response.header['retry-after'].to_i + 10
|
107
|
-
log("Rate limit: #{uri} #{response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
112
|
+
sleep_for = @response.header['retry-after'].to_i + 10
|
113
|
+
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
108
114
|
else
|
109
|
-
log("Unknown response: #{uri} #{response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
115
|
+
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
110
116
|
end
|
111
117
|
sleep sleep_for
|
112
118
|
result = load(url, redirect_count , retry_count - 1)
|
113
119
|
end
|
114
|
-
|
120
|
+
else
|
115
121
|
# それ以外は対応した例外を発生
|
116
122
|
log("error #{url}", true)
|
117
|
-
response.value
|
118
123
|
end
|
119
124
|
result
|
120
125
|
end
|
@@ -126,8 +131,13 @@ module WebLoader
|
|
126
131
|
Cache.load_content(@cache_dir, url)
|
127
132
|
end
|
128
133
|
|
129
|
-
def log(msg, put_log)
|
130
|
-
|
134
|
+
def log(msg, put_log = @verbose)
|
135
|
+
return unless put_log
|
136
|
+
if @logger
|
137
|
+
@logger.info(msg)
|
138
|
+
else
|
139
|
+
puts msg
|
140
|
+
end
|
131
141
|
end
|
132
142
|
end
|
133
143
|
end
|
data/lib/web_loader/version.rb
CHANGED
data/web_loader.iml
CHANGED
@@ -57,4 +57,37 @@
|
|
57
57
|
<RakeTaskImpl id="rake" />
|
58
58
|
</option>
|
59
59
|
</component>
|
60
|
+
<component name="RakeTasksCache-v2">
|
61
|
+
<option name="myRootTask">
|
62
|
+
<RakeTaskImpl id="rake">
|
63
|
+
<subtasks>
|
64
|
+
<RakeTaskImpl description="Build web_loader-1.4.1.gem into the pkg directory" fullCommand="build" id="build" />
|
65
|
+
<RakeTaskImpl id="build">
|
66
|
+
<subtasks>
|
67
|
+
<RakeTaskImpl description="Generate SHA512 checksum if web_loader-1.4.1.gem into the checksums directory" fullCommand="build:checksum" id="checksum" />
|
68
|
+
</subtasks>
|
69
|
+
</RakeTaskImpl>
|
70
|
+
<RakeTaskImpl description="Remove any temporary products" fullCommand="clean" id="clean" />
|
71
|
+
<RakeTaskImpl description="Remove any generated files" fullCommand="clobber" id="clobber" />
|
72
|
+
<RakeTaskImpl description="Build and install web_loader-1.4.1.gem into system gems" fullCommand="install" id="install" />
|
73
|
+
<RakeTaskImpl id="install">
|
74
|
+
<subtasks>
|
75
|
+
<RakeTaskImpl description="Build and install web_loader-1.4.1.gem into system gems without network access" fullCommand="install:local" id="local" />
|
76
|
+
</subtasks>
|
77
|
+
</RakeTaskImpl>
|
78
|
+
<RakeTaskImpl description="Create tag v1.4.1 and build and push web_loader-1.4.1.gem to rubygems.org" fullCommand="release[remote]" id="release[remote]" />
|
79
|
+
<RakeTaskImpl description="Run tests" fullCommand="test" id="test" />
|
80
|
+
<RakeTaskImpl description="" fullCommand="default" id="default" />
|
81
|
+
<RakeTaskImpl description="" fullCommand="release" id="release" />
|
82
|
+
<RakeTaskImpl id="release">
|
83
|
+
<subtasks>
|
84
|
+
<RakeTaskImpl description="" fullCommand="release:guard_clean" id="guard_clean" />
|
85
|
+
<RakeTaskImpl description="" fullCommand="release:rubygem_push" id="rubygem_push" />
|
86
|
+
<RakeTaskImpl description="" fullCommand="release:source_control_push" id="source_control_push" />
|
87
|
+
</subtasks>
|
88
|
+
</RakeTaskImpl>
|
89
|
+
</subtasks>
|
90
|
+
</RakeTaskImpl>
|
91
|
+
</option>
|
92
|
+
</component>
|
60
93
|
</module>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extensions: []
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ".idea/.gitignore"
|
22
|
+
- ".idea/inspectionProfiles/Project_Default.xml"
|
22
23
|
- ".idea/misc.xml"
|
23
24
|
- ".idea/modules.xml"
|
24
25
|
- ".idea/vcs.xml"
|