web_loader 1.4.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/.gitignore +2 -0
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/Gemfile.lock +2 -2
- data/lib/web_loader/command.rb +32 -22
- data/lib/web_loader/version.rb +1 -1
- data/web_loader.iml +33 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e68d4a30eba5781e5c81817527d5cc592a379e853cbb3e5e9811f65f0a8c8be4
|
4
|
+
data.tar.gz: 6415bbe69f36527884cd5ac76e8903583a9d5d18d87e2732aaa13457917ac4a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7184bd1a1066e739bff7890a2917261c2ce834e1db3337ecad1ed3ee0af66af19a966b90785991647fc9c6aa40754e0accb9b08883e31eac326365c65ca1154
|
7
|
+
data.tar.gz: e05e80904dc72f2460fe00843af43c4dbde377a2ca49a6caa39568d9539ae919f608f2f9da7943aac8e60aa9355c74a81c9904c1e8fded05e083c47545ec82c5
|
data/.idea/.gitignore
CHANGED
data/.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
<component name="InspectionProjectProfileManager">
|
2
|
+
<profile version="1.0">
|
3
|
+
<option name="myName" value="Project Default" />
|
4
|
+
<inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5
|
+
</profile>
|
6
|
+
</component>
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/command.rb
CHANGED
@@ -32,12 +32,16 @@ module WebLoader
|
|
32
32
|
@verbose = false
|
33
33
|
@cache_limit = CACHE_LIMIT
|
34
34
|
@always_write_cache = false
|
35
|
+
@response = nil
|
36
|
+
@logger = nil
|
35
37
|
end
|
36
38
|
|
37
39
|
attr_reader :load_cache_page
|
38
40
|
attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
|
39
41
|
attr_accessor :cache_limit
|
40
42
|
attr_accessor :always_write_cache
|
43
|
+
attr_reader :response
|
44
|
+
attr_accessor :logger
|
41
45
|
|
42
46
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
43
47
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -45,31 +49,31 @@ module WebLoader
|
|
45
49
|
|
46
50
|
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
|
47
51
|
raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
|
48
|
-
log("Load: #{url}"
|
52
|
+
log("Load: #{url}")
|
49
53
|
|
50
54
|
##### キャッシュの読み込み
|
51
55
|
@load_cache_page = false
|
52
56
|
content = try_load_cache(url)
|
53
57
|
if content
|
54
|
-
log("Load cache: #{url}"
|
58
|
+
log("Load cache: #{url}")
|
55
59
|
@load_cache_page = true
|
56
60
|
return content
|
57
61
|
end
|
58
62
|
|
59
63
|
##### サーバーからロード
|
60
|
-
log("Load server: #{url}"
|
64
|
+
log("Load server: #{url}")
|
61
65
|
uri = URI.parse(url)
|
62
66
|
http = Net::HTTP.new(uri.host, uri.port)
|
63
67
|
if uri.scheme == 'https'
|
64
68
|
http.use_ssl = true
|
65
69
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
66
70
|
end
|
67
|
-
response = nil
|
71
|
+
@response = nil
|
68
72
|
begin
|
69
|
-
response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
73
|
+
@response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
70
74
|
rescue Net::ReadTimeout
|
71
75
|
# タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
|
72
|
-
log("Read timeout: #{url}"
|
76
|
+
log("Read timeout: #{url}")
|
73
77
|
if retry_count > 0
|
74
78
|
sleep DEFAULT_SLEEP
|
75
79
|
return load(url, redirect_count , retry_count - 1)
|
@@ -78,43 +82,44 @@ module WebLoader
|
|
78
82
|
|
79
83
|
##### レスポンスの処理
|
80
84
|
result = nil
|
81
|
-
case response
|
85
|
+
case @response
|
82
86
|
when Net::HTTPSuccess
|
83
|
-
# responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
|
84
|
-
body = response.body
|
87
|
+
# @responseがNet::HTTPSuccessのサブクラスの場合成功とみなし読み込んだ内容を返す
|
88
|
+
body = @response.body
|
85
89
|
unless @binary
|
86
90
|
# デフォルトでは ASCII-8BITが帰ってくる。
|
87
91
|
# Content-Typeのcharsetとみなす。
|
88
92
|
# https://bugs.ruby-lang.org/issues/2567
|
89
|
-
encoding = response.type_params['charset']
|
93
|
+
encoding = @response.type_params['charset']
|
90
94
|
body = toutf8(body, encoding)
|
91
95
|
end
|
92
96
|
|
93
97
|
if @use_cache || @always_write_cache
|
94
|
-
log("Write cache: #{url}"
|
95
|
-
Cache.write(@cache_dir, url, response.code, body)
|
98
|
+
log("Write cache: #{url}")
|
99
|
+
Cache.write(@cache_dir, url, @response.code, body)
|
96
100
|
end
|
97
101
|
result = body
|
98
102
|
when Net::HTTPRedirection
|
99
|
-
result = load(to_redirect_url(uri, response['location']), redirect_count - 1)
|
100
|
-
|
103
|
+
result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
|
104
|
+
# when Net::HTTPNotFound
|
105
|
+
# result = nil
|
106
|
+
when Net::HTTPTooManyRequests, Net::ReadTimeout
|
101
107
|
# 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
|
102
108
|
if retry_count > 0
|
103
109
|
sleep_for = 10
|
104
|
-
if response.is_a?(Net::HTTPTooManyRequests)
|
110
|
+
if @response.is_a?(Net::HTTPTooManyRequests)
|
105
111
|
# HTTPTooManyRequestsならばretry-afterで指定された値を取得。
|
106
|
-
sleep_for = response.header['retry-after'].to_i + 10
|
107
|
-
log("Rate limit: #{uri} #{response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
112
|
+
sleep_for = @response.header['retry-after'].to_i + 10
|
113
|
+
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
108
114
|
else
|
109
|
-
log("Unknown response: #{uri} #{response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
115
|
+
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
110
116
|
end
|
111
117
|
sleep sleep_for
|
112
118
|
result = load(url, redirect_count , retry_count - 1)
|
113
119
|
end
|
114
|
-
|
120
|
+
else
|
115
121
|
# それ以外は対応した例外を発生
|
116
122
|
log("error #{url}", true)
|
117
|
-
response.value
|
118
123
|
end
|
119
124
|
result
|
120
125
|
end
|
@@ -126,8 +131,13 @@ module WebLoader
|
|
126
131
|
Cache.load_content(@cache_dir, url)
|
127
132
|
end
|
128
133
|
|
129
|
-
def log(msg, put_log)
|
130
|
-
|
134
|
+
def log(msg, put_log = @verbose)
|
135
|
+
return unless put_log
|
136
|
+
if @logger
|
137
|
+
@logger.info(msg)
|
138
|
+
else
|
139
|
+
puts msg
|
140
|
+
end
|
131
141
|
end
|
132
142
|
end
|
133
143
|
end
|
data/lib/web_loader/version.rb
CHANGED
data/web_loader.iml
CHANGED
@@ -57,4 +57,37 @@
|
|
57
57
|
<RakeTaskImpl id="rake" />
|
58
58
|
</option>
|
59
59
|
</component>
|
60
|
+
<component name="RakeTasksCache-v2">
|
61
|
+
<option name="myRootTask">
|
62
|
+
<RakeTaskImpl id="rake">
|
63
|
+
<subtasks>
|
64
|
+
<RakeTaskImpl description="Build web_loader-1.4.1.gem into the pkg directory" fullCommand="build" id="build" />
|
65
|
+
<RakeTaskImpl id="build">
|
66
|
+
<subtasks>
|
67
|
+
<RakeTaskImpl description="Generate SHA512 checksum if web_loader-1.4.1.gem into the checksums directory" fullCommand="build:checksum" id="checksum" />
|
68
|
+
</subtasks>
|
69
|
+
</RakeTaskImpl>
|
70
|
+
<RakeTaskImpl description="Remove any temporary products" fullCommand="clean" id="clean" />
|
71
|
+
<RakeTaskImpl description="Remove any generated files" fullCommand="clobber" id="clobber" />
|
72
|
+
<RakeTaskImpl description="Build and install web_loader-1.4.1.gem into system gems" fullCommand="install" id="install" />
|
73
|
+
<RakeTaskImpl id="install">
|
74
|
+
<subtasks>
|
75
|
+
<RakeTaskImpl description="Build and install web_loader-1.4.1.gem into system gems without network access" fullCommand="install:local" id="local" />
|
76
|
+
</subtasks>
|
77
|
+
</RakeTaskImpl>
|
78
|
+
<RakeTaskImpl description="Create tag v1.4.1 and build and push web_loader-1.4.1.gem to rubygems.org" fullCommand="release[remote]" id="release[remote]" />
|
79
|
+
<RakeTaskImpl description="Run tests" fullCommand="test" id="test" />
|
80
|
+
<RakeTaskImpl description="" fullCommand="default" id="default" />
|
81
|
+
<RakeTaskImpl description="" fullCommand="release" id="release" />
|
82
|
+
<RakeTaskImpl id="release">
|
83
|
+
<subtasks>
|
84
|
+
<RakeTaskImpl description="" fullCommand="release:guard_clean" id="guard_clean" />
|
85
|
+
<RakeTaskImpl description="" fullCommand="release:rubygem_push" id="rubygem_push" />
|
86
|
+
<RakeTaskImpl description="" fullCommand="release:source_control_push" id="source_control_push" />
|
87
|
+
</subtasks>
|
88
|
+
</RakeTaskImpl>
|
89
|
+
</subtasks>
|
90
|
+
</RakeTaskImpl>
|
91
|
+
</option>
|
92
|
+
</component>
|
60
93
|
</module>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extensions: []
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ".idea/.gitignore"
|
22
|
+
- ".idea/inspectionProfiles/Project_Default.xml"
|
22
23
|
- ".idea/misc.xml"
|
23
24
|
- ".idea/modules.xml"
|
24
25
|
- ".idea/vcs.xml"
|