web_loader 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/Gemfile.lock +2 -2
- data/lib/web_loader/command.rb +19 -13
- data/lib/web_loader/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e68d4a30eba5781e5c81817527d5cc592a379e853cbb3e5e9811f65f0a8c8be4
|
4
|
+
data.tar.gz: 6415bbe69f36527884cd5ac76e8903583a9d5d18d87e2732aaa13457917ac4a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7184bd1a1066e739bff7890a2917261c2ce834e1db3337ecad1ed3ee0af66af19a966b90785991647fc9c6aa40754e0accb9b08883e31eac326365c65ca1154
|
7
|
+
data.tar.gz: e05e80904dc72f2460fe00843af43c4dbde377a2ca49a6caa39568d9539ae919f608f2f9da7943aac8e60aa9355c74a81c9904c1e8fded05e083c47545ec82c5
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<component name="InspectionProjectProfileManager">
|
2
|
+
<profile version="1.0">
|
3
|
+
<option name="myName" value="Project Default" />
|
4
|
+
<inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5
|
+
</profile>
|
6
|
+
</component>
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/command.rb
CHANGED
@@ -33,6 +33,7 @@ module WebLoader
|
|
33
33
|
@cache_limit = CACHE_LIMIT
|
34
34
|
@always_write_cache = false
|
35
35
|
@response = nil
|
36
|
+
@logger = nil
|
36
37
|
end
|
37
38
|
|
38
39
|
attr_reader :load_cache_page
|
@@ -40,6 +41,7 @@ module WebLoader
|
|
40
41
|
attr_accessor :cache_limit
|
41
42
|
attr_accessor :always_write_cache
|
42
43
|
attr_reader :response
|
44
|
+
attr_accessor :logger
|
43
45
|
|
44
46
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
45
47
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -47,19 +49,19 @@ module WebLoader
|
|
47
49
|
|
48
50
|
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
|
49
51
|
raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
|
50
|
-
log("Load: #{url}"
|
52
|
+
log("Load: #{url}")
|
51
53
|
|
52
54
|
##### キャッシュの読み込み
|
53
55
|
@load_cache_page = false
|
54
56
|
content = try_load_cache(url)
|
55
57
|
if content
|
56
|
-
log("Load cache: #{url}"
|
58
|
+
log("Load cache: #{url}")
|
57
59
|
@load_cache_page = true
|
58
60
|
return content
|
59
61
|
end
|
60
62
|
|
61
63
|
##### サーバーからロード
|
62
|
-
log("Load server: #{url}"
|
64
|
+
log("Load server: #{url}")
|
63
65
|
uri = URI.parse(url)
|
64
66
|
http = Net::HTTP.new(uri.host, uri.port)
|
65
67
|
if uri.scheme == 'https'
|
@@ -71,7 +73,7 @@ module WebLoader
|
|
71
73
|
@response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
72
74
|
rescue Net::ReadTimeout
|
73
75
|
# タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
|
74
|
-
log("Read timeout: #{url}"
|
76
|
+
log("Read timeout: #{url}")
|
75
77
|
if retry_count > 0
|
76
78
|
sleep DEFAULT_SLEEP
|
77
79
|
return load(url, redirect_count , retry_count - 1)
|
@@ -93,14 +95,14 @@ module WebLoader
|
|
93
95
|
end
|
94
96
|
|
95
97
|
if @use_cache || @always_write_cache
|
96
|
-
log("Write cache: #{url}"
|
98
|
+
log("Write cache: #{url}")
|
97
99
|
Cache.write(@cache_dir, url, @response.code, body)
|
98
100
|
end
|
99
101
|
result = body
|
100
102
|
when Net::HTTPRedirection
|
101
103
|
result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
|
102
|
-
|
103
|
-
|
104
|
+
# when Net::HTTPNotFound
|
105
|
+
# result = nil
|
104
106
|
when Net::HTTPTooManyRequests, Net::ReadTimeout
|
105
107
|
# 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
|
106
108
|
if retry_count > 0
|
@@ -108,17 +110,16 @@ module WebLoader
|
|
108
110
|
if @response.is_a?(Net::HTTPTooManyRequests)
|
109
111
|
# HTTPTooManyRequestsならばretry-afterで指定された値を取得。
|
110
112
|
sleep_for = @response.header['retry-after'].to_i + 10
|
111
|
-
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
113
|
+
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
112
114
|
else
|
113
|
-
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
115
|
+
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
114
116
|
end
|
115
117
|
sleep sleep_for
|
116
118
|
result = load(url, redirect_count , retry_count - 1)
|
117
119
|
end
|
118
|
-
|
120
|
+
else
|
119
121
|
# それ以外は対応した例外を発生
|
120
122
|
log("error #{url}", true)
|
121
|
-
@response.value
|
122
123
|
end
|
123
124
|
result
|
124
125
|
end
|
@@ -130,8 +131,13 @@ module WebLoader
|
|
130
131
|
Cache.load_content(@cache_dir, url)
|
131
132
|
end
|
132
133
|
|
133
|
-
def log(msg, put_log)
|
134
|
-
|
134
|
+
def log(msg, put_log = @verbose)
|
135
|
+
return unless put_log
|
136
|
+
if @logger
|
137
|
+
@logger.info(msg)
|
138
|
+
else
|
139
|
+
puts msg
|
140
|
+
end
|
135
141
|
end
|
136
142
|
end
|
137
143
|
end
|
data/lib/web_loader/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extensions: []
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ".idea/.gitignore"
|
22
|
+
- ".idea/inspectionProfiles/Project_Default.xml"
|
22
23
|
- ".idea/misc.xml"
|
23
24
|
- ".idea/modules.xml"
|
24
25
|
- ".idea/vcs.xml"
|