web_loader 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/Gemfile.lock +2 -2
- data/lib/web_loader/command.rb +19 -13
- data/lib/web_loader/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e68d4a30eba5781e5c81817527d5cc592a379e853cbb3e5e9811f65f0a8c8be4
|
4
|
+
data.tar.gz: 6415bbe69f36527884cd5ac76e8903583a9d5d18d87e2732aaa13457917ac4a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7184bd1a1066e739bff7890a2917261c2ce834e1db3337ecad1ed3ee0af66af19a966b90785991647fc9c6aa40754e0accb9b08883e31eac326365c65ca1154
|
7
|
+
data.tar.gz: e05e80904dc72f2460fe00843af43c4dbde377a2ca49a6caa39568d9539ae919f608f2f9da7943aac8e60aa9355c74a81c9904c1e8fded05e083c47545ec82c5
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<component name="InspectionProjectProfileManager">
|
2
|
+
<profile version="1.0">
|
3
|
+
<option name="myName" value="Project Default" />
|
4
|
+
<inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5
|
+
</profile>
|
6
|
+
</component>
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/command.rb
CHANGED
@@ -33,6 +33,7 @@ module WebLoader
|
|
33
33
|
@cache_limit = CACHE_LIMIT
|
34
34
|
@always_write_cache = false
|
35
35
|
@response = nil
|
36
|
+
@logger = nil
|
36
37
|
end
|
37
38
|
|
38
39
|
attr_reader :load_cache_page
|
@@ -40,6 +41,7 @@ module WebLoader
|
|
40
41
|
attr_accessor :cache_limit
|
41
42
|
attr_accessor :always_write_cache
|
42
43
|
attr_reader :response
|
44
|
+
attr_accessor :logger
|
43
45
|
|
44
46
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
45
47
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -47,19 +49,19 @@ module WebLoader
|
|
47
49
|
|
48
50
|
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
|
49
51
|
raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
|
50
|
-
log("Load: #{url}"
|
52
|
+
log("Load: #{url}")
|
51
53
|
|
52
54
|
##### キャッシュの読み込み
|
53
55
|
@load_cache_page = false
|
54
56
|
content = try_load_cache(url)
|
55
57
|
if content
|
56
|
-
log("Load cache: #{url}"
|
58
|
+
log("Load cache: #{url}")
|
57
59
|
@load_cache_page = true
|
58
60
|
return content
|
59
61
|
end
|
60
62
|
|
61
63
|
##### サーバーからロード
|
62
|
-
log("Load server: #{url}"
|
64
|
+
log("Load server: #{url}")
|
63
65
|
uri = URI.parse(url)
|
64
66
|
http = Net::HTTP.new(uri.host, uri.port)
|
65
67
|
if uri.scheme == 'https'
|
@@ -71,7 +73,7 @@ module WebLoader
|
|
71
73
|
@response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
72
74
|
rescue Net::ReadTimeout
|
73
75
|
# タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
|
74
|
-
log("Read timeout: #{url}"
|
76
|
+
log("Read timeout: #{url}")
|
75
77
|
if retry_count > 0
|
76
78
|
sleep DEFAULT_SLEEP
|
77
79
|
return load(url, redirect_count , retry_count - 1)
|
@@ -93,14 +95,14 @@ module WebLoader
|
|
93
95
|
end
|
94
96
|
|
95
97
|
if @use_cache || @always_write_cache
|
96
|
-
log("Write cache: #{url}"
|
98
|
+
log("Write cache: #{url}")
|
97
99
|
Cache.write(@cache_dir, url, @response.code, body)
|
98
100
|
end
|
99
101
|
result = body
|
100
102
|
when Net::HTTPRedirection
|
101
103
|
result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
|
102
|
-
|
103
|
-
|
104
|
+
# when Net::HTTPNotFound
|
105
|
+
# result = nil
|
104
106
|
when Net::HTTPTooManyRequests, Net::ReadTimeout
|
105
107
|
# 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
|
106
108
|
if retry_count > 0
|
@@ -108,17 +110,16 @@ module WebLoader
|
|
108
110
|
if @response.is_a?(Net::HTTPTooManyRequests)
|
109
111
|
# HTTPTooManyRequestsならばretry-afterで指定された値を取得。
|
110
112
|
sleep_for = @response.header['retry-after'].to_i + 10
|
111
|
-
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
113
|
+
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
112
114
|
else
|
113
|
-
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
115
|
+
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
114
116
|
end
|
115
117
|
sleep sleep_for
|
116
118
|
result = load(url, redirect_count , retry_count - 1)
|
117
119
|
end
|
118
|
-
|
120
|
+
else
|
119
121
|
# それ以外は対応した例外を発生
|
120
122
|
log("error #{url}", true)
|
121
|
-
@response.value
|
122
123
|
end
|
123
124
|
result
|
124
125
|
end
|
@@ -130,8 +131,13 @@ module WebLoader
|
|
130
131
|
Cache.load_content(@cache_dir, url)
|
131
132
|
end
|
132
133
|
|
133
|
-
def log(msg, put_log)
|
134
|
-
|
134
|
+
def log(msg, put_log = @verbose)
|
135
|
+
return unless put_log
|
136
|
+
if @logger
|
137
|
+
@logger.info(msg)
|
138
|
+
else
|
139
|
+
puts msg
|
140
|
+
end
|
135
141
|
end
|
136
142
|
end
|
137
143
|
end
|
data/lib/web_loader/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extensions: []
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ".idea/.gitignore"
|
22
|
+
- ".idea/inspectionProfiles/Project_Default.xml"
|
22
23
|
- ".idea/misc.xml"
|
23
24
|
- ".idea/modules.xml"
|
24
25
|
- ".idea/vcs.xml"
|