web_loader 1.5.0 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/Gemfile.lock +2 -2
- data/lib/web_loader/command.rb +19 -13
- data/lib/web_loader/utils.rb +5 -0
- data/lib/web_loader/version.rb +1 -1
- data/web_loader.iml +6 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af7dbb07ea7ffe94d18c04d91c1f53564123b6ed794945d04ed2dd441483f2ad
|
4
|
+
data.tar.gz: 3e1be0e448488fb0fc8cdf0aa53e5751c0f1fc33261315b3a53882d356e42492
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c1cc94232fdca9458d416c90ce6832fdb08be2a6a90f15705645711566de931ca76a47650ce241589d76620ccfe9c0af02c069e7abd990ad2f6a2643220bb14
|
7
|
+
data.tar.gz: 227a97a262168cdc5a1f473901d274b52ef60f388ff89ca3fda07c24f725f5c977ff343a9f24d8bdc70634ae0f916ff84a98b1215f1129f5e6264999b05df007
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<component name="InspectionProjectProfileManager">
|
2
|
+
<profile version="1.0">
|
3
|
+
<option name="myName" value="Project Default" />
|
4
|
+
<inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5
|
+
</profile>
|
6
|
+
</component>
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/command.rb
CHANGED
@@ -33,6 +33,7 @@ module WebLoader
|
|
33
33
|
@cache_limit = CACHE_LIMIT
|
34
34
|
@always_write_cache = false
|
35
35
|
@response = nil
|
36
|
+
@logger = nil
|
36
37
|
end
|
37
38
|
|
38
39
|
attr_reader :load_cache_page
|
@@ -40,6 +41,7 @@ module WebLoader
|
|
40
41
|
attr_accessor :cache_limit
|
41
42
|
attr_accessor :always_write_cache
|
42
43
|
attr_reader :response
|
44
|
+
attr_accessor :logger
|
43
45
|
|
44
46
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
45
47
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -47,19 +49,19 @@ module WebLoader
|
|
47
49
|
|
48
50
|
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
|
49
51
|
raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
|
50
|
-
log("Load: #{url}"
|
52
|
+
log("Load: #{url}")
|
51
53
|
|
52
54
|
##### キャッシュの読み込み
|
53
55
|
@load_cache_page = false
|
54
56
|
content = try_load_cache(url)
|
55
57
|
if content
|
56
|
-
log("Load cache: #{url}"
|
58
|
+
log("Load cache: #{url}")
|
57
59
|
@load_cache_page = true
|
58
60
|
return content
|
59
61
|
end
|
60
62
|
|
61
63
|
##### サーバーからロード
|
62
|
-
log("Load server: #{url}"
|
64
|
+
log("Load server: #{url}")
|
63
65
|
uri = URI.parse(url)
|
64
66
|
http = Net::HTTP.new(uri.host, uri.port)
|
65
67
|
if uri.scheme == 'https'
|
@@ -71,7 +73,7 @@ module WebLoader
|
|
71
73
|
@response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
|
72
74
|
rescue Net::ReadTimeout
|
73
75
|
# タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
|
74
|
-
log("Read timeout: #{url}"
|
76
|
+
log("Read timeout: #{url}")
|
75
77
|
if retry_count > 0
|
76
78
|
sleep DEFAULT_SLEEP
|
77
79
|
return load(url, redirect_count , retry_count - 1)
|
@@ -93,14 +95,14 @@ module WebLoader
|
|
93
95
|
end
|
94
96
|
|
95
97
|
if @use_cache || @always_write_cache
|
96
|
-
log("Write cache: #{url}"
|
98
|
+
log("Write cache: #{url}")
|
97
99
|
Cache.write(@cache_dir, url, @response.code, body)
|
98
100
|
end
|
99
101
|
result = body
|
100
102
|
when Net::HTTPRedirection
|
101
103
|
result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
|
102
|
-
|
103
|
-
|
104
|
+
# when Net::HTTPNotFound
|
105
|
+
# result = nil
|
104
106
|
when Net::HTTPTooManyRequests, Net::ReadTimeout
|
105
107
|
# 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
|
106
108
|
if retry_count > 0
|
@@ -108,17 +110,16 @@ module WebLoader
|
|
108
110
|
if @response.is_a?(Net::HTTPTooManyRequests)
|
109
111
|
# HTTPTooManyRequestsならばretry-afterで指定された値を取得。
|
110
112
|
sleep_for = @response.header['retry-after'].to_i + 10
|
111
|
-
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
113
|
+
log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
112
114
|
else
|
113
|
-
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count})."
|
115
|
+
log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
|
114
116
|
end
|
115
117
|
sleep sleep_for
|
116
118
|
result = load(url, redirect_count , retry_count - 1)
|
117
119
|
end
|
118
|
-
|
120
|
+
else
|
119
121
|
# それ以外は対応した例外を発生
|
120
122
|
log("error #{url}", true)
|
121
|
-
@response.value
|
122
123
|
end
|
123
124
|
result
|
124
125
|
end
|
@@ -130,8 +131,13 @@ module WebLoader
|
|
130
131
|
Cache.load_content(@cache_dir, url)
|
131
132
|
end
|
132
133
|
|
133
|
-
def log(msg, put_log)
|
134
|
-
|
134
|
+
def log(msg, put_log = @verbose)
|
135
|
+
return unless put_log
|
136
|
+
if @logger
|
137
|
+
@logger.info(msg)
|
138
|
+
else
|
139
|
+
puts msg
|
140
|
+
end
|
135
141
|
end
|
136
142
|
end
|
137
143
|
end
|
data/lib/web_loader/utils.rb
CHANGED
@@ -13,6 +13,10 @@ module WebLoader
|
|
13
13
|
if str =~ /<meta.*?charset=["']*([^"']+)/i
|
14
14
|
charset = $1
|
15
15
|
end
|
16
|
+
if charset =~ /Shift_JIS/i
|
17
|
+
# Shift_JISの場合、実際はWindows-31J(Windowsの標準コードの場合が多いはず)
|
18
|
+
charset = "Windows-31J"
|
19
|
+
end
|
16
20
|
charset
|
17
21
|
end
|
18
22
|
# テストのためにmodule_functionを使用
|
@@ -45,6 +49,7 @@ module WebLoader
|
|
45
49
|
end
|
46
50
|
result
|
47
51
|
end
|
52
|
+
module_function :toutf8_charset
|
48
53
|
|
49
54
|
def toutf8(str, charset)
|
50
55
|
# 2022/04/04(月)
|
data/lib/web_loader/version.rb
CHANGED
data/web_loader.iml
CHANGED
@@ -11,21 +11,21 @@
|
|
11
11
|
<orderEntry type="inheritedJdk" />
|
12
12
|
<orderEntry type="sourceFolder" forTests="false" />
|
13
13
|
<orderEntry type="module-library">
|
14
|
-
<library name="minitest (v5.22.
|
14
|
+
<library name="minitest (v5.22.3) [path][gem]" type="rubylib">
|
15
15
|
<properties>
|
16
16
|
<option name="version" value="4" />
|
17
17
|
</properties>
|
18
18
|
<CLASSES>
|
19
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.
|
20
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.
|
19
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/lib" />
|
20
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
|
21
21
|
</CLASSES>
|
22
22
|
<JAVADOC />
|
23
23
|
<SOURCES>
|
24
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.
|
25
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.
|
24
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/lib" />
|
25
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
|
26
26
|
</SOURCES>
|
27
27
|
<excluded>
|
28
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.
|
28
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
|
29
29
|
</excluded>
|
30
30
|
</library>
|
31
31
|
</orderEntry>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extensions: []
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ".idea/.gitignore"
|
22
|
+
- ".idea/inspectionProfiles/Project_Default.xml"
|
22
23
|
- ".idea/misc.xml"
|
23
24
|
- ".idea/modules.xml"
|
24
25
|
- ".idea/vcs.xml"
|