web_loader 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 972dc054f042a2f2da8e3e84f8dc0963343765bd316c11ed285302551c071de0
4
- data.tar.gz: 8711c2f6d20926d94960e0ad924aad1c6931551a2f1e30c9fdbc6c89d285bf15
3
+ metadata.gz: af7dbb07ea7ffe94d18c04d91c1f53564123b6ed794945d04ed2dd441483f2ad
4
+ data.tar.gz: 3e1be0e448488fb0fc8cdf0aa53e5751c0f1fc33261315b3a53882d356e42492
5
5
  SHA512:
6
- metadata.gz: e7e52952ef16318d2e792521044dcd614c8828311bc49557b67948f843ce4934b7c5c79ccdbc53357ce63a3deeb6f7ad32ec6201bb274386b7bf481f5b31956e
7
- data.tar.gz: b32129726e055b55b26fda3f3f9ebac71f7b2b3f3ca916a2c612dc1439195ac40052d0d7015b31663a4184fe7a279934b6fbb372ddae9cf0193cdb93a3ab7545
6
+ metadata.gz: 6c1cc94232fdca9458d416c90ce6832fdb08be2a6a90f15705645711566de931ca76a47650ce241589d76620ccfe9c0af02c069e7abd990ad2f6a2643220bb14
7
+ data.tar.gz: 227a97a262168cdc5a1f473901d274b52ef60f388ff89ca3fda07c24f725f5c977ff343a9f24d8bdc70634ae0f916ff84a98b1215f1129f5e6264999b05df007
@@ -0,0 +1,6 @@
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="RbsMissingTypeSignature" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
5
+ </profile>
6
+ </component>
data/Gemfile.lock CHANGED
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_loader (1.5.0)
4
+ web_loader (1.7.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.22.2)
9
+ minitest (5.22.3)
10
10
  rake (13.1.0)
11
11
 
12
12
  PLATFORMS
@@ -33,6 +33,7 @@ module WebLoader
33
33
  @cache_limit = CACHE_LIMIT
34
34
  @always_write_cache = false
35
35
  @response = nil
36
+ @logger = nil
36
37
  end
37
38
 
38
39
  attr_reader :load_cache_page
@@ -40,6 +41,7 @@ module WebLoader
40
41
  attr_accessor :cache_limit
41
42
  attr_accessor :always_write_cache
42
43
  attr_reader :response
44
+ attr_accessor :logger
43
45
 
44
46
  def load_retry(url, retry_count = DEFAULT_RETRY)
45
47
  load(url, DEFAULT_REDIRECT, retry_count)
@@ -47,19 +49,19 @@ module WebLoader
47
49
 
48
50
  def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
49
51
  raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
50
- log("Load: #{url}", @verbose)
52
+ log("Load: #{url}")
51
53
 
52
54
  ##### キャッシュの読み込み
53
55
  @load_cache_page = false
54
56
  content = try_load_cache(url)
55
57
  if content
56
- log("Load cache: #{url}", @verbose)
58
+ log("Load cache: #{url}")
57
59
  @load_cache_page = true
58
60
  return content
59
61
  end
60
62
 
61
63
  ##### サーバーからロード
62
- log("Load server: #{url}", @verbose)
64
+ log("Load server: #{url}")
63
65
  uri = URI.parse(url)
64
66
  http = Net::HTTP.new(uri.host, uri.port)
65
67
  if uri.scheme == 'https'
@@ -71,7 +73,7 @@ module WebLoader
71
73
  @response = http.get(uri.request_uri, 'User-Agent' => @user_agent) # request_uri=path + '?' + query
72
74
  rescue Net::ReadTimeout
73
75
  # タイムアウトした場合リトライ可能ならばsleepした後に再度ロード実行
74
- log("Read timeout: #{url}", @verbose)
76
+ log("Read timeout: #{url}")
75
77
  if retry_count > 0
76
78
  sleep DEFAULT_SLEEP
77
79
  return load(url, redirect_count , retry_count - 1)
@@ -93,14 +95,14 @@ module WebLoader
93
95
  end
94
96
 
95
97
  if @use_cache || @always_write_cache
96
- log("Write cache: #{url}", @verbose)
98
+ log("Write cache: #{url}")
97
99
  Cache.write(@cache_dir, url, @response.code, body)
98
100
  end
99
101
  result = body
100
102
  when Net::HTTPRedirection
101
103
  result = load(to_redirect_url(uri, @response['location']), redirect_count - 1)
102
- # when Net::HTTPNotFound
103
- # result = nil
104
+ # when Net::HTTPNotFound
105
+ # result = nil
104
106
  when Net::HTTPTooManyRequests, Net::ReadTimeout
105
107
  # 上記以外のレスポンスの場合、リトライ可能ならばsleepした後に再度ロード実行
106
108
  if retry_count > 0
@@ -108,17 +110,16 @@ module WebLoader
108
110
  if @response.is_a?(Net::HTTPTooManyRequests)
109
111
  # HTTPTooManyRequestsならばretry-afterで指定された値を取得。
110
112
  sleep_for = @response.header['retry-after'].to_i + 10
111
- log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).", @verbose)
113
+ log("Rate limit: #{uri} #{@response.header.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
112
114
  else
113
- log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).", @verbose)
115
+ log("Unknown response: #{uri} #{@response.inspect}. Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
114
116
  end
115
117
  sleep sleep_for
116
118
  result = load(url, redirect_count , retry_count - 1)
117
119
  end
118
-
120
+ else
119
121
  # それ以外は対応した例外を発生
120
122
  log("error #{url}", true)
121
- @response.value
122
123
  end
123
124
  result
124
125
  end
@@ -130,8 +131,13 @@ module WebLoader
130
131
  Cache.load_content(@cache_dir, url)
131
132
  end
132
133
 
133
- def log(msg, put_log)
134
- puts msg if put_log
134
+ def log(msg, put_log = @verbose)
135
+ return unless put_log
136
+ if @logger
137
+ @logger.info(msg)
138
+ else
139
+ puts msg
140
+ end
135
141
  end
136
142
  end
137
143
  end
@@ -13,6 +13,10 @@ module WebLoader
13
13
  if str =~ /<meta.*?charset=["']*([^"']+)/i
14
14
  charset = $1
15
15
  end
16
+ if charset =~ /Shift_JIS/i
17
+ # Shift_JISの場合、実際はWindows-31J(Windowsの標準コードの場合が多いはず)
18
+ charset = "Windows-31J"
19
+ end
16
20
  charset
17
21
  end
18
22
  # テストのためにmodule_functionを使用
@@ -45,6 +49,7 @@ module WebLoader
45
49
  end
46
50
  result
47
51
  end
52
+ module_function :toutf8_charset
48
53
 
49
54
  def toutf8(str, charset)
50
55
  # 2022/04/04(月)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WebLoader
4
- VERSION = "1.5.0"
4
+ VERSION = "1.7.0"
5
5
  end
data/web_loader.iml CHANGED
@@ -11,21 +11,21 @@
11
11
  <orderEntry type="inheritedJdk" />
12
12
  <orderEntry type="sourceFolder" forTests="false" />
13
13
  <orderEntry type="module-library">
14
- <library name="minitest (v5.22.2) [path][gem]" type="rubylib">
14
+ <library name="minitest (v5.22.3) [path][gem]" type="rubylib">
15
15
  <properties>
16
16
  <option name="version" value="4" />
17
17
  </properties>
18
18
  <CLASSES>
19
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.2/lib" />
20
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.2/test" />
19
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/lib" />
20
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
21
21
  </CLASSES>
22
22
  <JAVADOC />
23
23
  <SOURCES>
24
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.2/lib" />
25
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.2/test" />
24
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/lib" />
25
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
26
26
  </SOURCES>
27
27
  <excluded>
28
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.2/test" />
28
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.3/test" />
29
29
  </excluded>
30
30
  </library>
31
31
  </orderEntry>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - src
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-26 00:00:00.000000000 Z
11
+ date: 2024-03-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Web loader.
14
14
  email:
@@ -19,6 +19,7 @@ extensions: []
19
19
  extra_rdoc_files: []
20
20
  files:
21
21
  - ".idea/.gitignore"
22
+ - ".idea/inspectionProfiles/Project_Default.xml"
22
23
  - ".idea/misc.xml"
23
24
  - ".idea/modules.xml"
24
25
  - ".idea/vcs.xml"