web_loader 1.2.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa46cae0813efddd510f37385b7e1f3b4e78288405586af295b1ec6dec2a86af
4
- data.tar.gz: f1f9caa1a7b767394d201abbc32ba05d7663a6e1c3af803112db252dc5b163c8
3
+ metadata.gz: 25257053739d26c811dc6fe61337deb27582549fcade84cc94da4a2b841cc399
4
+ data.tar.gz: 5f1fc1172b8252bf552caa9131e495065c5420d8773c9ce944843999c914df4f
5
5
  SHA512:
6
- metadata.gz: 7cbf32092d6ad8d26954b3c3dc97a4f32ec930b9598b2cc326d5dffa39d3367fde8575757a2e878ebc6eac9c13155b082f121f730fb8884506beab2b118e383a
7
- data.tar.gz: 3a38e4744ace8a488fcd4c2af85c2e19ec453de8e6d85573ddafc3bebff5a4da66ea50c711ae7a5a24349b3d5f90811d831e247c02b8c7f64b73e7a5b8679a1e
6
+ metadata.gz: 31d4b8dc8fb95aaa1585e4009183b4a2ac4612aa3d4fe3cf65f8f369340bdf375a3ee410ca4e26d9471d31272f8fc40447e2adccd633b6f8a7000be23f87111e
7
+ data.tar.gz: e4a6b5b6fee9a8a9f3a971c50fd1345a09674544c90e2ec063290e8cac067e38374f19f2395377cc84bbb8649f107dea64c6a9484dde9a67c5a9c3fa79aaece6
data/Gemfile.lock CHANGED
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_loader (1.2.1)
4
+ web_loader (1.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.20.0)
9
+ minitest (5.22.2)
10
10
  rake (13.1.0)
11
11
 
12
12
  PLATFORMS
@@ -12,7 +12,6 @@ module WebLoader
12
12
  class Cache
13
13
  PREFIX = "__cache__"
14
14
  # CACHE_LIMIT = 3600
15
- CACHE_LIMIT = 60
16
15
 
17
16
  def self.basename(url)
18
17
  Digest::MD5.hexdigest(url)
@@ -43,11 +42,11 @@ module WebLoader
43
42
  File.write(content_path, content)
44
43
  end
45
44
 
46
- def self.clear(dir)
45
+ def self.clear(dir, cache_limit)
47
46
  Dir.glob("#{dir}/#{PREFIX}*.{yml,html}").each do |path|
48
47
  diff = Time.now - File.mtime(path)
49
48
  # 1時間以上昔のキャッシュは使用しない
50
- too_old_cache = diff > CACHE_LIMIT
49
+ too_old_cache = diff > cache_limit
51
50
  FileUtils.rm(path) if too_old_cache
52
51
  end
53
52
  end
@@ -12,6 +12,7 @@ module WebLoader
12
12
  DEFAULT_RETRY = 3
13
13
  DEFAULT_REDIRECT = 10
14
14
  DEFAULT_SLEEP = 10
15
+ CACHE_LIMIT = 3600 # キャッシュが有効な秒数。デフォルトは1時間とする
15
16
 
16
17
  def self.save_image(url, file)
17
18
  # キャッシュせず単に保存する
@@ -29,10 +30,12 @@ module WebLoader
29
30
  @user_agent = "#{USER_AGENT}/#{VERSION}"
30
31
  @binary = false
31
32
  @verbose = false
33
+ @cache_limit = CACHE_LIMIT
32
34
  end
33
35
 
34
36
  attr_reader :load_cache_page
35
37
  attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
38
+ attr_accessor :cache_limit
36
39
 
37
40
  def load_retry(url, retry_count = DEFAULT_RETRY)
38
41
  load(url, DEFAULT_REDIRECT, retry_count)
@@ -84,10 +87,11 @@ module WebLoader
84
87
  encoding = response.type_params['charset']
85
88
  body = toutf8(body, encoding)
86
89
  end
87
- if @use_cache
88
- log("Write cache: #{url}", @verbose)
89
- Cache.write(@cache_dir, url, response.code, body)
90
- end
90
+
91
+ # if @use_cache
92
+ log("Write cache: #{url}", @verbose)
93
+ Cache.write(@cache_dir, url, response.code, body)
94
+ # end
91
95
  result = body
92
96
  when Net::HTTPRedirection
93
97
  result = load(to_redirect_url(uri, response['location']), redirect_count - 1)
@@ -116,7 +120,7 @@ module WebLoader
116
120
  private
117
121
  def try_load_cache(url)
118
122
  return nil unless @use_cache
119
- Cache.clear(@cache_dir)
123
+ Cache.clear(@cache_dir, @cache_limit)
120
124
  Cache.load_content(@cache_dir, url)
121
125
  end
122
126
 
@@ -2,9 +2,30 @@ module WebLoader
2
2
  module Utils
3
3
  UTF_8 = 'UTF-8'
4
4
 
5
+ def detect_charset(str)
6
+ # charsetが指定されていない場合内容からcharsetを判定する
7
+ # https://learn.microsoft.com/en-us/windows/release-health/status-windows-11-22h2 の場合この処理がないと文字化け
8
+ # charsetがサーバーから返されず、ASCII-8BITとして判定される。それをKconv.toutf8で変換すると文字化けする
9
+ # metaタグのcharsetはUTF-8なのでこれを使えば正しいはず
10
+ charset = nil
11
+ # Nokogiriの場合 https://qiita.com/tetoralynx/items/273560ad6f75bb685935
12
+ # <meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i
13
+ if str =~ /<meta.*?charset=["']*([^"']+)/i
14
+ charset = $1
15
+ end
16
+ charset
17
+ end
18
+ # テストのためにmodule_functionを使用
19
+ module_function :detect_charset
20
+
5
21
  def toutf8_charset(str, charset)
6
22
  # charsetが指定されていない場合はnil
7
- return nil if charset.to_s.length == 0
23
+ if charset.to_s.length == 0
24
+ charset = detect_charset(str)
25
+ end
26
+ if charset.to_s.length == 0
27
+ return nil
28
+ end
8
29
 
9
30
  result = nil
10
31
  begin
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WebLoader
4
- VERSION = "1.2.1"
4
+ VERSION = "1.4.0"
5
5
  end
data/web_loader.iml CHANGED
@@ -1,5 +1,8 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <module type="RUBY_MODULE" version="4">
3
+ <component name="ModuleRunConfigurationManager">
4
+ <shared />
5
+ </component>
3
6
  <component name="NewModuleRootManager" inherit-compiler-output="true">
4
7
  <exclude-output />
5
8
  <content url="file://$MODULE_DIR$">
@@ -8,21 +11,21 @@
8
11
  <orderEntry type="inheritedJdk" />
9
12
  <orderEntry type="sourceFolder" forTests="false" />
10
13
  <orderEntry type="module-library">
11
- <library name="minitest (v5.20.0) [path][gem]" type="rubylib">
14
+ <library name="minitest (v5.22.0) [path][gem]" type="rubylib">
12
15
  <properties>
13
16
  <option name="version" value="4" />
14
17
  </properties>
15
18
  <CLASSES>
16
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/lib" />
17
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
19
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
20
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
18
21
  </CLASSES>
19
22
  <JAVADOC />
20
23
  <SOURCES>
21
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/lib" />
22
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
24
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
25
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
23
26
  </SOURCES>
24
27
  <excluded>
25
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
28
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
26
29
  </excluded>
27
30
  </library>
28
31
  </orderEntry>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - src
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-23 00:00:00.000000000 Z
11
+ date: 2024-02-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Web loader.
14
14
  email: