web_loader 1.2.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa46cae0813efddd510f37385b7e1f3b4e78288405586af295b1ec6dec2a86af
4
- data.tar.gz: f1f9caa1a7b767394d201abbc32ba05d7663a6e1c3af803112db252dc5b163c8
3
+ metadata.gz: 25257053739d26c811dc6fe61337deb27582549fcade84cc94da4a2b841cc399
4
+ data.tar.gz: 5f1fc1172b8252bf552caa9131e495065c5420d8773c9ce944843999c914df4f
5
5
  SHA512:
6
- metadata.gz: 7cbf32092d6ad8d26954b3c3dc97a4f32ec930b9598b2cc326d5dffa39d3367fde8575757a2e878ebc6eac9c13155b082f121f730fb8884506beab2b118e383a
7
- data.tar.gz: 3a38e4744ace8a488fcd4c2af85c2e19ec453de8e6d85573ddafc3bebff5a4da66ea50c711ae7a5a24349b3d5f90811d831e247c02b8c7f64b73e7a5b8679a1e
6
+ metadata.gz: 31d4b8dc8fb95aaa1585e4009183b4a2ac4612aa3d4fe3cf65f8f369340bdf375a3ee410ca4e26d9471d31272f8fc40447e2adccd633b6f8a7000be23f87111e
7
+ data.tar.gz: e4a6b5b6fee9a8a9f3a971c50fd1345a09674544c90e2ec063290e8cac067e38374f19f2395377cc84bbb8649f107dea64c6a9484dde9a67c5a9c3fa79aaece6
data/Gemfile.lock CHANGED
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_loader (1.2.1)
4
+ web_loader (1.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.20.0)
9
+ minitest (5.22.2)
10
10
  rake (13.1.0)
11
11
 
12
12
  PLATFORMS
@@ -12,7 +12,6 @@ module WebLoader
12
12
  class Cache
13
13
  PREFIX = "__cache__"
14
14
  # CACHE_LIMIT = 3600
15
- CACHE_LIMIT = 60
16
15
 
17
16
  def self.basename(url)
18
17
  Digest::MD5.hexdigest(url)
@@ -43,11 +42,11 @@ module WebLoader
43
42
  File.write(content_path, content)
44
43
  end
45
44
 
46
- def self.clear(dir)
45
+ def self.clear(dir, cache_limit)
47
46
  Dir.glob("#{dir}/#{PREFIX}*.{yml,html}").each do |path|
48
47
  diff = Time.now - File.mtime(path)
49
48
  # 1時間以上昔のキャッシュは使用しない
50
- too_old_cache = diff > CACHE_LIMIT
49
+ too_old_cache = diff > cache_limit
51
50
  FileUtils.rm(path) if too_old_cache
52
51
  end
53
52
  end
@@ -12,6 +12,7 @@ module WebLoader
12
12
  DEFAULT_RETRY = 3
13
13
  DEFAULT_REDIRECT = 10
14
14
  DEFAULT_SLEEP = 10
15
+ CACHE_LIMIT = 3600 # キャッシュが有効な秒数。デフォルトは1時間とする
15
16
 
16
17
  def self.save_image(url, file)
17
18
  # キャッシュせず単に保存する
@@ -29,10 +30,12 @@ module WebLoader
29
30
  @user_agent = "#{USER_AGENT}/#{VERSION}"
30
31
  @binary = false
31
32
  @verbose = false
33
+ @cache_limit = CACHE_LIMIT
32
34
  end
33
35
 
34
36
  attr_reader :load_cache_page
35
37
  attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
38
+ attr_accessor :cache_limit
36
39
 
37
40
  def load_retry(url, retry_count = DEFAULT_RETRY)
38
41
  load(url, DEFAULT_REDIRECT, retry_count)
@@ -84,10 +87,11 @@ module WebLoader
84
87
  encoding = response.type_params['charset']
85
88
  body = toutf8(body, encoding)
86
89
  end
87
- if @use_cache
88
- log("Write cache: #{url}", @verbose)
89
- Cache.write(@cache_dir, url, response.code, body)
90
- end
90
+
91
+ # if @use_cache
92
+ log("Write cache: #{url}", @verbose)
93
+ Cache.write(@cache_dir, url, response.code, body)
94
+ # end
91
95
  result = body
92
96
  when Net::HTTPRedirection
93
97
  result = load(to_redirect_url(uri, response['location']), redirect_count - 1)
@@ -116,7 +120,7 @@ module WebLoader
116
120
  private
117
121
  def try_load_cache(url)
118
122
  return nil unless @use_cache
119
- Cache.clear(@cache_dir)
123
+ Cache.clear(@cache_dir, @cache_limit)
120
124
  Cache.load_content(@cache_dir, url)
121
125
  end
122
126
 
@@ -2,9 +2,30 @@ module WebLoader
2
2
  module Utils
3
3
  UTF_8 = 'UTF-8'
4
4
 
5
+ def detect_charset(str)
6
+ # charsetが指定されていない場合内容からcharsetを判定する
7
+ # https://learn.microsoft.com/en-us/windows/release-health/status-windows-11-22h2 の場合この処理がないと文字化け
8
+ # charsetがサーバーから返されず、ASCII-8BITとして判定される。それをKconv.toutf8で変換すると文字化けする
9
+ # metaタグのcharsetはUTF-8なのでこれを使えば正しいはず
10
+ charset = nil
11
+ # Nokogiriの場合 https://qiita.com/tetoralynx/items/273560ad6f75bb685935
12
+ # <meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i
13
+ if str =~ /<meta.*?charset=["']*([^"']+)/i
14
+ charset = $1
15
+ end
16
+ charset
17
+ end
18
+ # テストのためにmodule_functionを使用
19
+ module_function :detect_charset
20
+
5
21
  def toutf8_charset(str, charset)
6
22
  # charsetが指定されていない場合はnil
7
- return nil if charset.to_s.length == 0
23
+ if charset.to_s.length == 0
24
+ charset = detect_charset(str)
25
+ end
26
+ if charset.to_s.length == 0
27
+ return nil
28
+ end
8
29
 
9
30
  result = nil
10
31
  begin
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WebLoader
4
- VERSION = "1.2.1"
4
+ VERSION = "1.4.0"
5
5
  end
data/web_loader.iml CHANGED
@@ -1,5 +1,8 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <module type="RUBY_MODULE" version="4">
3
+ <component name="ModuleRunConfigurationManager">
4
+ <shared />
5
+ </component>
3
6
  <component name="NewModuleRootManager" inherit-compiler-output="true">
4
7
  <exclude-output />
5
8
  <content url="file://$MODULE_DIR$">
@@ -8,21 +11,21 @@
8
11
  <orderEntry type="inheritedJdk" />
9
12
  <orderEntry type="sourceFolder" forTests="false" />
10
13
  <orderEntry type="module-library">
11
- <library name="minitest (v5.20.0) [path][gem]" type="rubylib">
14
+ <library name="minitest (v5.22.0) [path][gem]" type="rubylib">
12
15
  <properties>
13
16
  <option name="version" value="4" />
14
17
  </properties>
15
18
  <CLASSES>
16
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/lib" />
17
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
19
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
20
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
18
21
  </CLASSES>
19
22
  <JAVADOC />
20
23
  <SOURCES>
21
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/lib" />
22
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
24
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
25
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
23
26
  </SOURCES>
24
27
  <excluded>
25
- <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.20.0/test" />
28
+ <root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
26
29
  </excluded>
27
30
  </library>
28
31
  </orderEntry>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - src
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-23 00:00:00.000000000 Z
11
+ date: 2024-02-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Web loader.
14
14
  email: