web_loader 1.2.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/web_loader/cache.rb +2 -3
- data/lib/web_loader/command.rb +9 -5
- data/lib/web_loader/utils.rb +22 -1
- data/lib/web_loader/version.rb +1 -1
- data/web_loader.iml +9 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25257053739d26c811dc6fe61337deb27582549fcade84cc94da4a2b841cc399
|
4
|
+
data.tar.gz: 5f1fc1172b8252bf552caa9131e495065c5420d8773c9ce944843999c914df4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31d4b8dc8fb95aaa1585e4009183b4a2ac4612aa3d4fe3cf65f8f369340bdf375a3ee410ca4e26d9471d31272f8fc40447e2adccd633b6f8a7000be23f87111e
|
7
|
+
data.tar.gz: e4a6b5b6fee9a8a9f3a971c50fd1345a09674544c90e2ec063290e8cac067e38374f19f2395377cc84bbb8649f107dea64c6a9484dde9a67c5a9c3fa79aaece6
|
data/Gemfile.lock
CHANGED
data/lib/web_loader/cache.rb
CHANGED
@@ -12,7 +12,6 @@ module WebLoader
|
|
12
12
|
class Cache
|
13
13
|
PREFIX = "__cache__"
|
14
14
|
# CACHE_LIMIT = 3600
|
15
|
-
CACHE_LIMIT = 60
|
16
15
|
|
17
16
|
def self.basename(url)
|
18
17
|
Digest::MD5.hexdigest(url)
|
@@ -43,11 +42,11 @@ module WebLoader
|
|
43
42
|
File.write(content_path, content)
|
44
43
|
end
|
45
44
|
|
46
|
-
def self.clear(dir)
|
45
|
+
def self.clear(dir, cache_limit)
|
47
46
|
Dir.glob("#{dir}/#{PREFIX}*.{yml,html}").each do |path|
|
48
47
|
diff = Time.now - File.mtime(path)
|
49
48
|
# 1時間以上昔のキャッシュは使用しない
|
50
|
-
too_old_cache = diff > CACHE_LIMIT
|
49
|
+
too_old_cache = diff > cache_limit
|
51
50
|
FileUtils.rm(path) if too_old_cache
|
52
51
|
end
|
53
52
|
end
|
data/lib/web_loader/command.rb
CHANGED
@@ -12,6 +12,7 @@ module WebLoader
|
|
12
12
|
DEFAULT_RETRY = 3
|
13
13
|
DEFAULT_REDIRECT = 10
|
14
14
|
DEFAULT_SLEEP = 10
|
15
|
+
CACHE_LIMIT = 3600 # キャッシュが有効な秒数。デフォルトは1時間とする
|
15
16
|
|
16
17
|
def self.save_image(url, file)
|
17
18
|
# キャッシュせず単に保存する
|
@@ -29,10 +30,12 @@ module WebLoader
|
|
29
30
|
@user_agent = "#{USER_AGENT}/#{VERSION}"
|
30
31
|
@binary = false
|
31
32
|
@verbose = false
|
33
|
+
@cache_limit = CACHE_LIMIT
|
32
34
|
end
|
33
35
|
|
34
36
|
attr_reader :load_cache_page
|
35
37
|
attr_accessor :use_cache, :cache_dir, :binary, :user_agent, :verbose
|
38
|
+
attr_accessor :cache_limit
|
36
39
|
|
37
40
|
def load_retry(url, retry_count = DEFAULT_RETRY)
|
38
41
|
load(url, DEFAULT_REDIRECT, retry_count)
|
@@ -84,10 +87,11 @@ module WebLoader
|
|
84
87
|
encoding = response.type_params['charset']
|
85
88
|
body = toutf8(body, encoding)
|
86
89
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
|
91
|
+
# if @use_cache
|
92
|
+
log("Write cache: #{url}", @verbose)
|
93
|
+
Cache.write(@cache_dir, url, response.code, body)
|
94
|
+
# end
|
91
95
|
result = body
|
92
96
|
when Net::HTTPRedirection
|
93
97
|
result = load(to_redirect_url(uri, response['location']), redirect_count - 1)
|
@@ -116,7 +120,7 @@ module WebLoader
|
|
116
120
|
private
|
117
121
|
def try_load_cache(url)
|
118
122
|
return nil unless @use_cache
|
119
|
-
Cache.clear(@cache_dir)
|
123
|
+
Cache.clear(@cache_dir, @cache_limit)
|
120
124
|
Cache.load_content(@cache_dir, url)
|
121
125
|
end
|
122
126
|
|
data/lib/web_loader/utils.rb
CHANGED
@@ -2,9 +2,30 @@ module WebLoader
|
|
2
2
|
module Utils
|
3
3
|
UTF_8 = 'UTF-8'
|
4
4
|
|
5
|
+
def detect_charset(str)
|
6
|
+
# charsetが指定されていない場合内容からcharsetを判定する
|
7
|
+
# https://learn.microsoft.com/en-us/windows/release-health/status-windows-11-22h2 の場合この処理がないと文字化け
|
8
|
+
# charsetがサーバーから返されず、ASCII-8BITとして判定される。それをKconv.toutf8で変換すると文字化けする
|
9
|
+
# metaタグのcharsetはUTF-8なのでこれを使えば正しいはず
|
10
|
+
charset = nil
|
11
|
+
# Nokogiriの場合 https://qiita.com/tetoralynx/items/273560ad6f75bb685935
|
12
|
+
# <meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i
|
13
|
+
if str =~ /<meta.*?charset=["']*([^"']+)/i
|
14
|
+
charset = $1
|
15
|
+
end
|
16
|
+
charset
|
17
|
+
end
|
18
|
+
# テストのためにmodule_functionを使用
|
19
|
+
module_function :detect_charset
|
20
|
+
|
5
21
|
def toutf8_charset(str, charset)
|
6
22
|
# charsetが指定されていない場合はnil
|
7
|
-
|
23
|
+
if charset.to_s.length == 0
|
24
|
+
charset = detect_charset(str)
|
25
|
+
end
|
26
|
+
if charset.to_s.length == 0
|
27
|
+
return nil
|
28
|
+
end
|
8
29
|
|
9
30
|
result = nil
|
10
31
|
begin
|
data/lib/web_loader/version.rb
CHANGED
data/web_loader.iml
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
2
|
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="ModuleRunConfigurationManager">
|
4
|
+
<shared />
|
5
|
+
</component>
|
3
6
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4
7
|
<exclude-output />
|
5
8
|
<content url="file://$MODULE_DIR$">
|
@@ -8,21 +11,21 @@
|
|
8
11
|
<orderEntry type="inheritedJdk" />
|
9
12
|
<orderEntry type="sourceFolder" forTests="false" />
|
10
13
|
<orderEntry type="module-library">
|
11
|
-
<library name="minitest (v5.
|
14
|
+
<library name="minitest (v5.22.0) [path][gem]" type="rubylib">
|
12
15
|
<properties>
|
13
16
|
<option name="version" value="4" />
|
14
17
|
</properties>
|
15
18
|
<CLASSES>
|
16
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.
|
17
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.
|
19
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
|
20
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
|
18
21
|
</CLASSES>
|
19
22
|
<JAVADOC />
|
20
23
|
<SOURCES>
|
21
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.
|
22
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.
|
24
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/lib" />
|
25
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
|
23
26
|
</SOURCES>
|
24
27
|
<excluded>
|
25
|
-
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.
|
28
|
+
<root url="file://$MODULE_DIR$/vendor/bundle/ruby/3.1.0/gems/minitest-5.22.0/test" />
|
26
29
|
</excluded>
|
27
30
|
</library>
|
28
31
|
</orderEntry>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- src
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Web loader.
|
14
14
|
email:
|