kudzu 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kudzu/agent/util/mime_type_detector.rb +2 -2
- data/lib/kudzu/model/page.rb +19 -7
- data/lib/kudzu/version.rb +1 -1
- data/lib/kudzu.rb +2 -1
- metadata +3 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: adc0bdb2f1017f8f4e8abcc2f0e6416c88b2110ce9f345957c6233484505509c
|
4
|
+
data.tar.gz: 49a6c3166b4d499177a144987623d17e516c4a6d84674af81d883dddbfee3b5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7eba18403646beab304d92b9826bb774f4d2b2f59b365402b2aeadb3ae5e05a50f0604a5c8ba7ccfe63176db82ea85bf7759697b1069adda860a8c2e4ef56bff
|
7
|
+
data.tar.gz: 456b279651849f18308de4ab93cf76e4644988fdbb5ac023fa32b6d5cc163298236e374c8323227d2540854f4579d08e137caf844bf49c41235dd5a112b9dc6f
|
@@ -22,13 +22,13 @@ module Kudzu
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def from_body(body)
|
25
|
-
mime =
|
25
|
+
mime = MimeMagic.by_magic(StringIO.new(body))
|
26
26
|
mime.to_s if mime
|
27
27
|
end
|
28
28
|
|
29
29
|
def from_url(url)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
|
-
mime =
|
31
|
+
mime = MimeMagic.by_path(uri.basename)
|
32
32
|
mime.to_s if mime
|
33
33
|
end
|
34
34
|
end
|
data/lib/kudzu/model/page.rb
CHANGED
@@ -91,9 +91,9 @@ module Kudzu
|
|
91
91
|
private
|
92
92
|
|
93
93
|
def decode_body(body)
|
94
|
-
if text?
|
95
|
-
if find_encoding
|
96
|
-
body.dup.force_encoding(
|
94
|
+
if body && text?
|
95
|
+
if enc = find_encoding(body)
|
96
|
+
body.dup.force_encoding(enc).encode('utf-8', invalid: :replace, undef: :replace)
|
97
97
|
else
|
98
98
|
body.dup.encode('utf-8', invalid: :replace, undef: :replace)
|
99
99
|
end
|
@@ -102,10 +102,22 @@ module Kudzu
|
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
105
|
-
def find_encoding
|
106
|
-
|
107
|
-
|
108
|
-
|
105
|
+
def find_encoding(body)
|
106
|
+
begin
|
107
|
+
enc = Encoding.find(charset)
|
108
|
+
rescue ArgumentError
|
109
|
+
return nil
|
110
|
+
end
|
111
|
+
|
112
|
+
if enc == Encoding::Shift_JIS
|
113
|
+
Encoding::CP932
|
114
|
+
elsif enc == Encoding::EUC_JP
|
115
|
+
require 'nkf'
|
116
|
+
guessed = NKF.guess(body)
|
117
|
+
[Encoding::EUCJP_MS, Encoding::CP51932].include?(guessed) ? guessed : enc
|
118
|
+
else
|
119
|
+
enc
|
120
|
+
end
|
109
121
|
end
|
110
122
|
end
|
111
123
|
end
|
data/lib/kudzu/version.rb
CHANGED
data/lib/kudzu.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kudzu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoshikazu Kaneta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -53,21 +53,7 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: mime-types
|
56
|
+
name: mimemagic
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - ">="
|