kudzu 1.2.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/kudzu/agent/util/mime_type_detector.rb +2 -2
- data/lib/kudzu/agent.rb +9 -1
- data/lib/kudzu/version.rb +1 -1
- data/lib/kudzu.rb +1 -2
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2893a15f2faff7f0697f7ce39f16a6f0e9c0972645c5e19aafb5751153432480
|
4
|
+
data.tar.gz: a8b9b0230be54ca43852e5b0700e1ea329258cbffcba284772f8d025ee14cbcf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 568b1bf560acb842ba3fa768c68078be503f271f7b1eed383ac67e8529bc568fc5ba720fc35f3c4e6973c07beede61baf39710d33ddfc13d01d75dbf14e2b090
|
7
|
+
data.tar.gz: 2537befd681ca91912003aad92b02ae69666afcc9fc816b330064c34547e0b94c5981d86ba369f45644b6161cb1fdbc980aea49aef7ec33e2be5a7b698597545
|
data/README.md
CHANGED
data/lib/kudzu/agent/util/mime_type_detector.rb
CHANGED
@@ -22,13 +22,13 @@ module Kudzu
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def from_body(body)
|
25
|
-
mime =
|
25
|
+
mime = Marcel::Magic.by_magic(StringIO.new(body))
|
26
26
|
mime.to_s if mime
|
27
27
|
end
|
28
28
|
|
29
29
|
def from_url(url)
|
30
30
|
uri = Addressable::URI.parse(url)
|
31
|
-
mime =
|
31
|
+
mime = Marcel::Magic.by_path(uri.basename)
|
32
32
|
mime.to_s if mime
|
33
33
|
end
|
34
34
|
end
|
data/lib/kudzu/agent.rb
CHANGED
@@ -30,13 +30,21 @@ module Kudzu
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def extract_refs(response)
|
33
|
+
return [] unless redirect_url_allowed?(response)
|
33
34
|
refs = @url_extractor.extract(response)
|
34
35
|
@url_filterer.filter(refs, response.url)
|
35
36
|
end
|
36
37
|
|
37
38
|
def filter_response?(response)
|
38
|
-
return
|
39
|
+
return true unless redirect_url_allowed?(response)
|
39
40
|
!@page_filterer.allowed?(response)
|
40
41
|
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def redirect_url_allowed?(response)
|
46
|
+
return true if response.redirect_from.nil? || response.redirect_from.empty?
|
47
|
+
@url_filterer.allowed?(response.url, response.redirect_from)
|
48
|
+
end
|
41
49
|
end
|
42
50
|
end
|
data/lib/kudzu/version.rb
CHANGED
data/lib/kudzu.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kudzu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yoshikazu Kaneta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -53,7 +53,7 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: marcel
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: webrick
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rspec-rails
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -214,7 +228,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
214
228
|
- !ruby/object:Gem::Version
|
215
229
|
version: '0'
|
216
230
|
requirements: []
|
217
|
-
rubygems_version: 3.
|
231
|
+
rubygems_version: 3.3.3
|
218
232
|
signing_key:
|
219
233
|
specification_version: 4
|
220
234
|
summary: A simple web crawler for ruby
|