kudzu 1.2.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 232a54685462f7415227ddf7ce2efd31d0046cb5bf7756ffbffb0b473836d45d
4
- data.tar.gz: e601657db196282b54e89633eebbaa4633ae1f7951fd0e7a3aa4b6ea00109d48
3
+ metadata.gz: 2893a15f2faff7f0697f7ce39f16a6f0e9c0972645c5e19aafb5751153432480
4
+ data.tar.gz: a8b9b0230be54ca43852e5b0700e1ea329258cbffcba284772f8d025ee14cbcf
5
5
  SHA512:
6
- metadata.gz: 5a3dabef241ebaed7b9a80ed11abd8bbaee63e09fe674743931c4f2c36ca44e436be6e5b44ed44e68cc30899ff080faf803f5e2834074fdc3dfc9abba100248f
7
- data.tar.gz: fa024d77336f25d03025d056a2d40f7c16f302658cbcf21afba5a637c9f14ab29f353c135b4c90217f6de46cbc9edb52a4bd0284982d264019762b4978b69c67
6
+ metadata.gz: 568b1bf560acb842ba3fa768c68078be503f271f7b1eed383ac67e8529bc568fc5ba720fc35f3c4e6973c07beede61baf39710d33ddfc13d01d75dbf14e2b090
7
+ data.tar.gz: 2537befd681ca91912003aad92b02ae69666afcc9fc816b330064c34547e0b94c5981d86ba369f45644b6161cb1fdbc980aea49aef7ec33e2be5a7b698597545
data/README.md CHANGED
@@ -12,7 +12,7 @@ A simple web crawler for ruby.
12
12
 
13
13
  ## Dependencies
14
14
 
15
- * ruby 2.3+
15
+ * ruby 2.5+
16
16
  * libicu
17
17
 
18
18
  ## Installation
@@ -22,13 +22,13 @@ module Kudzu
22
22
  end
23
23
 
24
24
  def from_body(body)
25
- mime = MimeMagic.by_magic(StringIO.new(body))
25
+ mime = Marcel::Magic.by_magic(StringIO.new(body))
26
26
  mime.to_s if mime
27
27
  end
28
28
 
29
29
  def from_url(url)
30
30
  uri = Addressable::URI.parse(url)
31
- mime = MimeMagic.by_path(uri.basename)
31
+ mime = Marcel::Magic.by_path(uri.basename)
32
32
  mime.to_s if mime
33
33
  end
34
34
  end
data/lib/kudzu/agent.rb CHANGED
@@ -30,13 +30,21 @@ module Kudzu
30
30
  end
31
31
 
32
32
  def extract_refs(response)
33
+ return [] unless redirect_url_allowed?(response)
33
34
  refs = @url_extractor.extract(response)
34
35
  @url_filterer.filter(refs, response.url)
35
36
  end
36
37
 
37
38
  def filter_response?(response)
38
- return false if response.redirect_from && !@url_filterer.allowed?(response.url, response.redirect_from)
39
+ return true unless redirect_url_allowed?(response)
39
40
  !@page_filterer.allowed?(response)
40
41
  end
42
+
43
+ private
44
+
45
+ def redirect_url_allowed?(response)
46
+ return true if response.redirect_from.nil? || response.redirect_from.empty?
47
+ @url_filterer.allowed?(response.url, response.redirect_from)
48
+ end
41
49
  end
42
50
  end
data/lib/kudzu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kudzu
2
- VERSION = '1.2.0'
2
+ VERSION = '1.3.1'
3
3
  end
data/lib/kudzu.rb CHANGED
@@ -2,8 +2,7 @@ require 'net/http'
2
2
  require 'http-cookie'
3
3
  require 'addressable'
4
4
  require 'nokogiri'
5
- require 'mimemagic'
6
- require 'mimemagic/overlay'
5
+ require 'marcel'
7
6
  require 'charlock_holmes'
8
7
 
9
8
  require 'kudzu/version'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kudzu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoshikazu Kaneta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-24 00:00:00.000000000 Z
11
+ date: 2023-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mimemagic
56
+ name: marcel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webrick
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rspec-rails
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -214,7 +228,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
214
228
  - !ruby/object:Gem::Version
215
229
  version: '0'
216
230
  requirements: []
217
- rubygems_version: 3.1.2
231
+ rubygems_version: 3.3.3
218
232
  signing_key:
219
233
  specification_version: 4
220
234
  summary: A simple web crawler for ruby