kudzu 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 232a54685462f7415227ddf7ce2efd31d0046cb5bf7756ffbffb0b473836d45d
4
- data.tar.gz: e601657db196282b54e89633eebbaa4633ae1f7951fd0e7a3aa4b6ea00109d48
3
+ metadata.gz: 2893a15f2faff7f0697f7ce39f16a6f0e9c0972645c5e19aafb5751153432480
4
+ data.tar.gz: a8b9b0230be54ca43852e5b0700e1ea329258cbffcba284772f8d025ee14cbcf
5
5
  SHA512:
6
- metadata.gz: 5a3dabef241ebaed7b9a80ed11abd8bbaee63e09fe674743931c4f2c36ca44e436be6e5b44ed44e68cc30899ff080faf803f5e2834074fdc3dfc9abba100248f
7
- data.tar.gz: fa024d77336f25d03025d056a2d40f7c16f302658cbcf21afba5a637c9f14ab29f353c135b4c90217f6de46cbc9edb52a4bd0284982d264019762b4978b69c67
6
+ metadata.gz: 568b1bf560acb842ba3fa768c68078be503f271f7b1eed383ac67e8529bc568fc5ba720fc35f3c4e6973c07beede61baf39710d33ddfc13d01d75dbf14e2b090
7
+ data.tar.gz: 2537befd681ca91912003aad92b02ae69666afcc9fc816b330064c34547e0b94c5981d86ba369f45644b6161cb1fdbc980aea49aef7ec33e2be5a7b698597545
data/README.md CHANGED
@@ -12,7 +12,7 @@ A simple web crawler for ruby.
12
12
 
13
13
  ## Dependencies
14
14
 
15
- * ruby 2.3+
15
+ * ruby 2.5+
16
16
  * libicu
17
17
 
18
18
  ## Installation
@@ -22,13 +22,13 @@ module Kudzu
22
22
  end
23
23
 
24
24
  def from_body(body)
25
- mime = MimeMagic.by_magic(StringIO.new(body))
25
+ mime = Marcel::Magic.by_magic(StringIO.new(body))
26
26
  mime.to_s if mime
27
27
  end
28
28
 
29
29
  def from_url(url)
30
30
  uri = Addressable::URI.parse(url)
31
- mime = MimeMagic.by_path(uri.basename)
31
+ mime = Marcel::Magic.by_path(uri.basename)
32
32
  mime.to_s if mime
33
33
  end
34
34
  end
data/lib/kudzu/agent.rb CHANGED
@@ -30,13 +30,21 @@ module Kudzu
30
30
  end
31
31
 
32
32
  def extract_refs(response)
33
+ return [] unless redirect_url_allowed?(response)
33
34
  refs = @url_extractor.extract(response)
34
35
  @url_filterer.filter(refs, response.url)
35
36
  end
36
37
 
37
38
  def filter_response?(response)
38
- return false if response.redirect_from && !@url_filterer.allowed?(response.url, response.redirect_from)
39
+ return true unless redirect_url_allowed?(response)
39
40
  !@page_filterer.allowed?(response)
40
41
  end
42
+
43
+ private
44
+
45
+ def redirect_url_allowed?(response)
46
+ return true if response.redirect_from.nil? || response.redirect_from.empty?
47
+ @url_filterer.allowed?(response.url, response.redirect_from)
48
+ end
41
49
  end
42
50
  end
data/lib/kudzu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kudzu
2
- VERSION = '1.2.0'
2
+ VERSION = '1.3.1'
3
3
  end
data/lib/kudzu.rb CHANGED
@@ -2,8 +2,7 @@ require 'net/http'
2
2
  require 'http-cookie'
3
3
  require 'addressable'
4
4
  require 'nokogiri'
5
- require 'mimemagic'
6
- require 'mimemagic/overlay'
5
+ require 'marcel'
7
6
  require 'charlock_holmes'
8
7
 
9
8
  require 'kudzu/version'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kudzu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoshikazu Kaneta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-24 00:00:00.000000000 Z
11
+ date: 2023-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mimemagic
56
+ name: marcel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webrick
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rspec-rails
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -214,7 +228,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
214
228
  - !ruby/object:Gem::Version
215
229
  version: '0'
216
230
  requirements: []
217
- rubygems_version: 3.1.2
231
+ rubygems_version: 3.3.3
218
232
  signing_key:
219
233
  specification_version: 4
220
234
  summary: A simple web crawler for ruby