miteru 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miteru/crawler.rb +14 -5
- data/lib/miteru/kit.rb +3 -2
- data/lib/miteru/notifiers/base.rb +4 -0
- data/lib/miteru/notifiers/urlscan.rb +3 -6
- data/lib/miteru/orchestrator.rb +9 -4
- data/lib/miteru/sidekiq/jobs.rb +8 -1
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/website.rb +5 -11
- data/miteru.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5c223747e06cb6d010f4a5e3157b1a4999662761ccaef108521b1627489b800
|
4
|
+
data.tar.gz: 403ed81289db89582add2420290b6f8c343da88babf2c548940871f0791655dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 255a86779cdb3473ba4b7683159b1290b0774eae7863db2cd9e770610b6a14f1397ff1a07823132009daafc5b0c75aea2489cd39c051067311242cf30535517d
|
7
|
+
data.tar.gz: 3662c8946aba747ad6dcda2fbedc1dc34b0cba8fd680e9d573b3a1c24114b5658f5029ecb3bf5e222c2d4cced01dd0f6f1542d4878566fab0880456bb500997e
|
data/lib/miteru/crawler.rb
CHANGED
@@ -8,7 +8,7 @@ module Miteru
|
|
8
8
|
def call(website)
|
9
9
|
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
|
10
10
|
Miteru.logger.info("Website:#{website.truncated_url} has #{website.kits.length} kit(s).")
|
11
|
-
return unless website.
|
11
|
+
return unless website.kits?
|
12
12
|
|
13
13
|
notify website
|
14
14
|
|
@@ -17,11 +17,10 @@ module Miteru
|
|
17
17
|
website.kits.each do |kit|
|
18
18
|
downloader = Downloader.new(kit)
|
19
19
|
result = downloader.result
|
20
|
-
|
21
20
|
if result.success?
|
22
|
-
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}")
|
21
|
+
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}.")
|
23
22
|
else
|
24
|
-
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}")
|
23
|
+
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
|
25
24
|
end
|
26
25
|
end
|
27
26
|
end.recover { nil }.value!
|
@@ -33,8 +32,18 @@ module Miteru
|
|
33
32
|
Miteru.config.auto_download
|
34
33
|
end
|
35
34
|
|
35
|
+
#
|
36
|
+
# @param [Miteru::Website] website
|
37
|
+
#
|
36
38
|
def notify(website)
|
37
|
-
|
39
|
+
notifiers.each do |notifier|
|
40
|
+
result = notifier.result(website)
|
41
|
+
if result.success?
|
42
|
+
Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
|
43
|
+
else
|
44
|
+
Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
|
45
|
+
end
|
46
|
+
end
|
38
47
|
end
|
39
48
|
|
40
49
|
#
|
data/lib/miteru/kit.rb
CHANGED
@@ -25,6 +25,8 @@ module Miteru
|
|
25
25
|
# @param [String] source
|
26
26
|
#
|
27
27
|
def initialize(url, source:)
|
28
|
+
super()
|
29
|
+
|
28
30
|
@url = url
|
29
31
|
@source = source
|
30
32
|
|
@@ -37,7 +39,6 @@ module Miteru
|
|
37
39
|
def valid?
|
38
40
|
# make a HEAD request for the validation
|
39
41
|
before_validation
|
40
|
-
|
41
42
|
valid_ext? && reachable? && valid_mime_type? && valid_content_length?
|
42
43
|
end
|
43
44
|
|
@@ -110,7 +111,7 @@ module Miteru
|
|
110
111
|
end
|
111
112
|
|
112
113
|
def http
|
113
|
-
HTTP::Factory.build
|
114
|
+
HTTP::Factory.build(raise_exception: false)
|
114
115
|
end
|
115
116
|
|
116
117
|
def before_validation
|
data/lib/miteru/notifiers/urlscan.rb
CHANGED
@@ -9,10 +9,7 @@ module Miteru
|
|
9
9
|
def call(website)
|
10
10
|
return unless callable?
|
11
11
|
|
12
|
-
|
13
|
-
return unless kits.any?
|
14
|
-
|
15
|
-
kits.each { |kit| submit(kit.url) }
|
12
|
+
website.kits.each { |kit| submit(kit.url) }
|
16
13
|
end
|
17
14
|
|
18
15
|
def callable?
|
@@ -33,7 +30,7 @@ module Miteru
|
|
33
30
|
end
|
34
31
|
|
35
32
|
def timeout
|
36
|
-
Miteru.config.
|
33
|
+
Miteru.config.api_timeout
|
37
34
|
end
|
38
35
|
|
39
36
|
def tags
|
@@ -45,7 +42,7 @@ module Miteru
|
|
45
42
|
end
|
46
43
|
|
47
44
|
def submit(url)
|
48
|
-
http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
|
45
|
+
http.post("https://urlscan.io/api/v1/scan/", json: {tags:, visibility:, url:})
|
49
46
|
end
|
50
47
|
end
|
51
48
|
end
|
data/lib/miteru/orchestrator.rb
CHANGED
@@ -6,16 +6,21 @@ module Miteru
|
|
6
6
|
Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
|
7
7
|
|
8
8
|
if Miteru.sidekiq?
|
9
|
-
|
10
9
|
websites.each do |website|
|
11
10
|
Jobs::CrawleJob.perform_async(website.url, website.source)
|
12
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued") if verbose?
|
11
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
13
12
|
end
|
14
13
|
else
|
15
14
|
Miteru.logger.info("Use #{threads} thread(s).") if verbose?
|
16
15
|
Parallel.each(websites, in_threads: threads) do |website|
|
17
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawling started") if verbose?
|
18
|
-
|
16
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
|
17
|
+
|
18
|
+
result = Crawler.result(website)
|
19
|
+
if result.success?
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
21
|
+
else
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
23
|
+
end
|
19
24
|
end
|
20
25
|
end
|
21
26
|
end
|
data/lib/miteru/sidekiq/jobs.rb
CHANGED
@@ -14,7 +14,14 @@ module Miteru
|
|
14
14
|
#
|
15
15
|
def perform(url, source)
|
16
16
|
website = Miteru::Website.new(url, source:)
|
17
|
-
with_db_connection
|
17
|
+
with_db_connection do
|
18
|
+
result = Crawler.result(website)
|
19
|
+
if result.success?
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
21
|
+
else
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
23
|
+
end
|
24
|
+
end
|
18
25
|
end
|
19
26
|
end
|
20
27
|
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/website.rb
CHANGED
@@ -33,15 +33,8 @@ module Miteru
|
|
33
33
|
title.to_s.start_with? "Index of"
|
34
34
|
end
|
35
35
|
|
36
|
-
def
|
37
|
-
|
38
|
-
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
39
|
-
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
40
|
-
!kits.empty?
|
41
|
-
end.recover do
|
42
|
-
false
|
43
|
-
end.value!
|
44
|
-
end.call
|
36
|
+
def kits?
|
37
|
+
kits.any?
|
45
38
|
end
|
46
39
|
|
47
40
|
def links
|
@@ -81,8 +74,9 @@ module Miteru
|
|
81
74
|
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
82
75
|
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
83
76
|
doc.css("a").filter_map { |a| a.get("href") }.map do |href|
|
84
|
-
|
85
|
-
url
|
77
|
+
normalized_href = href.start_with?("/") ? href : "/#{href}"
|
78
|
+
normalized_url = url.end_with?("/") ? url.delete_suffix("/") : url
|
79
|
+
normalized_url + normalized_href
|
86
80
|
end
|
87
81
|
end.recover { [] }.value!
|
88
82
|
end
|
data/miteru.gemspec
CHANGED
@@ -40,6 +40,7 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_development_dependency 'webmock', '~> 3.19'
|
41
41
|
|
42
42
|
spec.add_dependency 'activerecord', '7.1.3'
|
43
|
+
spec.add_dependency 'addressable', '2.8.6'
|
43
44
|
spec.add_dependency 'anyway_config', '2.6.2'
|
44
45
|
spec.add_dependency 'colorize', '1.1.0'
|
45
46
|
spec.add_dependency 'dotenv', '2.8.1'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miteru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-
|
11
|
+
date: 2024-01-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - '='
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: 7.1.3
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: addressable
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - '='
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: 2.8.6
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - '='
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: 2.8.6
|
209
223
|
- !ruby/object:Gem::Dependency
|
210
224
|
name: anyway_config
|
211
225
|
requirement: !ruby/object:Gem::Requirement
|