miteru 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miteru/crawler.rb +14 -5
- data/lib/miteru/kit.rb +3 -2
- data/lib/miteru/notifiers/base.rb +4 -0
- data/lib/miteru/notifiers/urlscan.rb +3 -6
- data/lib/miteru/orchestrator.rb +9 -4
- data/lib/miteru/sidekiq/jobs.rb +8 -1
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/website.rb +5 -11
- data/miteru.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c5c223747e06cb6d010f4a5e3157b1a4999662761ccaef108521b1627489b800
|
|
4
|
+
data.tar.gz: 403ed81289db89582add2420290b6f8c343da88babf2c548940871f0791655dd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 255a86779cdb3473ba4b7683159b1290b0774eae7863db2cd9e770610b6a14f1397ff1a07823132009daafc5b0c75aea2489cd39c051067311242cf30535517d
|
|
7
|
+
data.tar.gz: 3662c8946aba747ad6dcda2fbedc1dc34b0cba8fd680e9d573b3a1c24114b5658f5029ecb3bf5e222c2d4cced01dd0f6f1542d4878566fab0880456bb500997e
|
data/lib/miteru/crawler.rb
CHANGED
|
@@ -8,7 +8,7 @@ module Miteru
|
|
|
8
8
|
def call(website)
|
|
9
9
|
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
|
|
10
10
|
Miteru.logger.info("Website:#{website.truncated_url} has #{website.kits.length} kit(s).")
|
|
11
|
-
return unless website.
|
|
11
|
+
return unless website.kits?
|
|
12
12
|
|
|
13
13
|
notify website
|
|
14
14
|
|
|
@@ -17,11 +17,10 @@ module Miteru
|
|
|
17
17
|
website.kits.each do |kit|
|
|
18
18
|
downloader = Downloader.new(kit)
|
|
19
19
|
result = downloader.result
|
|
20
|
-
|
|
21
20
|
if result.success?
|
|
22
|
-
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}")
|
|
21
|
+
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}.")
|
|
23
22
|
else
|
|
24
|
-
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}")
|
|
23
|
+
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
|
|
25
24
|
end
|
|
26
25
|
end
|
|
27
26
|
end.recover { nil }.value!
|
|
@@ -33,8 +32,18 @@ module Miteru
|
|
|
33
32
|
Miteru.config.auto_download
|
|
34
33
|
end
|
|
35
34
|
|
|
35
|
+
#
|
|
36
|
+
# @param [Miteru::Website] website
|
|
37
|
+
#
|
|
36
38
|
def notify(website)
|
|
37
|
-
|
|
39
|
+
notifiers.each do |notifier|
|
|
40
|
+
result = notifier.result(website)
|
|
41
|
+
if result.success?
|
|
42
|
+
Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
|
|
43
|
+
else
|
|
44
|
+
Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
|
|
45
|
+
end
|
|
46
|
+
end
|
|
38
47
|
end
|
|
39
48
|
|
|
40
49
|
#
|
data/lib/miteru/kit.rb
CHANGED
|
@@ -25,6 +25,8 @@ module Miteru
|
|
|
25
25
|
# @param [String] source
|
|
26
26
|
#
|
|
27
27
|
def initialize(url, source:)
|
|
28
|
+
super()
|
|
29
|
+
|
|
28
30
|
@url = url
|
|
29
31
|
@source = source
|
|
30
32
|
|
|
@@ -37,7 +39,6 @@ module Miteru
|
|
|
37
39
|
def valid?
|
|
38
40
|
# make a HEAD request for the validation
|
|
39
41
|
before_validation
|
|
40
|
-
|
|
41
42
|
valid_ext? && reachable? && valid_mime_type? && valid_content_length?
|
|
42
43
|
end
|
|
43
44
|
|
|
@@ -110,7 +111,7 @@ module Miteru
|
|
|
110
111
|
end
|
|
111
112
|
|
|
112
113
|
def http
|
|
113
|
-
HTTP::Factory.build
|
|
114
|
+
HTTP::Factory.build(raise_exception: false)
|
|
114
115
|
end
|
|
115
116
|
|
|
116
117
|
def before_validation
|
data/lib/miteru/notifiers/urlscan.rb
CHANGED
|
@@ -9,10 +9,7 @@ module Miteru
|
|
|
9
9
|
def call(website)
|
|
10
10
|
return unless callable?
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
return unless kits.any?
|
|
14
|
-
|
|
15
|
-
kits.each { |kit| submit(kit.url) }
|
|
12
|
+
website.kits.each { |kit| submit(kit.url) }
|
|
16
13
|
end
|
|
17
14
|
|
|
18
15
|
def callable?
|
|
@@ -33,7 +30,7 @@ module Miteru
|
|
|
33
30
|
end
|
|
34
31
|
|
|
35
32
|
def timeout
|
|
36
|
-
Miteru.config.
|
|
33
|
+
Miteru.config.api_timeout
|
|
37
34
|
end
|
|
38
35
|
|
|
39
36
|
def tags
|
|
@@ -45,7 +42,7 @@ module Miteru
|
|
|
45
42
|
end
|
|
46
43
|
|
|
47
44
|
def submit(url)
|
|
48
|
-
http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
|
|
45
|
+
http.post("https://urlscan.io/api/v1/scan/", json: {tags:, visibility:, url:})
|
|
49
46
|
end
|
|
50
47
|
end
|
|
51
48
|
end
|
data/lib/miteru/orchestrator.rb
CHANGED
|
@@ -6,16 +6,21 @@ module Miteru
|
|
|
6
6
|
Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
|
|
7
7
|
|
|
8
8
|
if Miteru.sidekiq?
|
|
9
|
-
|
|
10
9
|
websites.each do |website|
|
|
11
10
|
Jobs::CrawleJob.perform_async(website.url, website.source)
|
|
12
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued") if verbose?
|
|
11
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
|
13
12
|
end
|
|
14
13
|
else
|
|
15
14
|
Miteru.logger.info("Use #{threads} thread(s).") if verbose?
|
|
16
15
|
Parallel.each(websites, in_threads: threads) do |website|
|
|
17
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawling started") if verbose?
|
|
18
|
-
|
|
16
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
|
|
17
|
+
|
|
18
|
+
result = Crawler.result(website)
|
|
19
|
+
if result.success?
|
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
|
21
|
+
else
|
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
|
23
|
+
end
|
|
19
24
|
end
|
|
20
25
|
end
|
|
21
26
|
end
|
data/lib/miteru/sidekiq/jobs.rb
CHANGED
|
@@ -14,7 +14,14 @@ module Miteru
|
|
|
14
14
|
#
|
|
15
15
|
def perform(url, source)
|
|
16
16
|
website = Miteru::Website.new(url, source:)
|
|
17
|
-
with_db_connection
|
|
17
|
+
with_db_connection do
|
|
18
|
+
result = Crawler.result(website)
|
|
19
|
+
if result.success?
|
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
|
21
|
+
else
|
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
18
25
|
end
|
|
19
26
|
end
|
|
20
27
|
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/website.rb
CHANGED
|
@@ -33,15 +33,8 @@ module Miteru
|
|
|
33
33
|
title.to_s.start_with? "Index of"
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
def
|
|
37
|
-
|
|
38
|
-
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
|
39
|
-
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
|
40
|
-
!kits.empty?
|
|
41
|
-
end.recover do
|
|
42
|
-
false
|
|
43
|
-
end.value!
|
|
44
|
-
end.call
|
|
36
|
+
def kits?
|
|
37
|
+
kits.any?
|
|
45
38
|
end
|
|
46
39
|
|
|
47
40
|
def links
|
|
@@ -81,8 +74,9 @@ module Miteru
|
|
|
81
74
|
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
|
82
75
|
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
|
83
76
|
doc.css("a").filter_map { |a| a.get("href") }.map do |href|
|
|
84
|
-
|
|
85
|
-
url
|
|
77
|
+
normalized_href = href.start_with?("/") ? href : "/#{href}"
|
|
78
|
+
normalized_url = url.end_with?("/") ? url.delete_suffix("/") : url
|
|
79
|
+
normalized_url + normalized_href
|
|
86
80
|
end
|
|
87
81
|
end.recover { [] }.value!
|
|
88
82
|
end
|
data/miteru.gemspec
CHANGED
|
@@ -40,6 +40,7 @@ Gem::Specification.new do |spec|
|
|
|
40
40
|
spec.add_development_dependency 'webmock', '~> 3.19'
|
|
41
41
|
|
|
42
42
|
spec.add_dependency 'activerecord', '7.1.3'
|
|
43
|
+
spec.add_dependency 'addressable', '2.8.6'
|
|
43
44
|
spec.add_dependency 'anyway_config', '2.6.2'
|
|
44
45
|
spec.add_dependency 'colorize', '1.1.0'
|
|
45
46
|
spec.add_dependency 'dotenv', '2.8.1'
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: miteru
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.1
|
|
4
|
+
version: 2.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Manabu Niseki
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-01-
|
|
11
|
+
date: 2024-01-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -206,6 +206,20 @@ dependencies:
|
|
|
206
206
|
- - '='
|
|
207
207
|
- !ruby/object:Gem::Version
|
|
208
208
|
version: 7.1.3
|
|
209
|
+
- !ruby/object:Gem::Dependency
|
|
210
|
+
name: addressable
|
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
|
212
|
+
requirements:
|
|
213
|
+
- - '='
|
|
214
|
+
- !ruby/object:Gem::Version
|
|
215
|
+
version: 2.8.6
|
|
216
|
+
type: :runtime
|
|
217
|
+
prerelease: false
|
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
219
|
+
requirements:
|
|
220
|
+
- - '='
|
|
221
|
+
- !ruby/object:Gem::Version
|
|
222
|
+
version: 2.8.6
|
|
209
223
|
- !ruby/object:Gem::Dependency
|
|
210
224
|
name: anyway_config
|
|
211
225
|
requirement: !ruby/object:Gem::Requirement
|