miteru 2.0.1 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miteru/crawler.rb +14 -5
- data/lib/miteru/feeds/ayashige.rb +0 -3
- data/lib/miteru/kit.rb +3 -2
- data/lib/miteru/notifiers/base.rb +4 -0
- data/lib/miteru/notifiers/slack.rb +1 -1
- data/lib/miteru/notifiers/urlscan.rb +3 -6
- data/lib/miteru/orchestrator.rb +17 -12
- data/lib/miteru/sidekiq/jobs.rb +8 -1
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/website.rb +5 -11
- data/miteru.gemspec +5 -4
- metadata +24 -11
- data/screenshots/slack.png +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4133dc72d0d07c5719999c526b8534a2dd7b5db5fe74172042b747c5cc557d4a
|
4
|
+
data.tar.gz: d2f59be0a18bdaee1b74633a58b8ed59a396862defd4b80dff4ae8ae9d8199f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a311f7ffa407ed11951f8f3b7ce696510ba31f8888dff459d3a04e4a19ec892f6f7e5651b7616e70a4482cf07d92f657dc4765ef17b6f42e66a3b526c7e1e26
|
7
|
+
data.tar.gz: 70b43504f29dc3646a343a87029e5c7a785d777099352717a4d2c2f26801879d2541d90ec5568f04d56c6635a35067397c4042e64a3177b6375cb74e51b5eae3
|
data/lib/miteru/crawler.rb
CHANGED
@@ -8,7 +8,7 @@ module Miteru
|
|
8
8
|
def call(website)
|
9
9
|
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
|
10
10
|
Miteru.logger.info("Website:#{website.truncated_url} has #{website.kits.length} kit(s).")
|
11
|
-
return unless website.
|
11
|
+
return unless website.kits?
|
12
12
|
|
13
13
|
notify website
|
14
14
|
|
@@ -17,11 +17,10 @@ module Miteru
|
|
17
17
|
website.kits.each do |kit|
|
18
18
|
downloader = Downloader.new(kit)
|
19
19
|
result = downloader.result
|
20
|
-
|
21
20
|
if result.success?
|
22
|
-
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}")
|
21
|
+
Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{result.value!}.")
|
23
22
|
else
|
24
|
-
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}")
|
23
|
+
Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
|
25
24
|
end
|
26
25
|
end
|
27
26
|
end.recover { nil }.value!
|
@@ -33,8 +32,18 @@ module Miteru
|
|
33
32
|
Miteru.config.auto_download
|
34
33
|
end
|
35
34
|
|
35
|
+
#
|
36
|
+
# @param [Miteru::Website] website
|
37
|
+
#
|
36
38
|
def notify(website)
|
37
|
-
|
39
|
+
notifiers.each do |notifier|
|
40
|
+
result = notifier.result(website)
|
41
|
+
if result.success?
|
42
|
+
Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
|
43
|
+
else
|
44
|
+
Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
|
45
|
+
end
|
46
|
+
end
|
38
47
|
end
|
39
48
|
|
40
49
|
#
|
data/lib/miteru/kit.rb
CHANGED
@@ -25,6 +25,8 @@ module Miteru
|
|
25
25
|
# @param [String] source
|
26
26
|
#
|
27
27
|
def initialize(url, source:)
|
28
|
+
super()
|
29
|
+
|
28
30
|
@url = url
|
29
31
|
@source = source
|
30
32
|
|
@@ -37,7 +39,6 @@ module Miteru
|
|
37
39
|
def valid?
|
38
40
|
# make a HEAD request for the validation
|
39
41
|
before_validation
|
40
|
-
|
41
42
|
valid_ext? && reachable? && valid_mime_type? && valid_content_length?
|
42
43
|
end
|
43
44
|
|
@@ -110,7 +111,7 @@ module Miteru
|
|
110
111
|
end
|
111
112
|
|
112
113
|
def http
|
113
|
-
HTTP::Factory.build
|
114
|
+
HTTP::Factory.build(raise_exception: false)
|
114
115
|
end
|
115
116
|
|
116
117
|
def before_validation
|
data/lib/miteru/notifiers/urlscan.rb
CHANGED
@@ -9,10 +9,7 @@ module Miteru
|
|
9
9
|
def call(website)
|
10
10
|
return unless callable?
|
11
11
|
|
12
|
-
|
13
|
-
return unless kits.any?
|
14
|
-
|
15
|
-
kits.each { |kit| submit(kit.url) }
|
12
|
+
website.kits.each { |kit| submit(kit.url) }
|
16
13
|
end
|
17
14
|
|
18
15
|
def callable?
|
@@ -33,7 +30,7 @@ module Miteru
|
|
33
30
|
end
|
34
31
|
|
35
32
|
def timeout
|
36
|
-
Miteru.config.
|
33
|
+
Miteru.config.api_timeout
|
37
34
|
end
|
38
35
|
|
39
36
|
def tags
|
@@ -45,7 +42,7 @@ module Miteru
|
|
45
42
|
end
|
46
43
|
|
47
44
|
def submit(url)
|
48
|
-
http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
|
45
|
+
http.post("https://urlscan.io/api/v1/scan/", json: {tags:, visibility:, url:})
|
49
46
|
end
|
50
47
|
end
|
51
48
|
end
|
data/lib/miteru/orchestrator.rb
CHANGED
@@ -6,16 +6,21 @@ module Miteru
|
|
6
6
|
Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
|
7
7
|
|
8
8
|
if Miteru.sidekiq?
|
9
|
-
|
10
9
|
websites.each do |website|
|
11
10
|
Jobs::CrawleJob.perform_async(website.url, website.source)
|
12
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued") if verbose?
|
11
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
13
12
|
end
|
14
13
|
else
|
15
14
|
Miteru.logger.info("Use #{threads} thread(s).") if verbose?
|
16
15
|
Parallel.each(websites, in_threads: threads) do |website|
|
17
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawling started") if verbose?
|
18
|
-
|
16
|
+
Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
|
17
|
+
|
18
|
+
result = Crawler.result(website)
|
19
|
+
if result.success?
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
21
|
+
else
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
23
|
+
end
|
19
24
|
end
|
20
25
|
end
|
21
26
|
end
|
@@ -35,7 +40,14 @@ module Miteru
|
|
35
40
|
Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
|
36
41
|
end
|
37
42
|
end
|
38
|
-
end.flatten
|
43
|
+
end.flatten.uniq(&:url)
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# @return [Array<Miteru::Feeds::Base>]
|
48
|
+
#
|
49
|
+
def feeds
|
50
|
+
Miteru.feeds.map(&:new)
|
39
51
|
end
|
40
52
|
|
41
53
|
private
|
@@ -47,12 +59,5 @@ module Miteru
|
|
47
59
|
def verbose?
|
48
60
|
Miteru.config.verbose
|
49
61
|
end
|
50
|
-
|
51
|
-
#
|
52
|
-
# @return [Array<Miteru::Feeds::Base>]
|
53
|
-
#
|
54
|
-
def feeds
|
55
|
-
Miteru.feeds.map(&:new)
|
56
|
-
end
|
57
62
|
end
|
58
63
|
end
|
data/lib/miteru/sidekiq/jobs.rb
CHANGED
@@ -14,7 +14,14 @@ module Miteru
|
|
14
14
|
#
|
15
15
|
def perform(url, source)
|
16
16
|
website = Miteru::Website.new(url, source:)
|
17
|
-
with_db_connection
|
17
|
+
with_db_connection do
|
18
|
+
result = Crawler.result(website)
|
19
|
+
if result.success?
|
20
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
|
21
|
+
else
|
22
|
+
Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
23
|
+
end
|
24
|
+
end
|
18
25
|
end
|
19
26
|
end
|
20
27
|
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/website.rb
CHANGED
@@ -33,15 +33,8 @@ module Miteru
|
|
33
33
|
title.to_s.start_with? "Index of"
|
34
34
|
end
|
35
35
|
|
36
|
-
def
|
37
|
-
|
38
|
-
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
39
|
-
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
40
|
-
!kits.empty?
|
41
|
-
end.recover do
|
42
|
-
false
|
43
|
-
end.value!
|
44
|
-
end.call
|
36
|
+
def kits?
|
37
|
+
kits.any?
|
45
38
|
end
|
46
39
|
|
47
40
|
def links
|
@@ -81,8 +74,9 @@ module Miteru
|
|
81
74
|
Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
|
82
75
|
OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
|
83
76
|
doc.css("a").filter_map { |a| a.get("href") }.map do |href|
|
84
|
-
|
85
|
-
url
|
77
|
+
normalized_href = href.start_with?("/") ? href : "/#{href}"
|
78
|
+
normalized_url = url.end_with?("/") ? url.delete_suffix("/") : url
|
79
|
+
normalized_url + normalized_href
|
86
80
|
end
|
87
81
|
end.recover { [] }.value!
|
88
82
|
end
|
data/miteru.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_development_dependency 'mysql2', '~> 0.5'
|
33
33
|
spec.add_development_dependency 'pg', '~> 1.5'
|
34
34
|
spec.add_development_dependency 'rake', '~> 13.1'
|
35
|
-
spec.add_development_dependency 'rspec', '~> 3.
|
35
|
+
spec.add_development_dependency 'rspec', '~> 3.13'
|
36
36
|
spec.add_development_dependency 'simplecov-lcov', '~> 0.8'
|
37
37
|
spec.add_development_dependency 'standard', '~> 1.33'
|
38
38
|
spec.add_development_dependency 'test-prof', '~> 1.3'
|
@@ -40,18 +40,19 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_development_dependency 'webmock', '~> 3.19'
|
41
41
|
|
42
42
|
spec.add_dependency 'activerecord', '7.1.3'
|
43
|
+
spec.add_dependency 'addressable', '2.8.6'
|
43
44
|
spec.add_dependency 'anyway_config', '2.6.2'
|
44
45
|
spec.add_dependency 'colorize', '1.1.0'
|
45
46
|
spec.add_dependency 'dotenv', '2.8.1'
|
46
47
|
spec.add_dependency 'down', '5.4.1'
|
47
48
|
spec.add_dependency 'dry-files', '1.1.0'
|
48
49
|
spec.add_dependency 'dry-monads', '1.6.0'
|
49
|
-
spec.add_dependency 'http', '5.
|
50
|
+
spec.add_dependency 'http', '5.2.0'
|
50
51
|
spec.add_dependency 'memo_wise', '1.8.0'
|
51
52
|
spec.add_dependency 'oga', '3.4'
|
52
53
|
spec.add_dependency 'parallel', '1.24.0'
|
53
54
|
spec.add_dependency 'puma', '6.4.2'
|
54
|
-
spec.add_dependency 'rack', '3.0.
|
55
|
+
spec.add_dependency 'rack', '3.0.9'
|
55
56
|
spec.add_dependency 'rack-session', '2.0.0'
|
56
57
|
spec.add_dependency 'rackup', '2.1.0'
|
57
58
|
spec.add_dependency 'semantic_logger', '4.15.0'
|
@@ -59,7 +60,7 @@ Gem::Specification.new do |spec|
|
|
59
60
|
spec.add_dependency 'sentry-sidekiq', '5.16.1'
|
60
61
|
spec.add_dependency 'sidekiq', '7.2.1'
|
61
62
|
spec.add_dependency 'slack-notifier', '2.4.0'
|
62
|
-
spec.add_dependency 'sqlite3', '1.7.
|
63
|
+
spec.add_dependency 'sqlite3', '1.7.2'
|
63
64
|
spec.add_dependency 'thor', '1.3.0'
|
64
65
|
spec.add_dependency 'thor-hollaback', '0.2.1'
|
65
66
|
spec.add_dependency 'uuidtools', '2.2.0'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miteru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -114,14 +114,14 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '3.
|
117
|
+
version: '3.13'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '3.
|
124
|
+
version: '3.13'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: simplecov-lcov
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - '='
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: 7.1.3
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: addressable
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - '='
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: 2.8.6
|
216
|
+
type: :runtime
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - '='
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: 2.8.6
|
209
223
|
- !ruby/object:Gem::Dependency
|
210
224
|
name: anyway_config
|
211
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -296,14 +310,14 @@ dependencies:
|
|
296
310
|
requirements:
|
297
311
|
- - '='
|
298
312
|
- !ruby/object:Gem::Version
|
299
|
-
version: 5.
|
313
|
+
version: 5.2.0
|
300
314
|
type: :runtime
|
301
315
|
prerelease: false
|
302
316
|
version_requirements: !ruby/object:Gem::Requirement
|
303
317
|
requirements:
|
304
318
|
- - '='
|
305
319
|
- !ruby/object:Gem::Version
|
306
|
-
version: 5.
|
320
|
+
version: 5.2.0
|
307
321
|
- !ruby/object:Gem::Dependency
|
308
322
|
name: memo_wise
|
309
323
|
requirement: !ruby/object:Gem::Requirement
|
@@ -366,14 +380,14 @@ dependencies:
|
|
366
380
|
requirements:
|
367
381
|
- - '='
|
368
382
|
- !ruby/object:Gem::Version
|
369
|
-
version: 3.0.
|
383
|
+
version: 3.0.9
|
370
384
|
type: :runtime
|
371
385
|
prerelease: false
|
372
386
|
version_requirements: !ruby/object:Gem::Requirement
|
373
387
|
requirements:
|
374
388
|
- - '='
|
375
389
|
- !ruby/object:Gem::Version
|
376
|
-
version: 3.0.
|
390
|
+
version: 3.0.9
|
377
391
|
- !ruby/object:Gem::Dependency
|
378
392
|
name: rack-session
|
379
393
|
requirement: !ruby/object:Gem::Requirement
|
@@ -478,14 +492,14 @@ dependencies:
|
|
478
492
|
requirements:
|
479
493
|
- - '='
|
480
494
|
- !ruby/object:Gem::Version
|
481
|
-
version: 1.7.
|
495
|
+
version: 1.7.2
|
482
496
|
type: :runtime
|
483
497
|
prerelease: false
|
484
498
|
version_requirements: !ruby/object:Gem::Requirement
|
485
499
|
requirements:
|
486
500
|
- - '='
|
487
501
|
- !ruby/object:Gem::Version
|
488
|
-
version: 1.7.
|
502
|
+
version: 1.7.2
|
489
503
|
- !ruby/object:Gem::Dependency
|
490
504
|
name: thor
|
491
505
|
requirement: !ruby/object:Gem::Requirement
|
@@ -586,7 +600,6 @@ files:
|
|
586
600
|
- lib/miteru/website.rb
|
587
601
|
- miteru.gemspec
|
588
602
|
- renovate.json
|
589
|
-
- screenshots/slack.png
|
590
603
|
homepage: https://github.com/ninoseki/miteru
|
591
604
|
licenses:
|
592
605
|
- MIT
|
data/screenshots/slack.png
DELETED
Binary file
|